1 /*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/in.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
31 #include <net/ip.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
34 #include <net/sock.h>
35 #include <net/pkt_sched.h>
38 /* Class-Based Queueing (CBQ) algorithm.
39 =======================================
41 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
42 Management Models for Packet Networks",
43 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
48 Parameters", 1996
50 [4] Sally Floyd and Michael Speer, "Experimental Results
51 for Class-Based Queueing", 1998, not published.
53 -----------------------------------------------------------------------
55 Algorithm skeleton was taken from NS simulator cbq.cc.
56 If someone wants to check this code against the LBL version,
57 they should take into account that ONLY the skeleton was borrowed;
58 the implementation is different. In particular:
60 --- The WRR algorithm is different. Our version looks more
61 reasonable (I hope) and works when quanta are allowed to be
62 less than MTU, which is always the case when real time classes
63 have small rates. Note that the statement of [3] is
64 incomplete: the delay may actually be estimated even if the class
65 per-round allotment is less than MTU. Namely, if the per-round
66 allotment is W*r_i, and r_1+...+r_k = r < 1
68 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70 In the worst case we have an IntServ estimate with D = W*r+k*MTU
71 and C = MTU*r. The proof (if correct at all) is trivial. (An illustrative numeric instance follows this comment.)
74 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
75 interpret some places, which look like wrong translations
76 from NS. Anyone is advised to find these differences
77 and explain to me why I am wrong 8).
79 --- Linux has no EOI event, so we cannot estimate true class
80 idle time. The workaround is to consider the next dequeue event
81 as a sign that the previous packet is finished. This is wrong because of
82 internal device queueing, but on a permanently loaded link it is true.
83 Moreover, combined with the clock integrator, this scheme looks
84 very close to an ideal solution. */
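/* Illustrative reading of the delay bound above (with assumed example
   numbers, not taken from the text): let MTU = 1500 bytes, k = 2 real-time
   classes with r_1 = r_2 = 0.25 (so r = 0.5), W = 2000 bytes, giving
   per-round allotments W*r_i = 500 bytes < MTU, and read B as the link
   bandwidth.  Then [MTU/(W*r_i)] = 3 and

	delay_i <= (3*1000 + 1000 + 2*1500)/B = 7000 bytes / B,

   i.e. about 56 ms on a 1 Mbit/s (125000 bytes/s) link.  This is only a
   sketch of how the bound is evaluated. */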
86 struct cbq_sched_data;
89 struct cbq_class
91 struct cbq_class *next; /* hash table link */
92 struct cbq_class *next_alive; /* next class with backlog in this priority band */
94 /* Parameters */
95 u32 classid;
96 unsigned char priority; /* class priority */
97 unsigned char priority2; /* priority to be used after overlimit */
98 unsigned char ewma_log; /* time constant for idle time calculation */
99 unsigned char ovl_strategy;
100 #ifdef CONFIG_NET_CLS_POLICE
101 unsigned char police;
102 #endif
104 u32 defmap;
106 /* Link-sharing scheduler parameters */
107 long maxidle; /* Class parameters: see below. */
108 long offtime;
109 long minidle;
110 u32 avpkt;
111 struct qdisc_rate_table *R_tab;
113 /* Overlimit strategy parameters */
114 void (*overlimit)(struct cbq_class *cl);
115 long penalty;
117 /* General scheduler (WRR) parameters */
118 long allot;
119 long quantum; /* Allotment per WRR round */
120 long weight; /* Relative allotment: see below */
122 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
123 struct cbq_class *split; /* Ptr to split node */
124 struct cbq_class *share; /* Ptr to LS parent in the class tree */
125 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
126 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
127 parent otherwise */
128 struct cbq_class *sibling; /* Sibling chain */
129 struct cbq_class *children; /* Pointer to children chain */
131 struct Qdisc *q; /* Elementary queueing discipline */
134 /* Variables */
135 unsigned char cpriority; /* Effective priority */
136 unsigned char delayed;
137 unsigned char level; /* level of the class in hierarchy:
138 0 for leaf classes, and maximal
139 level of children + 1 for nodes.
142 psched_time_t last; /* Last end of service */
143 psched_time_t undertime;
144 long avgidle;
145 long deficit; /* Saved deficit for WRR */
146 unsigned long penalized;
147 struct gnet_stats_basic bstats;
148 struct gnet_stats_queue qstats;
149 struct gnet_stats_rate_est rate_est;
150 spinlock_t *stats_lock;
151 struct tc_cbq_xstats xstats;
153 struct tcf_proto *filter_list;
155 int refcnt;
156 int filters;
158 struct cbq_class *defaults[TC_PRIO_MAX+1];
161 struct cbq_sched_data
163 struct cbq_class *classes[16]; /* Hash table of all classes */
164 int nclasses[TC_CBQ_MAXPRIO+1];
165 unsigned quanta[TC_CBQ_MAXPRIO+1];
167 struct cbq_class link;
169 unsigned activemask;
170 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
171 with backlog */
173 #ifdef CONFIG_NET_CLS_POLICE
174 struct cbq_class *rx_class;
175 #endif
176 struct cbq_class *tx_class;
177 struct cbq_class *tx_borrowed;
178 int tx_len;
179 psched_time_t now; /* Cached timestamp */
180 psched_time_t now_rt; /* Cached real time */
181 unsigned pmask;
183 struct timer_list delay_timer;
184 struct timer_list wd_timer; /* Watchdog timer,
185 started when CBQ has
186 backlog, but cannot
187 transmit just now */
188 long wd_expires;
189 int toplevel;
190 u32 hgenerator;
194 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
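/* L2T ("length to time"): look up, in the class's rate table, the number of
   scheduler clock units needed to transmit 'len' bytes at the class rate;
   the table supplied by userspace tc is indexed by len >> cell_log. */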
197 static __inline__ unsigned cbq_hash(u32 h)
199 h ^= h>>8;
200 h ^= h>>4;
201 return h&0xF;
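/* The hash above folds a classid into one of the 16 buckets of q->classes[];
   cbq_class_lookup() below walks the per-bucket chain linked through
   cl->next. */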
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 struct cbq_class *cl;
209 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210 if (cl->classid == classid)
211 return cl;
212 return NULL;
215 #ifdef CONFIG_NET_CLS_POLICE
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 struct cbq_class *cl, *new;
222 for (cl = this->tparent; cl; cl = cl->tparent)
223 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224 return new;
226 return NULL;
229 #endif
231 /* Classify packet. The procedure is pretty complicated, but
232 it allows us to combine link sharing and priority scheduling
233 transparently.
235 Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
236 so that it resolves to split nodes. Then packets are classified
237 by logical priority, or a more specific classifier may be attached
238 to the split node.
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 struct cbq_sched_data *q = qdisc_priv(sch);
245 struct cbq_class *head = &q->link;
246 struct cbq_class **defmap;
247 struct cbq_class *cl = NULL;
248 u32 prio = skb->priority;
249 struct tcf_result res;
252 * Step 1. If skb->priority points to one of our classes, use it.
254 if (TC_H_MAJ(prio^sch->handle) == 0 &&
255 (cl = cbq_class_lookup(q, prio)) != NULL)
256 return cl;
258 *qerr = NET_XMIT_BYPASS;
259 for (;;) {
260 int result = 0;
261 defmap = head->defaults;
264 * Step 2+n. Apply classifier.
266 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
267 goto fallback;
269 if ((cl = (void*)res.class) == NULL) {
270 if (TC_H_MAJ(res.classid))
271 cl = cbq_class_lookup(q, res.classid);
272 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273 cl = defmap[TC_PRIO_BESTEFFORT];
275 if (cl == NULL || cl->level >= head->level)
276 goto fallback;
279 #ifdef CONFIG_NET_CLS_ACT
280 switch (result) {
281 case TC_ACT_QUEUED:
282 case TC_ACT_STOLEN:
283 *qerr = NET_XMIT_SUCCESS;
284 case TC_ACT_SHOT:
285 return NULL;
287 #elif defined(CONFIG_NET_CLS_POLICE)
288 switch (result) {
289 case TC_POLICE_RECLASSIFY:
290 return cbq_reclassify(skb, cl);
291 case TC_POLICE_SHOT:
292 return NULL;
293 default:
294 break;
296 #endif
297 if (cl->level == 0)
298 return cl;
301 * Step 3+n. If the classifier selected a link-sharing class,
302 * apply the agency-specific classifier.
303 * Repeat this procedure until we hit a leaf node.
305 head = cl;
308 fallback:
309 cl = head;
312 * Step 4. No success...
314 if (TC_H_MAJ(prio) == 0 &&
315 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
317 return head;
319 return cl;
323 A packet has just been enqueued on an empty class.
324 cbq_activate_class adds the class to the tail of the active class list
325 of its priority band.
328 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331 int prio = cl->cpriority;
332 struct cbq_class *cl_tail;
334 cl_tail = q->active[prio];
335 q->active[prio] = cl;
337 if (cl_tail != NULL) {
338 cl->next_alive = cl_tail->next_alive;
339 cl_tail->next_alive = cl;
340 } else {
341 cl->next_alive = cl;
342 q->activemask |= (1<<prio);
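/* Note: q->active[prio] points at the current tail of the circular
   next_alive ring for this band; the round-robin scan in cbq_dequeue_prio()
   resumes at q->active[prio]->next_alive. */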
347 Unlink class from active chain.
348 Note that this same procedure is done directly in cbq_dequeue*
349 during round-robin procedure.
352 static void cbq_deactivate_class(struct cbq_class *this)
354 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355 int prio = this->cpriority;
356 struct cbq_class *cl;
357 struct cbq_class *cl_prev = q->active[prio];
359 do {
360 cl = cl_prev->next_alive;
361 if (cl == this) {
362 cl_prev->next_alive = cl->next_alive;
363 cl->next_alive = NULL;
365 if (cl == q->active[prio]) {
366 q->active[prio] = cl_prev;
367 if (cl == q->active[prio]) {
368 q->active[prio] = NULL;
369 q->activemask &= ~(1<<prio);
370 return;
373 return;
375 } while ((cl_prev = cl) != q->active[prio]);
378 static void
379 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 int toplevel = q->toplevel;
383 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
384 psched_time_t now;
385 psched_tdiff_t incr;
387 PSCHED_GET_TIME(now);
388 incr = PSCHED_TDIFF(now, q->now_rt);
389 PSCHED_TADD2(q->now, incr, now);
391 do {
392 if (PSCHED_TLESS(cl->undertime, now)) {
393 q->toplevel = cl->level;
394 return;
396 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
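/* cbq_mark_toplevel() lowers the toplevel cutoff to the level of the first
   class on the borrow chain (starting with the class itself) whose undertime
   is already in the past, so that an arriving packet may re-enable borrowing
   that the cutoff had inhibited. */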
400 static int
401 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 struct cbq_sched_data *q = qdisc_priv(sch);
404 int len = skb->len;
405 int ret;
406 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408 #ifdef CONFIG_NET_CLS_POLICE
409 q->rx_class = cl;
410 #endif
411 if (cl == NULL) {
412 if (ret == NET_XMIT_BYPASS)
413 sch->qstats.drops++;
414 kfree_skb(skb);
415 return ret;
418 #ifdef CONFIG_NET_CLS_POLICE
419 cl->q->__parent = sch;
420 #endif
421 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
422 sch->q.qlen++;
423 sch->bstats.packets++;
424 sch->bstats.bytes+=len;
425 cbq_mark_toplevel(q, cl);
426 if (!cl->next_alive)
427 cbq_activate_class(cl);
428 return ret;
431 sch->qstats.drops++;
432 cbq_mark_toplevel(q, cl);
433 cl->qstats.drops++;
434 return ret;
437 static int
438 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 struct cbq_sched_data *q = qdisc_priv(sch);
441 struct cbq_class *cl;
442 int ret;
444 if ((cl = q->tx_class) == NULL) {
445 kfree_skb(skb);
446 sch->qstats.drops++;
447 return NET_XMIT_CN;
449 q->tx_class = NULL;
451 cbq_mark_toplevel(q, cl);
453 #ifdef CONFIG_NET_CLS_POLICE
454 q->rx_class = cl;
455 cl->q->__parent = sch;
456 #endif
457 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
458 sch->q.qlen++;
459 sch->qstats.requeues++;
460 if (!cl->next_alive)
461 cbq_activate_class(cl);
462 return 0;
464 sch->qstats.drops++;
465 cl->qstats.drops++;
466 return ret;
469 /* Overlimit actions */
471 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473 static void cbq_ovl_classic(struct cbq_class *cl)
475 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
478 if (!cl->delayed) {
479 delay += cl->offtime;
482 The class goes to sleep, so it will have no
483 chance to work off its avgidle. Let's forgive it 8)
485 BTW cbq-2.0 has a bug in this
486 place; apparently they forgot to shift it by cl->ewma_log.
488 if (cl->avgidle < 0)
489 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490 if (cl->avgidle < cl->minidle)
491 cl->avgidle = cl->minidle;
492 if (delay <= 0)
493 delay = 1;
494 PSCHED_TADD2(q->now, delay, cl->undertime);
496 cl->xstats.overactions++;
497 cl->delayed = 1;
499 if (q->wd_expires == 0 || q->wd_expires > delay)
500 q->wd_expires = delay;
502 /* Dirty work! We must schedule wakeups based on
503 real available rate, rather than leaf rate,
504 which may be tiny (even zero).
506 if (q->toplevel == TC_CBQ_MAXLEVEL) {
507 struct cbq_class *b;
508 psched_tdiff_t base_delay = q->wd_expires;
510 for (b = cl->borrow; b; b = b->borrow) {
511 delay = PSCHED_TDIFF(b->undertime, q->now);
512 if (delay < base_delay) {
513 if (delay <= 0)
514 delay = 1;
515 base_delay = delay;
519 q->wd_expires = base_delay;
523 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
524 they go overlimit
527 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 struct cbq_class *this = cl;
532 do {
533 if (cl->level > q->toplevel) {
534 cl = NULL;
535 break;
537 } while ((cl = cl->borrow) != NULL);
539 if (cl == NULL)
540 cl = this;
541 cbq_ovl_classic(cl);
544 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
546 static void cbq_ovl_delay(struct cbq_class *cl)
548 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
551 if (!cl->delayed) {
552 unsigned long sched = jiffies;
554 delay += cl->offtime;
555 if (cl->avgidle < 0)
556 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
557 if (cl->avgidle < cl->minidle)
558 cl->avgidle = cl->minidle;
559 PSCHED_TADD2(q->now, delay, cl->undertime);
561 if (delay > 0) {
562 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
563 cl->penalized = sched;
564 cl->cpriority = TC_CBQ_MAXPRIO;
565 q->pmask |= (1<<TC_CBQ_MAXPRIO);
566 if (del_timer(&q->delay_timer) &&
567 (long)(q->delay_timer.expires - sched) > 0)
568 q->delay_timer.expires = sched;
569 add_timer(&q->delay_timer);
570 cl->delayed = 1;
571 cl->xstats.overactions++;
572 return;
574 delay = 1;
576 if (q->wd_expires == 0 || q->wd_expires > delay)
577 q->wd_expires = delay;
580 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
582 static void cbq_ovl_lowprio(struct cbq_class *cl)
584 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
586 cl->penalized = jiffies + cl->penalty;
588 if (cl->cpriority != cl->priority2) {
589 cl->cpriority = cl->priority2;
590 q->pmask |= (1<<cl->cpriority);
591 cl->xstats.overactions++;
593 cbq_ovl_classic(cl);
596 /* TC_CBQ_OVL_DROP: penalize class by dropping */
598 static void cbq_ovl_drop(struct cbq_class *cl)
600 if (cl->q->ops->drop)
601 if (cl->q->ops->drop(cl->q))
602 cl->qdisc->q.qlen--;
603 cl->xstats.overactions++;
604 cbq_ovl_classic(cl);
607 static void cbq_watchdog(unsigned long arg)
609 struct Qdisc *sch = (struct Qdisc*)arg;
611 sch->flags &= ~TCQ_F_THROTTLED;
612 netif_schedule(sch->dev);
615 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
617 struct cbq_class *cl;
618 struct cbq_class *cl_prev = q->active[prio];
619 unsigned long now = jiffies;
620 unsigned long sched = now;
622 if (cl_prev == NULL)
623 return now;
625 do {
626 cl = cl_prev->next_alive;
627 if ((long)(now - cl->penalized) > 0) {
628 cl_prev->next_alive = cl->next_alive;
629 cl->next_alive = NULL;
630 cl->cpriority = cl->priority;
631 cl->delayed = 0;
632 cbq_activate_class(cl);
634 if (cl == q->active[prio]) {
635 q->active[prio] = cl_prev;
636 if (cl == q->active[prio]) {
637 q->active[prio] = NULL;
638 return 0;
642 cl = cl_prev->next_alive;
643 } else if ((long)(sched - cl->penalized) > 0)
644 sched = cl->penalized;
645 } while ((cl_prev = cl) != q->active[prio]);
647 return (long)(sched - now);
650 static void cbq_undelay(unsigned long arg)
652 struct Qdisc *sch = (struct Qdisc*)arg;
653 struct cbq_sched_data *q = qdisc_priv(sch);
654 long delay = 0;
655 unsigned pmask;
657 pmask = q->pmask;
658 q->pmask = 0;
660 while (pmask) {
661 int prio = ffz(~pmask);
662 long tmp;
664 pmask &= ~(1<<prio);
666 tmp = cbq_undelay_prio(q, prio);
667 if (tmp > 0) {
668 q->pmask |= 1<<prio;
669 if (tmp < delay || delay == 0)
670 delay = tmp;
674 if (delay) {
675 q->delay_timer.expires = jiffies + delay;
676 add_timer(&q->delay_timer);
679 sch->flags &= ~TCQ_F_THROTTLED;
680 netif_schedule(sch->dev);
684 #ifdef CONFIG_NET_CLS_POLICE
686 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
688 int len = skb->len;
689 struct Qdisc *sch = child->__parent;
690 struct cbq_sched_data *q = qdisc_priv(sch);
691 struct cbq_class *cl = q->rx_class;
693 q->rx_class = NULL;
695 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
697 cbq_mark_toplevel(q, cl);
699 q->rx_class = cl;
700 cl->q->__parent = sch;
702 if (cl->q->enqueue(skb, cl->q) == 0) {
703 sch->q.qlen++;
704 sch->bstats.packets++;
705 sch->bstats.bytes+=len;
706 if (!cl->next_alive)
707 cbq_activate_class(cl);
708 return 0;
710 sch->qstats.drops++;
711 return 0;
714 sch->qstats.drops++;
715 return -1;
717 #endif
720 This is a mission-critical procedure.
722 We "regenerate" the toplevel cutoff if the transmitting class
723 has backlog and is not regulated. This is not part of the
724 original CBQ description, but it looks more reasonable.
725 Probably it is wrong; this question needs further investigation.
728 static __inline__ void
729 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
730 struct cbq_class *borrowed)
732 if (cl && q->toplevel >= borrowed->level) {
733 if (cl->q->q.qlen > 1) {
734 do {
735 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
736 q->toplevel = borrowed->level;
737 return;
739 } while ((borrowed=borrowed->borrow) != NULL);
741 #if 0
742 /* It is not necessary now. Uncommenting it
743 will save CPU cycles, but decrease fairness.
745 q->toplevel = TC_CBQ_MAXLEVEL;
746 #endif
750 static void
751 cbq_update(struct cbq_sched_data *q)
753 struct cbq_class *this = q->tx_class;
754 struct cbq_class *cl = this;
755 int len = q->tx_len;
757 q->tx_class = NULL;
759 for ( ; cl; cl = cl->share) {
760 long avgidle = cl->avgidle;
761 long idle;
763 cl->bstats.packets++;
764 cl->bstats.bytes += len;
767 (now - last) is total time between packet right edges.
768 (last_pktlen/rate) is "virtual" busy time, so that
770 idle = (now - last) - last_pktlen/rate
773 idle = PSCHED_TDIFF(q->now, cl->last);
774 if ((unsigned long)idle > 128*1024*1024) {
775 avgidle = cl->maxidle;
776 } else {
777 idle -= L2T(cl, len);
779 /* true_avgidle := (1-W)*true_avgidle + W*idle,
780 where W=2^{-ewma_log}. But cl->avgidle is scaled:
781 cl->avgidle == true_avgidle/W,
782 hence:
784 avgidle += idle - (avgidle>>cl->ewma_log);
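/* e.g. with ewma_log = 5 (W = 1/32): if avgidle stores 32*true_avgidle,
   then avgidle + idle - (avgidle >> 5)
   = 32*((1 - 1/32)*true_avgidle + (1/32)*idle),
   i.e. exactly the scaled form of the EWMA stated above. */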
787 if (avgidle <= 0) {
788 /* Overlimit or at-limit */
790 if (avgidle < cl->minidle)
791 avgidle = cl->minidle;
793 cl->avgidle = avgidle;
795 /* Calculate the expected time when this class
796 will be allowed to send.
797 It will occur when:
798 (1-W)*true_avgidle + W*delay = 0, i.e.
799 idle = (1/W - 1)*(-true_avgidle)
801 idle = (1 - W)*(-cl->avgidle);
803 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
806 That is not all.
807 To maintain the rate allocated to the class,
808 we add to undertime the virtual clock time
809 necessary to complete the transmitted packet.
810 (len/phys_bandwidth has already passed
811 by the moment of cbq_update.)
814 idle -= L2T(&q->link, len);
815 idle += L2T(cl, len);
817 PSCHED_AUDIT_TDIFF(idle);
819 PSCHED_TADD2(q->now, idle, cl->undertime);
820 } else {
821 /* Underlimit */
823 PSCHED_SET_PASTPERFECT(cl->undertime);
824 if (avgidle > cl->maxidle)
825 cl->avgidle = cl->maxidle;
826 else
827 cl->avgidle = avgidle;
829 cl->last = q->now;
832 cbq_update_toplevel(q, this, q->tx_borrowed);
835 static __inline__ struct cbq_class *
836 cbq_under_limit(struct cbq_class *cl)
838 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
839 struct cbq_class *this_cl = cl;
841 if (cl->tparent == NULL)
842 return cl;
844 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
845 !PSCHED_TLESS(q->now, cl->undertime)) {
846 cl->delayed = 0;
847 return cl;
850 do {
851 /* This is a very suspicious place. Currently the overlimit
852 action is generated for non-bounded classes
853 only if the link is completely congested.
854 Though this agrees with the ancestor-only paradigm,
855 it looks very stupid. In particular,
856 it means that this chunk of code will either
857 never be called or result in a strong amplification
858 of burstiness. Dangerous and silly, but
859 no other solution exists.
861 if ((cl = cl->borrow) == NULL) {
862 this_cl->qstats.overlimits++;
863 this_cl->overlimit(this_cl);
864 return NULL;
866 if (cl->level > q->toplevel)
867 return NULL;
868 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
869 PSCHED_TLESS(q->now, cl->undertime));
871 cl->delayed = 0;
872 return cl;
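/* cbq_under_limit() returns the class itself when it may send, an underlimit
   ancestor when the class has to borrow, or NULL when the borrow chain is
   exhausted (the overlimit action has fired) or blocked by the toplevel
   cutoff. */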
875 static __inline__ struct sk_buff *
876 cbq_dequeue_prio(struct Qdisc *sch, int prio)
878 struct cbq_sched_data *q = qdisc_priv(sch);
879 struct cbq_class *cl_tail, *cl_prev, *cl;
880 struct sk_buff *skb;
881 int deficit;
883 cl_tail = cl_prev = q->active[prio];
884 cl = cl_prev->next_alive;
886 do {
887 deficit = 0;
889 /* Start round */
890 do {
891 struct cbq_class *borrow = cl;
893 if (cl->q->q.qlen &&
894 (borrow = cbq_under_limit(cl)) == NULL)
895 goto skip_class;
897 if (cl->deficit <= 0) {
898 /* Class exhausted its allotment per
899 this round. Switch to the next one.
901 deficit = 1;
902 cl->deficit += cl->quantum;
903 goto next_class;
906 skb = cl->q->dequeue(cl->q);
908 /* The class did not give us any skb :-(
909 This can occur even if cl->q->q.qlen != 0,
910 e.g. if cl->q == "tbf"
912 if (skb == NULL)
913 goto skip_class;
915 cl->deficit -= skb->len;
916 q->tx_class = cl;
917 q->tx_borrowed = borrow;
918 if (borrow != cl) {
919 #ifndef CBQ_XSTATS_BORROWS_BYTES
920 borrow->xstats.borrows++;
921 cl->xstats.borrows++;
922 #else
923 borrow->xstats.borrows += skb->len;
924 cl->xstats.borrows += skb->len;
925 #endif
927 q->tx_len = skb->len;
929 if (cl->deficit <= 0) {
930 q->active[prio] = cl;
931 cl = cl->next_alive;
932 cl->deficit += cl->quantum;
934 return skb;
936 skip_class:
937 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
938 /* Class is empty or penalized.
939 Unlink it from active chain.
941 cl_prev->next_alive = cl->next_alive;
942 cl->next_alive = NULL;
944 /* Did cl_tail point to it? */
945 if (cl == cl_tail) {
946 /* Repair it! */
947 cl_tail = cl_prev;
949 /* Was it the last class in this band? */
950 if (cl == cl_tail) {
951 /* Kill the band! */
952 q->active[prio] = NULL;
953 q->activemask &= ~(1<<prio);
954 if (cl->q->q.qlen)
955 cbq_activate_class(cl);
956 return NULL;
959 q->active[prio] = cl_tail;
961 if (cl->q->q.qlen)
962 cbq_activate_class(cl);
964 cl = cl_prev;
967 next_class:
968 cl_prev = cl;
969 cl = cl->next_alive;
970 } while (cl_prev != cl_tail);
971 } while (deficit);
973 q->active[prio] = cl_prev;
975 return NULL;
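/* The loop above is the WRR scheduler for one priority band: a class with a
   positive deficit may transmit and is charged skb->len; a class that starts
   its turn with deficit <= 0 has cl->quantum added and is skipped, and the
   outer loop repeats the round while any allotment was replenished (the
   "deficit" flag).  Empty or penalized classes are unlinked from the ring
   on the fly. */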
978 static __inline__ struct sk_buff *
979 cbq_dequeue_1(struct Qdisc *sch)
981 struct cbq_sched_data *q = qdisc_priv(sch);
982 struct sk_buff *skb;
983 unsigned activemask;
985 activemask = q->activemask&0xFF;
986 while (activemask) {
987 int prio = ffz(~activemask);
988 activemask &= ~(1<<prio);
989 skb = cbq_dequeue_prio(sch, prio);
990 if (skb)
991 return skb;
993 return NULL;
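/* cbq_dequeue_1() serves bands in strict priority order: ffz(~activemask)
   picks the lowest-numbered (highest-priority) backlogged band first, and
   lower bands are tried only when higher ones yield nothing. */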
996 static struct sk_buff *
997 cbq_dequeue(struct Qdisc *sch)
999 struct sk_buff *skb;
1000 struct cbq_sched_data *q = qdisc_priv(sch);
1001 psched_time_t now;
1002 psched_tdiff_t incr;
1004 PSCHED_GET_TIME(now);
1005 incr = PSCHED_TDIFF(now, q->now_rt);
1007 if (q->tx_class) {
1008 psched_tdiff_t incr2;
1009 /* Time integrator. We calculate EOS time
1010 by adding expected packet transmission time.
1011 If real time is greater, we warp artificial clock,
1012 so that:
1014 cbq_time = max(real_time, work);
1016 incr2 = L2T(&q->link, q->tx_len);
1017 PSCHED_TADD(q->now, incr2);
1018 cbq_update(q);
1019 if ((incr -= incr2) < 0)
1020 incr = 0;
1022 PSCHED_TADD(q->now, incr);
1023 q->now_rt = now;
1025 for (;;) {
1026 q->wd_expires = 0;
1028 skb = cbq_dequeue_1(sch);
1029 if (skb) {
1030 sch->q.qlen--;
1031 sch->flags &= ~TCQ_F_THROTTLED;
1032 return skb;
1035 /* All the classes are overlimit.
1037 This is possible if:
1039 1. The scheduler is empty.
1040 2. The toplevel cutoff inhibited borrowing.
1041 3. The root class is overlimit.
1043 Reset conditions 2 and 3 and retry.
1045 Note that NS and cbq-2.0 are buggy: peeking
1046 an arbitrary class is appropriate for ancestor-only
1047 sharing, but not for the toplevel algorithm.
1049 Our version is better but slower, because it requires
1050 two passes; this is unavoidable with top-level sharing.
1053 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1054 PSCHED_IS_PASTPERFECT(q->link.undertime))
1055 break;
1057 q->toplevel = TC_CBQ_MAXLEVEL;
1058 PSCHED_SET_PASTPERFECT(q->link.undertime);
1061 /* No packets in the scheduler, or nobody wants to give them to us :-(
1062 Sigh... start the watchdog timer in the latter case. */
1064 if (sch->q.qlen) {
1065 sch->qstats.overlimits++;
1066 if (q->wd_expires) {
1067 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1068 if (delay <= 0)
1069 delay = 1;
1070 mod_timer(&q->wd_timer, jiffies + delay);
1071 sch->flags |= TCQ_F_THROTTLED;
1074 return NULL;
1077 /* CBQ class maintenance routines */
1079 static void cbq_adjust_levels(struct cbq_class *this)
1081 if (this == NULL)
1082 return;
1084 do {
1085 int level = 0;
1086 struct cbq_class *cl;
1088 if ((cl = this->children) != NULL) {
1089 do {
1090 if (cl->level > level)
1091 level = cl->level;
1092 } while ((cl = cl->sibling) != this->children);
1094 this->level = level+1;
1095 } while ((this = this->tparent) != NULL);
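/* cbq_adjust_levels() walks from the given class up through every tparent,
   setting each node's level to one more than the maximal level of its
   children; it is called whenever the class tree gains or loses a node. */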
1098 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1100 struct cbq_class *cl;
1101 unsigned h;
1103 if (q->quanta[prio] == 0)
1104 return;
1106 for (h=0; h<16; h++) {
1107 for (cl = q->classes[h]; cl; cl = cl->next) {
1108 /* BUGGGG... Beware! This expression suffers from
1109 arithmetic overflows!
1111 if (cl->priority == prio) {
1112 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1113 q->quanta[prio];
1115 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1116 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1117 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
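/* For every class of the given priority, cbq_normalize_quanta() computes

	quantum = allot * weight * nclasses[prio] / sum_of_weights[prio]

   where q->quanta[prio] accumulates the weights via cbq_addprio()/
   cbq_rmprio(), and clamps obviously bogus results (<= 0 or > 32*MTU) to
   dev->mtu/2 + 1. */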
1123 static void cbq_sync_defmap(struct cbq_class *cl)
1125 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1126 struct cbq_class *split = cl->split;
1127 unsigned h;
1128 int i;
1130 if (split == NULL)
1131 return;
1133 for (i=0; i<=TC_PRIO_MAX; i++) {
1134 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1135 split->defaults[i] = NULL;
1138 for (i=0; i<=TC_PRIO_MAX; i++) {
1139 int level = split->level;
1141 if (split->defaults[i])
1142 continue;
1144 for (h=0; h<16; h++) {
1145 struct cbq_class *c;
1147 for (c = q->classes[h]; c; c = c->next) {
1148 if (c->split == split && c->level < level &&
1149 c->defmap&(1<<i)) {
1150 split->defaults[i] = c;
1151 level = c->level;
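/* cbq_sync_defmap() rebuilds split->defaults[]: for each of the
   TC_PRIO_MAX+1 logical priorities it first drops entries no longer claimed
   by cl->defmap, then picks, among all classes attached to the same split
   node, the lowest-level class whose defmap bit covers that priority. */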
1158 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1160 struct cbq_class *split = NULL;
1162 if (splitid == 0) {
1163 if ((split = cl->split) == NULL)
1164 return;
1165 splitid = split->classid;
1168 if (split == NULL || split->classid != splitid) {
1169 for (split = cl->tparent; split; split = split->tparent)
1170 if (split->classid == splitid)
1171 break;
1174 if (split == NULL)
1175 return;
1177 if (cl->split != split) {
1178 cl->defmap = 0;
1179 cbq_sync_defmap(cl);
1180 cl->split = split;
1181 cl->defmap = def&mask;
1182 } else
1183 cl->defmap = (cl->defmap&~mask)|(def&mask);
1185 cbq_sync_defmap(cl);
1188 static void cbq_unlink_class(struct cbq_class *this)
1190 struct cbq_class *cl, **clp;
1191 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1193 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1194 if (cl == this) {
1195 *clp = cl->next;
1196 cl->next = NULL;
1197 break;
1201 if (this->tparent) {
1202 clp=&this->sibling;
1203 cl = *clp;
1204 do {
1205 if (cl == this) {
1206 *clp = cl->sibling;
1207 break;
1209 clp = &cl->sibling;
1210 } while ((cl = *clp) != this->sibling);
1212 if (this->tparent->children == this) {
1213 this->tparent->children = this->sibling;
1214 if (this->sibling == this)
1215 this->tparent->children = NULL;
1217 } else {
1218 BUG_TRAP(this->sibling == this);
1222 static void cbq_link_class(struct cbq_class *this)
1224 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1225 unsigned h = cbq_hash(this->classid);
1226 struct cbq_class *parent = this->tparent;
1228 this->sibling = this;
1229 this->next = q->classes[h];
1230 q->classes[h] = this;
1232 if (parent == NULL)
1233 return;
1235 if (parent->children == NULL) {
1236 parent->children = this;
1237 } else {
1238 this->sibling = parent->children->sibling;
1239 parent->children->sibling = this;
1243 static unsigned int cbq_drop(struct Qdisc* sch)
1245 struct cbq_sched_data *q = qdisc_priv(sch);
1246 struct cbq_class *cl, *cl_head;
1247 int prio;
1248 unsigned int len;
1250 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1251 if ((cl_head = q->active[prio]) == NULL)
1252 continue;
1254 cl = cl_head;
1255 do {
1256 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1257 sch->q.qlen--;
1258 if (!cl->q->q.qlen)
1259 cbq_deactivate_class(cl);
1260 return len;
1262 } while ((cl = cl->next_alive) != cl_head);
1264 return 0;
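/* cbq_drop() scans the bands from the numerically highest (lowest-priority)
   index downwards and drops one packet from the first backlogged class whose
   inner qdisc implements ->drop, deactivating the class if that emptied it. */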
1267 static void
1268 cbq_reset(struct Qdisc* sch)
1270 struct cbq_sched_data *q = qdisc_priv(sch);
1271 struct cbq_class *cl;
1272 int prio;
1273 unsigned h;
1275 q->activemask = 0;
1276 q->pmask = 0;
1277 q->tx_class = NULL;
1278 q->tx_borrowed = NULL;
1279 del_timer(&q->wd_timer);
1280 del_timer(&q->delay_timer);
1281 q->toplevel = TC_CBQ_MAXLEVEL;
1282 PSCHED_GET_TIME(q->now);
1283 q->now_rt = q->now;
1285 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1286 q->active[prio] = NULL;
1288 for (h = 0; h < 16; h++) {
1289 for (cl = q->classes[h]; cl; cl = cl->next) {
1290 qdisc_reset(cl->q);
1292 cl->next_alive = NULL;
1293 PSCHED_SET_PASTPERFECT(cl->undertime);
1294 cl->avgidle = cl->maxidle;
1295 cl->deficit = cl->quantum;
1296 cl->cpriority = cl->priority;
1299 sch->q.qlen = 0;
1303 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1305 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1306 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1307 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1309 if (lss->change&TCF_CBQ_LSS_EWMA)
1310 cl->ewma_log = lss->ewma_log;
1311 if (lss->change&TCF_CBQ_LSS_AVPKT)
1312 cl->avpkt = lss->avpkt;
1313 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1314 cl->minidle = -(long)lss->minidle;
1315 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1316 cl->maxidle = lss->maxidle;
1317 cl->avgidle = lss->maxidle;
1319 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1320 cl->offtime = lss->offtime;
1321 return 0;
1324 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1326 q->nclasses[cl->priority]--;
1327 q->quanta[cl->priority] -= cl->weight;
1328 cbq_normalize_quanta(q, cl->priority);
1331 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1333 q->nclasses[cl->priority]++;
1334 q->quanta[cl->priority] += cl->weight;
1335 cbq_normalize_quanta(q, cl->priority);
1338 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1340 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1342 if (wrr->allot)
1343 cl->allot = wrr->allot;
1344 if (wrr->weight)
1345 cl->weight = wrr->weight;
1346 if (wrr->priority) {
1347 cl->priority = wrr->priority-1;
1348 cl->cpriority = cl->priority;
1349 if (cl->priority >= cl->priority2)
1350 cl->priority2 = TC_CBQ_MAXPRIO-1;
1353 cbq_addprio(q, cl);
1354 return 0;
1357 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1359 switch (ovl->strategy) {
1360 case TC_CBQ_OVL_CLASSIC:
1361 cl->overlimit = cbq_ovl_classic;
1362 break;
1363 case TC_CBQ_OVL_DELAY:
1364 cl->overlimit = cbq_ovl_delay;
1365 break;
1366 case TC_CBQ_OVL_LOWPRIO:
1367 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1368 ovl->priority2-1 <= cl->priority)
1369 return -EINVAL;
1370 cl->priority2 = ovl->priority2-1;
1371 cl->overlimit = cbq_ovl_lowprio;
1372 break;
1373 case TC_CBQ_OVL_DROP:
1374 cl->overlimit = cbq_ovl_drop;
1375 break;
1376 case TC_CBQ_OVL_RCLASSIC:
1377 cl->overlimit = cbq_ovl_rclassic;
1378 break;
1379 default:
1380 return -EINVAL;
1382 cl->penalty = (ovl->penalty*HZ)/1000;
1383 return 0;
1386 #ifdef CONFIG_NET_CLS_POLICE
1387 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1389 cl->police = p->police;
1391 if (cl->q->handle) {
1392 if (p->police == TC_POLICE_RECLASSIFY)
1393 cl->q->reshape_fail = cbq_reshape_fail;
1394 else
1395 cl->q->reshape_fail = NULL;
1397 return 0;
1399 #endif
1401 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1403 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1404 return 0;
1407 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1409 struct cbq_sched_data *q = qdisc_priv(sch);
1410 struct rtattr *tb[TCA_CBQ_MAX];
1411 struct tc_ratespec *r;
1413 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1414 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1415 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1416 return -EINVAL;
1418 if (tb[TCA_CBQ_LSSOPT-1] &&
1419 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1420 return -EINVAL;
1422 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1424 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1425 return -EINVAL;
1427 q->link.refcnt = 1;
1428 q->link.sibling = &q->link;
1429 q->link.classid = sch->handle;
1430 q->link.qdisc = sch;
1431 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1432 sch->handle)))
1433 q->link.q = &noop_qdisc;
1435 q->link.priority = TC_CBQ_MAXPRIO-1;
1436 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1437 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1438 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1439 q->link.overlimit = cbq_ovl_classic;
1440 q->link.allot = psched_mtu(sch->dev);
1441 q->link.quantum = q->link.allot;
1442 q->link.weight = q->link.R_tab->rate.rate;
1444 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1445 q->link.avpkt = q->link.allot/2;
1446 q->link.minidle = -0x7FFFFFFF;
1447 q->link.stats_lock = &sch->dev->queue_lock;
1449 init_timer(&q->wd_timer);
1450 q->wd_timer.data = (unsigned long)sch;
1451 q->wd_timer.function = cbq_watchdog;
1452 init_timer(&q->delay_timer);
1453 q->delay_timer.data = (unsigned long)sch;
1454 q->delay_timer.function = cbq_undelay;
1455 q->toplevel = TC_CBQ_MAXLEVEL;
1456 PSCHED_GET_TIME(q->now);
1457 q->now_rt = q->now;
1459 cbq_link_class(&q->link);
1461 if (tb[TCA_CBQ_LSSOPT-1])
1462 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464 cbq_addprio(q, &q->link);
1465 return 0;
1468 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470 unsigned char *b = skb->tail;
1472 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1473 return skb->len;
1475 rtattr_failure:
1476 skb_trim(skb, b - skb->data);
1477 return -1;
1480 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482 unsigned char *b = skb->tail;
1483 struct tc_cbq_lssopt opt;
1485 opt.flags = 0;
1486 if (cl->borrow == NULL)
1487 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1488 if (cl->share == NULL)
1489 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1490 opt.ewma_log = cl->ewma_log;
1491 opt.level = cl->level;
1492 opt.avpkt = cl->avpkt;
1493 opt.maxidle = cl->maxidle;
1494 opt.minidle = (u32)(-cl->minidle);
1495 opt.offtime = cl->offtime;
1496 opt.change = ~0;
1497 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1498 return skb->len;
1500 rtattr_failure:
1501 skb_trim(skb, b - skb->data);
1502 return -1;
1505 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507 unsigned char *b = skb->tail;
1508 struct tc_cbq_wrropt opt;
1510 opt.flags = 0;
1511 opt.allot = cl->allot;
1512 opt.priority = cl->priority+1;
1513 opt.cpriority = cl->cpriority+1;
1514 opt.weight = cl->weight;
1515 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1516 return skb->len;
1518 rtattr_failure:
1519 skb_trim(skb, b - skb->data);
1520 return -1;
1523 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525 unsigned char *b = skb->tail;
1526 struct tc_cbq_ovl opt;
1528 opt.strategy = cl->ovl_strategy;
1529 opt.priority2 = cl->priority2+1;
1530 opt.pad = 0;
1531 opt.penalty = (cl->penalty*1000)/HZ;
1532 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1533 return skb->len;
1535 rtattr_failure:
1536 skb_trim(skb, b - skb->data);
1537 return -1;
1540 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1542 unsigned char *b = skb->tail;
1543 struct tc_cbq_fopt opt;
1545 if (cl->split || cl->defmap) {
1546 opt.split = cl->split ? cl->split->classid : 0;
1547 opt.defmap = cl->defmap;
1548 opt.defchange = ~0;
1549 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1551 return skb->len;
1553 rtattr_failure:
1554 skb_trim(skb, b - skb->data);
1555 return -1;
1558 #ifdef CONFIG_NET_CLS_POLICE
1559 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1561 unsigned char *b = skb->tail;
1562 struct tc_cbq_police opt;
1564 if (cl->police) {
1565 opt.police = cl->police;
1566 opt.__res1 = 0;
1567 opt.__res2 = 0;
1568 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1570 return skb->len;
1572 rtattr_failure:
1573 skb_trim(skb, b - skb->data);
1574 return -1;
1576 #endif
1578 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1580 if (cbq_dump_lss(skb, cl) < 0 ||
1581 cbq_dump_rate(skb, cl) < 0 ||
1582 cbq_dump_wrr(skb, cl) < 0 ||
1583 cbq_dump_ovl(skb, cl) < 0 ||
1584 #ifdef CONFIG_NET_CLS_POLICE
1585 cbq_dump_police(skb, cl) < 0 ||
1586 #endif
1587 cbq_dump_fopt(skb, cl) < 0)
1588 return -1;
1589 return 0;
1592 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594 struct cbq_sched_data *q = qdisc_priv(sch);
1595 unsigned char *b = skb->tail;
1596 struct rtattr *rta;
1598 rta = (struct rtattr*)b;
1599 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1600 if (cbq_dump_attr(skb, &q->link) < 0)
1601 goto rtattr_failure;
1602 rta->rta_len = skb->tail - b;
1603 return skb->len;
1605 rtattr_failure:
1606 skb_trim(skb, b - skb->data);
1607 return -1;
1610 static int
1611 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1613 struct cbq_sched_data *q = qdisc_priv(sch);
1615 q->link.xstats.avgidle = q->link.avgidle;
1616 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1619 static int
1620 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1621 struct sk_buff *skb, struct tcmsg *tcm)
1623 struct cbq_class *cl = (struct cbq_class*)arg;
1624 unsigned char *b = skb->tail;
1625 struct rtattr *rta;
1627 if (cl->tparent)
1628 tcm->tcm_parent = cl->tparent->classid;
1629 else
1630 tcm->tcm_parent = TC_H_ROOT;
1631 tcm->tcm_handle = cl->classid;
1632 tcm->tcm_info = cl->q->handle;
1634 rta = (struct rtattr*)b;
1635 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1636 if (cbq_dump_attr(skb, cl) < 0)
1637 goto rtattr_failure;
1638 rta->rta_len = skb->tail - b;
1639 return skb->len;
1641 rtattr_failure:
1642 skb_trim(skb, b - skb->data);
1643 return -1;
1646 static int
1647 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1648 struct gnet_dump *d)
1650 struct cbq_sched_data *q = qdisc_priv(sch);
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1653 cl->qstats.qlen = cl->q->q.qlen;
1654 cl->xstats.avgidle = cl->avgidle;
1655 cl->xstats.undertime = 0;
1657 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1658 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1660 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1661 #ifdef CONFIG_NET_ESTIMATOR
1662 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1663 #endif
1664 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1665 return -1;
1667 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1670 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1671 struct Qdisc **old)
1673 struct cbq_class *cl = (struct cbq_class*)arg;
1675 if (cl) {
1676 if (new == NULL) {
1677 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1678 cl->classid)) == NULL)
1679 return -ENOBUFS;
1680 } else {
1681 #ifdef CONFIG_NET_CLS_POLICE
1682 if (cl->police == TC_POLICE_RECLASSIFY)
1683 new->reshape_fail = cbq_reshape_fail;
1684 #endif
1686 sch_tree_lock(sch);
1687 *old = xchg(&cl->q, new);
1688 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1689 qdisc_reset(*old);
1690 sch_tree_unlock(sch);
1692 return 0;
1694 return -ENOENT;
1697 static struct Qdisc *
1698 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1700 struct cbq_class *cl = (struct cbq_class*)arg;
1702 return cl ? cl->q : NULL;
1705 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1707 struct cbq_class *cl = (struct cbq_class *)arg;
1709 if (cl->q->q.qlen == 0)
1710 cbq_deactivate_class(cl);
1713 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1715 struct cbq_sched_data *q = qdisc_priv(sch);
1716 struct cbq_class *cl = cbq_class_lookup(q, classid);
1718 if (cl) {
1719 cl->refcnt++;
1720 return (unsigned long)cl;
1722 return 0;
1725 static void cbq_destroy_filters(struct cbq_class *cl)
1727 struct tcf_proto *tp;
1729 while ((tp = cl->filter_list) != NULL) {
1730 cl->filter_list = tp->next;
1731 tcf_destroy(tp);
1735 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1737 struct cbq_sched_data *q = qdisc_priv(sch);
1739 BUG_TRAP(!cl->filters);
1741 cbq_destroy_filters(cl);
1742 qdisc_destroy(cl->q);
1743 qdisc_put_rtab(cl->R_tab);
1744 #ifdef CONFIG_NET_ESTIMATOR
1745 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1746 #endif
1747 if (cl != &q->link)
1748 kfree(cl);
1751 static void
1752 cbq_destroy(struct Qdisc* sch)
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1755 struct cbq_class *cl;
1756 unsigned h;
1758 #ifdef CONFIG_NET_CLS_POLICE
1759 q->rx_class = NULL;
1760 #endif
1762 * Filters must be destroyed first because we don't destroy the
1763 * classes from root to leaves, which means that filters can still
1764 * be bound to classes which have been destroyed already. --TGR '04
1766 for (h = 0; h < 16; h++)
1767 for (cl = q->classes[h]; cl; cl = cl->next)
1768 cbq_destroy_filters(cl);
1770 for (h = 0; h < 16; h++) {
1771 struct cbq_class *next;
1773 for (cl = q->classes[h]; cl; cl = next) {
1774 next = cl->next;
1775 cbq_destroy_class(sch, cl);
1780 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1782 struct cbq_class *cl = (struct cbq_class*)arg;
1784 if (--cl->refcnt == 0) {
1785 #ifdef CONFIG_NET_CLS_POLICE
1786 struct cbq_sched_data *q = qdisc_priv(sch);
1788 spin_lock_bh(&sch->dev->queue_lock);
1789 if (q->rx_class == cl)
1790 q->rx_class = NULL;
1791 spin_unlock_bh(&sch->dev->queue_lock);
1792 #endif
1794 cbq_destroy_class(sch, cl);
1798 static int
1799 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1800 unsigned long *arg)
1802 int err;
1803 struct cbq_sched_data *q = qdisc_priv(sch);
1804 struct cbq_class *cl = (struct cbq_class*)*arg;
1805 struct rtattr *opt = tca[TCA_OPTIONS-1];
1806 struct rtattr *tb[TCA_CBQ_MAX];
1807 struct cbq_class *parent;
1808 struct qdisc_rate_table *rtab = NULL;
1810 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1811 return -EINVAL;
1813 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1814 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1815 return -EINVAL;
1817 if (tb[TCA_CBQ_FOPT-1] &&
1818 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1819 return -EINVAL;
1821 if (tb[TCA_CBQ_RATE-1] &&
1822 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1823 return -EINVAL;
1825 if (tb[TCA_CBQ_LSSOPT-1] &&
1826 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1827 return -EINVAL;
1829 if (tb[TCA_CBQ_WRROPT-1] &&
1830 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1831 return -EINVAL;
1833 #ifdef CONFIG_NET_CLS_POLICE
1834 if (tb[TCA_CBQ_POLICE-1] &&
1835 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1836 return -EINVAL;
1837 #endif
1839 if (cl) {
1840 /* Check parent */
1841 if (parentid) {
1842 if (cl->tparent && cl->tparent->classid != parentid)
1843 return -EINVAL;
1844 if (!cl->tparent && parentid != TC_H_ROOT)
1845 return -EINVAL;
1848 if (tb[TCA_CBQ_RATE-1]) {
1849 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1850 if (rtab == NULL)
1851 return -EINVAL;
1854 /* Change class parameters */
1855 sch_tree_lock(sch);
1857 if (cl->next_alive != NULL)
1858 cbq_deactivate_class(cl);
1860 if (rtab) {
1861 rtab = xchg(&cl->R_tab, rtab);
1862 qdisc_put_rtab(rtab);
1865 if (tb[TCA_CBQ_LSSOPT-1])
1866 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1868 if (tb[TCA_CBQ_WRROPT-1]) {
1869 cbq_rmprio(q, cl);
1870 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1873 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1874 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1876 #ifdef CONFIG_NET_CLS_POLICE
1877 if (tb[TCA_CBQ_POLICE-1])
1878 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1879 #endif
1881 if (tb[TCA_CBQ_FOPT-1])
1882 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1884 if (cl->q->q.qlen)
1885 cbq_activate_class(cl);
1887 sch_tree_unlock(sch);
1889 #ifdef CONFIG_NET_ESTIMATOR
1890 if (tca[TCA_RATE-1])
1891 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1892 cl->stats_lock, tca[TCA_RATE-1]);
1893 #endif
1894 return 0;
1897 if (parentid == TC_H_ROOT)
1898 return -EINVAL;
1900 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1901 tb[TCA_CBQ_LSSOPT-1] == NULL)
1902 return -EINVAL;
1904 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1905 if (rtab == NULL)
1906 return -EINVAL;
1908 if (classid) {
1909 err = -EINVAL;
1910 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1911 goto failure;
1912 } else {
1913 int i;
1914 classid = TC_H_MAKE(sch->handle,0x8000);
1916 for (i=0; i<0x8000; i++) {
1917 if (++q->hgenerator >= 0x8000)
1918 q->hgenerator = 1;
1919 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1920 break;
1922 err = -ENOSR;
1923 if (i >= 0x8000)
1924 goto failure;
1925 classid = classid|q->hgenerator;
1928 parent = &q->link;
1929 if (parentid) {
1930 parent = cbq_class_lookup(q, parentid);
1931 err = -EINVAL;
1932 if (parent == NULL)
1933 goto failure;
1936 err = -ENOBUFS;
1937 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1938 if (cl == NULL)
1939 goto failure;
1940 cl->R_tab = rtab;
1941 rtab = NULL;
1942 cl->refcnt = 1;
1943 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1944 cl->q = &noop_qdisc;
1945 cl->classid = classid;
1946 cl->tparent = parent;
1947 cl->qdisc = sch;
1948 cl->allot = parent->allot;
1949 cl->quantum = cl->allot;
1950 cl->weight = cl->R_tab->rate.rate;
1951 cl->stats_lock = &sch->dev->queue_lock;
1953 sch_tree_lock(sch);
1954 cbq_link_class(cl);
1955 cl->borrow = cl->tparent;
1956 if (cl->tparent != &q->link)
1957 cl->share = cl->tparent;
1958 cbq_adjust_levels(parent);
1959 cl->minidle = -0x7FFFFFFF;
1960 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1961 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1962 if (cl->ewma_log==0)
1963 cl->ewma_log = q->link.ewma_log;
1964 if (cl->maxidle==0)
1965 cl->maxidle = q->link.maxidle;
1966 if (cl->avpkt==0)
1967 cl->avpkt = q->link.avpkt;
1968 cl->overlimit = cbq_ovl_classic;
1969 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1970 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1971 #ifdef CONFIG_NET_CLS_POLICE
1972 if (tb[TCA_CBQ_POLICE-1])
1973 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1974 #endif
1975 if (tb[TCA_CBQ_FOPT-1])
1976 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1977 sch_tree_unlock(sch);
1979 #ifdef CONFIG_NET_ESTIMATOR
1980 if (tca[TCA_RATE-1])
1981 gen_new_estimator(&cl->bstats, &cl->rate_est,
1982 cl->stats_lock, tca[TCA_RATE-1]);
1983 #endif
1985 *arg = (unsigned long)cl;
1986 return 0;
1988 failure:
1989 qdisc_put_rtab(rtab);
1990 return err;
1993 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1995 struct cbq_sched_data *q = qdisc_priv(sch);
1996 struct cbq_class *cl = (struct cbq_class*)arg;
1997 unsigned int qlen;
1999 if (cl->filters || cl->children || cl == &q->link)
2000 return -EBUSY;
2002 sch_tree_lock(sch);
2004 qlen = cl->q->q.qlen;
2005 qdisc_reset(cl->q);
2006 qdisc_tree_decrease_qlen(cl->q, qlen);
2008 if (cl->next_alive)
2009 cbq_deactivate_class(cl);
2011 if (q->tx_borrowed == cl)
2012 q->tx_borrowed = q->tx_class;
2013 if (q->tx_class == cl) {
2014 q->tx_class = NULL;
2015 q->tx_borrowed = NULL;
2017 #ifdef CONFIG_NET_CLS_POLICE
2018 if (q->rx_class == cl)
2019 q->rx_class = NULL;
2020 #endif
2022 cbq_unlink_class(cl);
2023 cbq_adjust_levels(cl->tparent);
2024 cl->defmap = 0;
2025 cbq_sync_defmap(cl);
2027 cbq_rmprio(q, cl);
2028 sch_tree_unlock(sch);
2030 if (--cl->refcnt == 0)
2031 cbq_destroy_class(sch, cl);
2033 return 0;
2036 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2038 struct cbq_sched_data *q = qdisc_priv(sch);
2039 struct cbq_class *cl = (struct cbq_class *)arg;
2041 if (cl == NULL)
2042 cl = &q->link;
2044 return &cl->filter_list;
2047 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2048 u32 classid)
2050 struct cbq_sched_data *q = qdisc_priv(sch);
2051 struct cbq_class *p = (struct cbq_class*)parent;
2052 struct cbq_class *cl = cbq_class_lookup(q, classid);
2054 if (cl) {
2055 if (p && p->level <= cl->level)
2056 return 0;
2057 cl->filters++;
2058 return (unsigned long)cl;
2060 return 0;
2063 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2065 struct cbq_class *cl = (struct cbq_class*)arg;
2067 cl->filters--;
2070 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2072 struct cbq_sched_data *q = qdisc_priv(sch);
2073 unsigned h;
2075 if (arg->stop)
2076 return;
2078 for (h = 0; h < 16; h++) {
2079 struct cbq_class *cl;
2081 for (cl = q->classes[h]; cl; cl = cl->next) {
2082 if (arg->count < arg->skip) {
2083 arg->count++;
2084 continue;
2086 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2087 arg->stop = 1;
2088 return;
2090 arg->count++;
2095 static struct Qdisc_class_ops cbq_class_ops = {
2096 .graft = cbq_graft,
2097 .leaf = cbq_leaf,
2098 .qlen_notify = cbq_qlen_notify,
2099 .get = cbq_get,
2100 .put = cbq_put,
2101 .change = cbq_change_class,
2102 .delete = cbq_delete,
2103 .walk = cbq_walk,
2104 .tcf_chain = cbq_find_tcf,
2105 .bind_tcf = cbq_bind_filter,
2106 .unbind_tcf = cbq_unbind_filter,
2107 .dump = cbq_dump_class,
2108 .dump_stats = cbq_dump_class_stats,
2111 static struct Qdisc_ops cbq_qdisc_ops = {
2112 .next = NULL,
2113 .cl_ops = &cbq_class_ops,
2114 .id = "cbq",
2115 .priv_size = sizeof(struct cbq_sched_data),
2116 .enqueue = cbq_enqueue,
2117 .dequeue = cbq_dequeue,
2118 .requeue = cbq_requeue,
2119 .drop = cbq_drop,
2120 .init = cbq_init,
2121 .reset = cbq_reset,
2122 .destroy = cbq_destroy,
2123 .change = NULL,
2124 .dump = cbq_dump,
2125 .dump_stats = cbq_dump_stats,
2126 .owner = THIS_MODULE,
2129 static int __init cbq_module_init(void)
2131 return register_qdisc(&cbq_qdisc_ops);
2133 static void __exit cbq_module_exit(void)
2135 unregister_qdisc(&cbq_qdisc_ops);
2137 module_init(cbq_module_init)
2138 module_exit(cbq_module_exit)
2139 MODULE_LICENSE("GPL");