[NET_SCHED]: sch_cbq: use hrtimer for delay_timer
/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	Anyone checking this code against the LBL version should keep in
	mind that ONLY the skeleton was borrowed; the implementation is
	different.  In particular:

	--- The WRR algorithm is different.  Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than the MTU, which is always the case when real-time classes
	have small rates.  Note that the statement in [3] is incomplete:
	delay may actually be estimated even if the per-round class
	allotment is less than the MTU.  Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r.  The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate.  At least, I cannot
	interpret some places, which look like wrong translations
	from NS.  Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time.  The workaround is to consider the next dequeue event
	as a sign that the previous packet has finished.  This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true.  Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution.  */
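
/*
 * A purely illustrative instance of the bound above (the numbers are not
 * from the original text, and W*r_i is read as a per-round byte allotment):
 * with k = 2 classes, r_1 = r_2 = 0.05 (so r = 0.1), W = 1000 bytes and
 * MTU = 1500 bytes, the per-round allotment W*r_i = 50 bytes is well below
 * the MTU, and
 *
 *	delay_i <= (ceil(1500/50)*1000*0.1 + 1000*0.1 + 2*1500)/B
 *		 = 6100 bytes / B,
 *
 * i.e. about 49 ms on a 1 Mbit/s link (B = 125000 bytes/s).
 */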

struct cbq_sched_data;


struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic	bstats;
	struct gnet_stats_queue	qstats;
	struct gnet_stats_rate_est rate_est;
	spinlock_t		*stats_lock;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;		/* Watchdog timer,
							   started when CBQ has
							   backlog, but cannot
							   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};
194 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
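
/*
 * L2T ("length to time") looks up, in the class's precomputed rate table,
 * the time needed to transmit a len-byte packet at the class rate.  The
 * table is indexed by len >> cell_log; e.g. with cell_log == 3 a 1500-byte
 * packet uses slot 1500 >> 3 == 187.
 */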
197 static __inline__ unsigned cbq_hash(u32 h)
199 h ^= h>>8;
200 h ^= h>>4;
201 return h&0xF;
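/*
 * The xor-fold above collapses the low 16 bits of the classid into 4 bits,
 * spreading classes over the 16 buckets of cbq_sched_data::classes[].
 */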
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 struct cbq_class *cl;
209 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210 if (cl->classid == classid)
211 return cl;
212 return NULL;
215 #ifdef CONFIG_NET_CLS_POLICE
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 struct cbq_class *cl, *new;
222 for (cl = this->tparent; cl; cl = cl->tparent)
223 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224 return new;
226 return NULL;
229 #endif

/*
 * Classify a packet.  The procedure is fairly involved, but it lets us
 * combine link sharing and priority scheduling transparently.
 *
 * Namely, you can put link-sharing rules (e.g. route based) at the root
 * of CBQ, so that it resolves to split nodes.  Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 struct cbq_sched_data *q = qdisc_priv(sch);
245 struct cbq_class *head = &q->link;
246 struct cbq_class **defmap;
247 struct cbq_class *cl = NULL;
248 u32 prio = skb->priority;
249 struct tcf_result res;
252 * Step 1. If skb->priority points to one of our classes, use it.
254 if (TC_H_MAJ(prio^sch->handle) == 0 &&
255 (cl = cbq_class_lookup(q, prio)) != NULL)
256 return cl;
258 *qerr = NET_XMIT_BYPASS;
259 for (;;) {
260 int result = 0;
261 defmap = head->defaults;
264 * Step 2+n. Apply classifier.
266 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
267 goto fallback;
269 if ((cl = (void*)res.class) == NULL) {
270 if (TC_H_MAJ(res.classid))
271 cl = cbq_class_lookup(q, res.classid);
272 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273 cl = defmap[TC_PRIO_BESTEFFORT];
275 if (cl == NULL || cl->level >= head->level)
276 goto fallback;
279 #ifdef CONFIG_NET_CLS_ACT
280 switch (result) {
281 case TC_ACT_QUEUED:
282 case TC_ACT_STOLEN:
283 *qerr = NET_XMIT_SUCCESS;
284 case TC_ACT_SHOT:
285 return NULL;
287 #elif defined(CONFIG_NET_CLS_POLICE)
288 switch (result) {
289 case TC_POLICE_RECLASSIFY:
290 return cbq_reclassify(skb, cl);
291 case TC_POLICE_SHOT:
292 return NULL;
293 default:
294 break;
296 #endif
297 if (cl->level == 0)
298 return cl;
		/*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 * apply the agency-specific classifier.
		 * Repeat this procedure until we hit a leaf node.
		 */
305 head = cl;
308 fallback:
309 cl = head;
312 * Step 4. No success...
314 if (TC_H_MAJ(prio) == 0 &&
315 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
317 return head;
319 return cl;

/*
 * A packet has just been enqueued on an empty class.
 * cbq_activate_class adds the class to the tail of the active class list
 * of its priority band.
 */
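
/*
 * Note for clarity (not in the original): q->active[prio] points at the
 * TAIL of a circular list threaded through next_alive; the head is
 * tail->next_alive.  The function below appends the new class as the new
 * tail, between the old tail and the head.
 */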
328 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331 int prio = cl->cpriority;
332 struct cbq_class *cl_tail;
334 cl_tail = q->active[prio];
335 q->active[prio] = cl;
337 if (cl_tail != NULL) {
338 cl->next_alive = cl_tail->next_alive;
339 cl_tail->next_alive = cl;
340 } else {
341 cl->next_alive = cl;
342 q->activemask |= (1<<prio);

/*
 * Unlink a class from the active chain.
 * Note that the same unlinking is also done directly in cbq_dequeue*
 * during the round-robin pass.
 */
352 static void cbq_deactivate_class(struct cbq_class *this)
354 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355 int prio = this->cpriority;
356 struct cbq_class *cl;
357 struct cbq_class *cl_prev = q->active[prio];
359 do {
360 cl = cl_prev->next_alive;
361 if (cl == this) {
362 cl_prev->next_alive = cl->next_alive;
363 cl->next_alive = NULL;
365 if (cl == q->active[prio]) {
366 q->active[prio] = cl_prev;
367 if (cl == q->active[prio]) {
368 q->active[prio] = NULL;
369 q->activemask &= ~(1<<prio);
370 return;
373 return;
375 } while ((cl_prev = cl) != q->active[prio]);
378 static void
379 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 int toplevel = q->toplevel;
383 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
384 psched_time_t now;
385 psched_tdiff_t incr;
387 PSCHED_GET_TIME(now);
388 incr = PSCHED_TDIFF(now, q->now_rt);
389 PSCHED_TADD2(q->now, incr, now);
391 do {
392 if (PSCHED_TLESS(cl->undertime, now)) {
393 q->toplevel = cl->level;
394 return;
396 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
400 static int
401 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 struct cbq_sched_data *q = qdisc_priv(sch);
404 int len = skb->len;
405 int ret;
406 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408 #ifdef CONFIG_NET_CLS_POLICE
409 q->rx_class = cl;
410 #endif
411 if (cl == NULL) {
412 if (ret == NET_XMIT_BYPASS)
413 sch->qstats.drops++;
414 kfree_skb(skb);
415 return ret;
418 #ifdef CONFIG_NET_CLS_POLICE
419 cl->q->__parent = sch;
420 #endif
421 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
422 sch->q.qlen++;
423 sch->bstats.packets++;
424 sch->bstats.bytes+=len;
425 cbq_mark_toplevel(q, cl);
426 if (!cl->next_alive)
427 cbq_activate_class(cl);
428 return ret;
431 sch->qstats.drops++;
432 cbq_mark_toplevel(q, cl);
433 cl->qstats.drops++;
434 return ret;
437 static int
438 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 struct cbq_sched_data *q = qdisc_priv(sch);
441 struct cbq_class *cl;
442 int ret;
444 if ((cl = q->tx_class) == NULL) {
445 kfree_skb(skb);
446 sch->qstats.drops++;
447 return NET_XMIT_CN;
449 q->tx_class = NULL;
451 cbq_mark_toplevel(q, cl);
453 #ifdef CONFIG_NET_CLS_POLICE
454 q->rx_class = cl;
455 cl->q->__parent = sch;
456 #endif
457 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
458 sch->q.qlen++;
459 sch->qstats.requeues++;
460 if (!cl->next_alive)
461 cbq_activate_class(cl);
462 return 0;
464 sch->qstats.drops++;
465 cl->qstats.drops++;
466 return ret;
469 /* Overlimit actions */
471 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473 static void cbq_ovl_classic(struct cbq_class *cl)
475 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
478 if (!cl->delayed) {
479 delay += cl->offtime;
		/*
		 * The class goes to sleep, so that it will have no
		 * chance to work off its avgidle.  Let's forgive it 8)
		 *
		 * BTW, cbq-2.0 has a bug here: apparently they forgot
		 * to shift by cl->ewma_log.
		 */
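		/*
		 * When avgidle is negative, the compensation below is
		 * (1 - 2^-ewma_log) * (-avgidle), written with a shift:
		 * x - (x >> ewma_log) is x*(1 - W), up to rounding, for
		 * x == -cl->avgidle and W == 2^-ewma_log.
		 */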
488 if (cl->avgidle < 0)
489 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490 if (cl->avgidle < cl->minidle)
491 cl->avgidle = cl->minidle;
492 if (delay <= 0)
493 delay = 1;
494 PSCHED_TADD2(q->now, delay, cl->undertime);
496 cl->xstats.overactions++;
497 cl->delayed = 1;
499 if (q->wd_expires == 0 || q->wd_expires > delay)
500 q->wd_expires = delay;
502 /* Dirty work! We must schedule wakeups based on
503 real available rate, rather than leaf rate,
504 which may be tiny (even zero).
506 if (q->toplevel == TC_CBQ_MAXLEVEL) {
507 struct cbq_class *b;
508 psched_tdiff_t base_delay = q->wd_expires;
510 for (b = cl->borrow; b; b = b->borrow) {
511 delay = PSCHED_TDIFF(b->undertime, q->now);
512 if (delay < base_delay) {
513 if (delay <= 0)
514 delay = 1;
515 base_delay = delay;
519 q->wd_expires = base_delay;

/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
   when they go overlimit */
527 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 struct cbq_class *this = cl;
532 do {
533 if (cl->level > q->toplevel) {
534 cl = NULL;
535 break;
537 } while ((cl = cl->borrow) != NULL);
539 if (cl == NULL)
540 cl = this;
541 cbq_ovl_classic(cl);
544 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
546 static void cbq_ovl_delay(struct cbq_class *cl)
548 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
551 if (!cl->delayed) {
552 psched_time_t sched = q->now;
553 ktime_t expires;
555 delay += cl->offtime;
556 if (cl->avgidle < 0)
557 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
558 if (cl->avgidle < cl->minidle)
559 cl->avgidle = cl->minidle;
560 PSCHED_TADD2(q->now, delay, cl->undertime);
562 if (delay > 0) {
563 sched += delay + cl->penalty;
564 cl->penalized = sched;
565 cl->cpriority = TC_CBQ_MAXPRIO;
566 q->pmask |= (1<<TC_CBQ_MAXPRIO);
568 expires = ktime_set(0, 0);
569 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
570 if (hrtimer_try_to_cancel(&q->delay_timer) &&
571 ktime_to_ns(ktime_sub(q->delay_timer.expires,
572 expires)) > 0)
573 q->delay_timer.expires = expires;
574 hrtimer_restart(&q->delay_timer);
575 cl->delayed = 1;
576 cl->xstats.overactions++;
577 return;
579 delay = 1;
581 if (q->wd_expires == 0 || q->wd_expires > delay)
582 q->wd_expires = delay;
585 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587 static void cbq_ovl_lowprio(struct cbq_class *cl)
589 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
591 cl->penalized = q->now + cl->penalty;
593 if (cl->cpriority != cl->priority2) {
594 cl->cpriority = cl->priority2;
595 q->pmask |= (1<<cl->cpriority);
596 cl->xstats.overactions++;
598 cbq_ovl_classic(cl);
601 /* TC_CBQ_OVL_DROP: penalize class by dropping */
603 static void cbq_ovl_drop(struct cbq_class *cl)
605 if (cl->q->ops->drop)
606 if (cl->q->ops->drop(cl->q))
607 cl->qdisc->q.qlen--;
608 cl->xstats.overactions++;
609 cbq_ovl_classic(cl);
612 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
613 psched_time_t now)
615 struct cbq_class *cl;
616 struct cbq_class *cl_prev = q->active[prio];
617 psched_time_t sched = now;
619 if (cl_prev == NULL)
620 return 0;
622 do {
623 cl = cl_prev->next_alive;
624 if (now - cl->penalized > 0) {
625 cl_prev->next_alive = cl->next_alive;
626 cl->next_alive = NULL;
627 cl->cpriority = cl->priority;
628 cl->delayed = 0;
629 cbq_activate_class(cl);
631 if (cl == q->active[prio]) {
632 q->active[prio] = cl_prev;
633 if (cl == q->active[prio]) {
634 q->active[prio] = NULL;
635 return 0;
639 cl = cl_prev->next_alive;
640 } else if (sched - cl->penalized > 0)
641 sched = cl->penalized;
642 } while ((cl_prev = cl) != q->active[prio]);
644 return sched - now;
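
/*
 * delay_timer callback: reactivate classes whose penalty has expired and,
 * if some classes are still penalized, re-arm the timer for the earliest
 * remaining expiry; finally clear the throttled flag and kick the device
 * so transmission can resume.
 */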
647 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
649 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
650 delay_timer);
651 struct Qdisc *sch = q->watchdog.qdisc;
652 psched_time_t now;
653 psched_tdiff_t delay = 0;
654 unsigned pmask;
656 PSCHED_GET_TIME(now);
658 pmask = q->pmask;
659 q->pmask = 0;
661 while (pmask) {
662 int prio = ffz(~pmask);
663 psched_tdiff_t tmp;
665 pmask &= ~(1<<prio);
667 tmp = cbq_undelay_prio(q, prio, now);
668 if (tmp > 0) {
669 q->pmask |= 1<<prio;
670 if (tmp < delay || delay == 0)
671 delay = tmp;
675 if (delay) {
676 ktime_t time;
678 time = ktime_set(0, 0);
679 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
680 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
683 sch->flags &= ~TCQ_F_THROTTLED;
684 netif_schedule(sch->dev);
685 return HRTIMER_NORESTART;
689 #ifdef CONFIG_NET_CLS_POLICE
691 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
693 int len = skb->len;
694 struct Qdisc *sch = child->__parent;
695 struct cbq_sched_data *q = qdisc_priv(sch);
696 struct cbq_class *cl = q->rx_class;
698 q->rx_class = NULL;
700 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
702 cbq_mark_toplevel(q, cl);
704 q->rx_class = cl;
705 cl->q->__parent = sch;
707 if (cl->q->enqueue(skb, cl->q) == 0) {
708 sch->q.qlen++;
709 sch->bstats.packets++;
710 sch->bstats.bytes+=len;
711 if (!cl->next_alive)
712 cbq_activate_class(cl);
713 return 0;
715 sch->qstats.drops++;
716 return 0;
719 sch->qstats.drops++;
720 return -1;
722 #endif

/*
 * This is a mission-critical procedure.
 *
 * We "regenerate" the toplevel cutoff if the transmitting class
 * has backlog and is not regulated.  This is not part of the original
 * CBQ description, but looks more reasonable.
 * Probably, it is wrong.  This question needs further investigation.
 */
733 static __inline__ void
734 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
735 struct cbq_class *borrowed)
737 if (cl && q->toplevel >= borrowed->level) {
738 if (cl->q->q.qlen > 1) {
739 do {
740 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
741 q->toplevel = borrowed->level;
742 return;
744 } while ((borrowed=borrowed->borrow) != NULL);
746 #if 0
747 /* It is not necessary now. Uncommenting it
748 will save CPU cycles, but decrease fairness.
750 q->toplevel = TC_CBQ_MAXLEVEL;
751 #endif
755 static void
756 cbq_update(struct cbq_sched_data *q)
758 struct cbq_class *this = q->tx_class;
759 struct cbq_class *cl = this;
760 int len = q->tx_len;
762 q->tx_class = NULL;
764 for ( ; cl; cl = cl->share) {
765 long avgidle = cl->avgidle;
766 long idle;
768 cl->bstats.packets++;
769 cl->bstats.bytes += len;
772 (now - last) is total time between packet right edges.
773 (last_pktlen/rate) is "virtual" busy time, so that
775 idle = (now - last) - last_pktlen/rate
778 idle = PSCHED_TDIFF(q->now, cl->last);
779 if ((unsigned long)idle > 128*1024*1024) {
780 avgidle = cl->maxidle;
781 } else {
782 idle -= L2T(cl, len);
784 /* true_avgidle := (1-W)*true_avgidle + W*idle,
785 where W=2^{-ewma_log}. But cl->avgidle is scaled:
786 cl->avgidle == true_avgidle/W,
787 hence:
789 avgidle += idle - (avgidle>>cl->ewma_log);
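		/*
		 * Illustrative check: with ewma_log == 5 (W == 1/32), the
		 * line above gives avgidle' = (31/32)*avgidle + idle in
		 * scaled units (ignoring shift rounding), i.e.
		 * true_avgidle' = (31/32)*true_avgidle + (1/32)*idle,
		 * which is exactly the EWMA described above.
		 */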
792 if (avgidle <= 0) {
793 /* Overlimit or at-limit */
795 if (avgidle < cl->minidle)
796 avgidle = cl->minidle;
798 cl->avgidle = avgidle;
800 /* Calculate expected time, when this class
801 will be allowed to send.
802 It will occur, when:
803 (1-W)*true_avgidle + W*delay = 0, i.e.
804 idle = (1/W - 1)*(-true_avgidle)
806 idle = (1 - W)*(-cl->avgidle);
808 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we add to undertime the virtual clock time
			 * needed to complete the transmitted packet.
			 * (len/phys_bandwidth has already elapsed by the
			 * time cbq_update is called.)
			 */
819 idle -= L2T(&q->link, len);
820 idle += L2T(cl, len);
822 PSCHED_AUDIT_TDIFF(idle);
824 PSCHED_TADD2(q->now, idle, cl->undertime);
825 } else {
826 /* Underlimit */
828 PSCHED_SET_PASTPERFECT(cl->undertime);
829 if (avgidle > cl->maxidle)
830 cl->avgidle = cl->maxidle;
831 else
832 cl->avgidle = avgidle;
834 cl->last = q->now;
837 cbq_update_toplevel(q, this, q->tx_borrowed);
840 static __inline__ struct cbq_class *
841 cbq_under_limit(struct cbq_class *cl)
843 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
844 struct cbq_class *this_cl = cl;
846 if (cl->tparent == NULL)
847 return cl;
849 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
850 !PSCHED_TLESS(q->now, cl->undertime)) {
851 cl->delayed = 0;
852 return cl;
855 do {
		/*
		 * This is a very suspicious place.  Currently the overlimit
		 * action is generated for non-bounded classes only if the
		 * link is completely congested.  Though this agrees with
		 * the ancestor-only paradigm, it looks very stupid.
		 * In particular, it means that this chunk of code will
		 * either never be called or will strongly amplify
		 * burstiness.  Dangerous, silly, and, however, no other
		 * solution exists.
		 */
866 if ((cl = cl->borrow) == NULL) {
867 this_cl->qstats.overlimits++;
868 this_cl->overlimit(this_cl);
869 return NULL;
871 if (cl->level > q->toplevel)
872 return NULL;
873 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
874 PSCHED_TLESS(q->now, cl->undertime));
876 cl->delayed = 0;
877 return cl;
880 static __inline__ struct sk_buff *
881 cbq_dequeue_prio(struct Qdisc *sch, int prio)
883 struct cbq_sched_data *q = qdisc_priv(sch);
884 struct cbq_class *cl_tail, *cl_prev, *cl;
885 struct sk_buff *skb;
886 int deficit;
888 cl_tail = cl_prev = q->active[prio];
889 cl = cl_prev->next_alive;
891 do {
892 deficit = 0;
894 /* Start round */
895 do {
896 struct cbq_class *borrow = cl;
898 if (cl->q->q.qlen &&
899 (borrow = cbq_under_limit(cl)) == NULL)
900 goto skip_class;
902 if (cl->deficit <= 0) {
903 /* Class exhausted its allotment per
904 this round. Switch to the next one.
906 deficit = 1;
907 cl->deficit += cl->quantum;
908 goto next_class;
911 skb = cl->q->dequeue(cl->q);
913 /* Class did not give us any skb :-(
914 It could occur even if cl->q->q.qlen != 0
915 f.e. if cl->q == "tbf"
917 if (skb == NULL)
918 goto skip_class;
920 cl->deficit -= skb->len;
921 q->tx_class = cl;
922 q->tx_borrowed = borrow;
923 if (borrow != cl) {
924 #ifndef CBQ_XSTATS_BORROWS_BYTES
925 borrow->xstats.borrows++;
926 cl->xstats.borrows++;
927 #else
928 borrow->xstats.borrows += skb->len;
929 cl->xstats.borrows += skb->len;
930 #endif
932 q->tx_len = skb->len;
934 if (cl->deficit <= 0) {
935 q->active[prio] = cl;
936 cl = cl->next_alive;
937 cl->deficit += cl->quantum;
939 return skb;
941 skip_class:
942 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
943 /* Class is empty or penalized.
944 Unlink it from active chain.
946 cl_prev->next_alive = cl->next_alive;
947 cl->next_alive = NULL;
949 /* Did cl_tail point to it? */
950 if (cl == cl_tail) {
951 /* Repair it! */
952 cl_tail = cl_prev;
954 /* Was it the last class in this band? */
955 if (cl == cl_tail) {
956 /* Kill the band! */
957 q->active[prio] = NULL;
958 q->activemask &= ~(1<<prio);
959 if (cl->q->q.qlen)
960 cbq_activate_class(cl);
961 return NULL;
964 q->active[prio] = cl_tail;
966 if (cl->q->q.qlen)
967 cbq_activate_class(cl);
969 cl = cl_prev;
972 next_class:
973 cl_prev = cl;
974 cl = cl->next_alive;
975 } while (cl_prev != cl_tail);
976 } while (deficit);
978 q->active[prio] = cl_prev;
980 return NULL;
983 static __inline__ struct sk_buff *
984 cbq_dequeue_1(struct Qdisc *sch)
986 struct cbq_sched_data *q = qdisc_priv(sch);
987 struct sk_buff *skb;
988 unsigned activemask;
990 activemask = q->activemask&0xFF;
991 while (activemask) {
992 int prio = ffz(~activemask);
993 activemask &= ~(1<<prio);
994 skb = cbq_dequeue_prio(sch, prio);
995 if (skb)
996 return skb;
998 return NULL;
1001 static struct sk_buff *
1002 cbq_dequeue(struct Qdisc *sch)
1004 struct sk_buff *skb;
1005 struct cbq_sched_data *q = qdisc_priv(sch);
1006 psched_time_t now;
1007 psched_tdiff_t incr;
1009 PSCHED_GET_TIME(now);
1010 incr = PSCHED_TDIFF(now, q->now_rt);
1012 if (q->tx_class) {
1013 psched_tdiff_t incr2;
1014 /* Time integrator. We calculate EOS time
1015 by adding expected packet transmission time.
1016 If real time is greater, we warp artificial clock,
1017 so that:
1019 cbq_time = max(real_time, work);
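		/*
		 * Concretely: incr2 below is the link-rate transmission time
		 * of the last transmitted packet (L2T on q->tx_len), and
		 * q->now is advanced by whichever is larger, incr2 or the
		 * real elapsed time incr.
		 */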
1021 incr2 = L2T(&q->link, q->tx_len);
1022 PSCHED_TADD(q->now, incr2);
1023 cbq_update(q);
1024 if ((incr -= incr2) < 0)
1025 incr = 0;
1027 PSCHED_TADD(q->now, incr);
1028 q->now_rt = now;
1030 for (;;) {
1031 q->wd_expires = 0;
1033 skb = cbq_dequeue_1(sch);
1034 if (skb) {
1035 sch->q.qlen--;
1036 sch->flags &= ~TCQ_F_THROTTLED;
1037 return skb;
		/*
		 * All the classes are overlimit.
		 *
		 * This is possible if:
		 * 1. The scheduler is empty.
		 * 2. The toplevel cutoff inhibited borrowing.
		 * 3. The root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note that NS and cbq-2.0 are buggy: picking an arbitrary
		 * class is appropriate for ancestor-only sharing, but not
		 * for the top-level algorithm.
		 *
		 * Our version is better, but slower, because it requires
		 * two passes; that is unavoidable with top-level sharing.
		 */
1058 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1059 PSCHED_IS_PASTPERFECT(q->link.undertime))
1060 break;
1062 q->toplevel = TC_CBQ_MAXLEVEL;
1063 PSCHED_SET_PASTPERFECT(q->link.undertime);
1066 /* No packets in scheduler or nobody wants to give them to us :-(
1067 Sigh... start watchdog timer in the last case. */
1069 if (sch->q.qlen) {
1070 sch->qstats.overlimits++;
1071 if (q->wd_expires)
1072 qdisc_watchdog_schedule(&q->watchdog,
1073 q->now + q->wd_expires);
1075 return NULL;

/* CBQ class maintenance routines */
1080 static void cbq_adjust_levels(struct cbq_class *this)
1082 if (this == NULL)
1083 return;
1085 do {
1086 int level = 0;
1087 struct cbq_class *cl;
1089 if ((cl = this->children) != NULL) {
1090 do {
1091 if (cl->level > level)
1092 level = cl->level;
1093 } while ((cl = cl->sibling) != this->children);
1095 this->level = level+1;
1096 } while ((this = this->tparent) != NULL);
1099 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1101 struct cbq_class *cl;
1102 unsigned h;
1104 if (q->quanta[prio] == 0)
1105 return;
1107 for (h=0; h<16; h++) {
1108 for (cl = q->classes[h]; cl; cl = cl->next) {
			/*
			 * BUGGGG... Beware! This expression suffers from
			 * arithmetic overflow!
			 */
1112 if (cl->priority == prio) {
1113 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1114 q->quanta[prio];
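				/*
				 * Illustrative magnitude for the overflow
				 * warned about above: on a 32-bit host, a
				 * weight taken from a 1 Gbit/s rate
				 * (125000000 bytes/s) multiplied by a
				 * 1514-byte allot already exceeds LONG_MAX,
				 * before nclasses is even factored in.
				 */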
1116 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1117 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1118 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1124 static void cbq_sync_defmap(struct cbq_class *cl)
1126 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1127 struct cbq_class *split = cl->split;
1128 unsigned h;
1129 int i;
1131 if (split == NULL)
1132 return;
1134 for (i=0; i<=TC_PRIO_MAX; i++) {
1135 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1136 split->defaults[i] = NULL;
1139 for (i=0; i<=TC_PRIO_MAX; i++) {
1140 int level = split->level;
1142 if (split->defaults[i])
1143 continue;
1145 for (h=0; h<16; h++) {
1146 struct cbq_class *c;
1148 for (c = q->classes[h]; c; c = c->next) {
1149 if (c->split == split && c->level < level &&
1150 c->defmap&(1<<i)) {
1151 split->defaults[i] = c;
1152 level = c->level;
1159 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1161 struct cbq_class *split = NULL;
1163 if (splitid == 0) {
1164 if ((split = cl->split) == NULL)
1165 return;
1166 splitid = split->classid;
1169 if (split == NULL || split->classid != splitid) {
1170 for (split = cl->tparent; split; split = split->tparent)
1171 if (split->classid == splitid)
1172 break;
1175 if (split == NULL)
1176 return;
1178 if (cl->split != split) {
1179 cl->defmap = 0;
1180 cbq_sync_defmap(cl);
1181 cl->split = split;
1182 cl->defmap = def&mask;
1183 } else
1184 cl->defmap = (cl->defmap&~mask)|(def&mask);
1186 cbq_sync_defmap(cl);
1189 static void cbq_unlink_class(struct cbq_class *this)
1191 struct cbq_class *cl, **clp;
1192 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1194 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1195 if (cl == this) {
1196 *clp = cl->next;
1197 cl->next = NULL;
1198 break;
1202 if (this->tparent) {
1203 clp=&this->sibling;
1204 cl = *clp;
1205 do {
1206 if (cl == this) {
1207 *clp = cl->sibling;
1208 break;
1210 clp = &cl->sibling;
1211 } while ((cl = *clp) != this->sibling);
1213 if (this->tparent->children == this) {
1214 this->tparent->children = this->sibling;
1215 if (this->sibling == this)
1216 this->tparent->children = NULL;
1218 } else {
1219 BUG_TRAP(this->sibling == this);
1223 static void cbq_link_class(struct cbq_class *this)
1225 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1226 unsigned h = cbq_hash(this->classid);
1227 struct cbq_class *parent = this->tparent;
1229 this->sibling = this;
1230 this->next = q->classes[h];
1231 q->classes[h] = this;
1233 if (parent == NULL)
1234 return;
1236 if (parent->children == NULL) {
1237 parent->children = this;
1238 } else {
1239 this->sibling = parent->children->sibling;
1240 parent->children->sibling = this;
1244 static unsigned int cbq_drop(struct Qdisc* sch)
1246 struct cbq_sched_data *q = qdisc_priv(sch);
1247 struct cbq_class *cl, *cl_head;
1248 int prio;
1249 unsigned int len;
1251 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1252 if ((cl_head = q->active[prio]) == NULL)
1253 continue;
1255 cl = cl_head;
1256 do {
1257 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1258 sch->q.qlen--;
1259 if (!cl->q->q.qlen)
1260 cbq_deactivate_class(cl);
1261 return len;
1263 } while ((cl = cl->next_alive) != cl_head);
1265 return 0;
1268 static void
1269 cbq_reset(struct Qdisc* sch)
1271 struct cbq_sched_data *q = qdisc_priv(sch);
1272 struct cbq_class *cl;
1273 int prio;
1274 unsigned h;
1276 q->activemask = 0;
1277 q->pmask = 0;
1278 q->tx_class = NULL;
1279 q->tx_borrowed = NULL;
1280 qdisc_watchdog_cancel(&q->watchdog);
1281 hrtimer_cancel(&q->delay_timer);
1282 q->toplevel = TC_CBQ_MAXLEVEL;
1283 PSCHED_GET_TIME(q->now);
1284 q->now_rt = q->now;
1286 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1287 q->active[prio] = NULL;
1289 for (h = 0; h < 16; h++) {
1290 for (cl = q->classes[h]; cl; cl = cl->next) {
1291 qdisc_reset(cl->q);
1293 cl->next_alive = NULL;
1294 PSCHED_SET_PASTPERFECT(cl->undertime);
1295 cl->avgidle = cl->maxidle;
1296 cl->deficit = cl->quantum;
1297 cl->cpriority = cl->priority;
1300 sch->q.qlen = 0;
1304 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1306 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1307 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1308 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1310 if (lss->change&TCF_CBQ_LSS_EWMA)
1311 cl->ewma_log = lss->ewma_log;
1312 if (lss->change&TCF_CBQ_LSS_AVPKT)
1313 cl->avpkt = lss->avpkt;
1314 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1315 cl->minidle = -(long)lss->minidle;
1316 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1317 cl->maxidle = lss->maxidle;
1318 cl->avgidle = lss->maxidle;
1320 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1321 cl->offtime = lss->offtime;
1322 return 0;
1325 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1327 q->nclasses[cl->priority]--;
1328 q->quanta[cl->priority] -= cl->weight;
1329 cbq_normalize_quanta(q, cl->priority);
1332 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1334 q->nclasses[cl->priority]++;
1335 q->quanta[cl->priority] += cl->weight;
1336 cbq_normalize_quanta(q, cl->priority);
1339 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1341 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1343 if (wrr->allot)
1344 cl->allot = wrr->allot;
1345 if (wrr->weight)
1346 cl->weight = wrr->weight;
1347 if (wrr->priority) {
1348 cl->priority = wrr->priority-1;
1349 cl->cpriority = cl->priority;
1350 if (cl->priority >= cl->priority2)
1351 cl->priority2 = TC_CBQ_MAXPRIO-1;
1354 cbq_addprio(q, cl);
1355 return 0;
1358 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1360 switch (ovl->strategy) {
1361 case TC_CBQ_OVL_CLASSIC:
1362 cl->overlimit = cbq_ovl_classic;
1363 break;
1364 case TC_CBQ_OVL_DELAY:
1365 cl->overlimit = cbq_ovl_delay;
1366 break;
1367 case TC_CBQ_OVL_LOWPRIO:
1368 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1369 ovl->priority2-1 <= cl->priority)
1370 return -EINVAL;
1371 cl->priority2 = ovl->priority2-1;
1372 cl->overlimit = cbq_ovl_lowprio;
1373 break;
1374 case TC_CBQ_OVL_DROP:
1375 cl->overlimit = cbq_ovl_drop;
1376 break;
1377 case TC_CBQ_OVL_RCLASSIC:
1378 cl->overlimit = cbq_ovl_rclassic;
1379 break;
1380 default:
1381 return -EINVAL;
1383 cl->penalty = ovl->penalty;
1384 return 0;
1387 #ifdef CONFIG_NET_CLS_POLICE
1388 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1390 cl->police = p->police;
1392 if (cl->q->handle) {
1393 if (p->police == TC_POLICE_RECLASSIFY)
1394 cl->q->reshape_fail = cbq_reshape_fail;
1395 else
1396 cl->q->reshape_fail = NULL;
1398 return 0;
1400 #endif
1402 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1404 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1405 return 0;
1408 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1410 struct cbq_sched_data *q = qdisc_priv(sch);
1411 struct rtattr *tb[TCA_CBQ_MAX];
1412 struct tc_ratespec *r;
1414 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1415 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1416 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1417 return -EINVAL;
1419 if (tb[TCA_CBQ_LSSOPT-1] &&
1420 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1421 return -EINVAL;
1423 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1425 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1426 return -EINVAL;
1428 q->link.refcnt = 1;
1429 q->link.sibling = &q->link;
1430 q->link.classid = sch->handle;
1431 q->link.qdisc = sch;
1432 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1433 sch->handle)))
1434 q->link.q = &noop_qdisc;
1436 q->link.priority = TC_CBQ_MAXPRIO-1;
1437 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1438 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1439 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1440 q->link.overlimit = cbq_ovl_classic;
1441 q->link.allot = psched_mtu(sch->dev);
1442 q->link.quantum = q->link.allot;
1443 q->link.weight = q->link.R_tab->rate.rate;
1445 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1446 q->link.avpkt = q->link.allot/2;
1447 q->link.minidle = -0x7FFFFFFF;
1448 q->link.stats_lock = &sch->dev->queue_lock;
1450 qdisc_watchdog_init(&q->watchdog, sch);
1451 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1452 q->delay_timer.function = cbq_undelay;
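	/*
	 * delay_timer is the high-resolution timer referred to in the patch
	 * title; it is armed with an absolute expiry by cbq_ovl_delay() and
	 * re-armed from cbq_undelay() while penalized classes remain.
	 */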
1453 q->toplevel = TC_CBQ_MAXLEVEL;
1454 PSCHED_GET_TIME(q->now);
1455 q->now_rt = q->now;
1457 cbq_link_class(&q->link);
1459 if (tb[TCA_CBQ_LSSOPT-1])
1460 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1462 cbq_addprio(q, &q->link);
1463 return 0;
1466 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1468 unsigned char *b = skb->tail;
1470 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1471 return skb->len;
1473 rtattr_failure:
1474 skb_trim(skb, b - skb->data);
1475 return -1;
1478 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1480 unsigned char *b = skb->tail;
1481 struct tc_cbq_lssopt opt;
1483 opt.flags = 0;
1484 if (cl->borrow == NULL)
1485 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1486 if (cl->share == NULL)
1487 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1488 opt.ewma_log = cl->ewma_log;
1489 opt.level = cl->level;
1490 opt.avpkt = cl->avpkt;
1491 opt.maxidle = cl->maxidle;
1492 opt.minidle = (u32)(-cl->minidle);
1493 opt.offtime = cl->offtime;
1494 opt.change = ~0;
1495 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1496 return skb->len;
1498 rtattr_failure:
1499 skb_trim(skb, b - skb->data);
1500 return -1;
1503 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1505 unsigned char *b = skb->tail;
1506 struct tc_cbq_wrropt opt;
1508 opt.flags = 0;
1509 opt.allot = cl->allot;
1510 opt.priority = cl->priority+1;
1511 opt.cpriority = cl->cpriority+1;
1512 opt.weight = cl->weight;
1513 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1514 return skb->len;
1516 rtattr_failure:
1517 skb_trim(skb, b - skb->data);
1518 return -1;
1521 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1523 unsigned char *b = skb->tail;
1524 struct tc_cbq_ovl opt;
1526 opt.strategy = cl->ovl_strategy;
1527 opt.priority2 = cl->priority2+1;
1528 opt.pad = 0;
1529 opt.penalty = cl->penalty;
1530 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1531 return skb->len;
1533 rtattr_failure:
1534 skb_trim(skb, b - skb->data);
1535 return -1;
1538 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1540 unsigned char *b = skb->tail;
1541 struct tc_cbq_fopt opt;
1543 if (cl->split || cl->defmap) {
1544 opt.split = cl->split ? cl->split->classid : 0;
1545 opt.defmap = cl->defmap;
1546 opt.defchange = ~0;
1547 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1549 return skb->len;
1551 rtattr_failure:
1552 skb_trim(skb, b - skb->data);
1553 return -1;
1556 #ifdef CONFIG_NET_CLS_POLICE
1557 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1559 unsigned char *b = skb->tail;
1560 struct tc_cbq_police opt;
1562 if (cl->police) {
1563 opt.police = cl->police;
1564 opt.__res1 = 0;
1565 opt.__res2 = 0;
1566 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1568 return skb->len;
1570 rtattr_failure:
1571 skb_trim(skb, b - skb->data);
1572 return -1;
1574 #endif
1576 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1578 if (cbq_dump_lss(skb, cl) < 0 ||
1579 cbq_dump_rate(skb, cl) < 0 ||
1580 cbq_dump_wrr(skb, cl) < 0 ||
1581 cbq_dump_ovl(skb, cl) < 0 ||
1582 #ifdef CONFIG_NET_CLS_POLICE
1583 cbq_dump_police(skb, cl) < 0 ||
1584 #endif
1585 cbq_dump_fopt(skb, cl) < 0)
1586 return -1;
1587 return 0;
1590 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1592 struct cbq_sched_data *q = qdisc_priv(sch);
1593 unsigned char *b = skb->tail;
1594 struct rtattr *rta;
1596 rta = (struct rtattr*)b;
1597 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1598 if (cbq_dump_attr(skb, &q->link) < 0)
1599 goto rtattr_failure;
1600 rta->rta_len = skb->tail - b;
1601 return skb->len;
1603 rtattr_failure:
1604 skb_trim(skb, b - skb->data);
1605 return -1;
1608 static int
1609 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1611 struct cbq_sched_data *q = qdisc_priv(sch);
1613 q->link.xstats.avgidle = q->link.avgidle;
1614 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1617 static int
1618 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1619 struct sk_buff *skb, struct tcmsg *tcm)
1621 struct cbq_class *cl = (struct cbq_class*)arg;
1622 unsigned char *b = skb->tail;
1623 struct rtattr *rta;
1625 if (cl->tparent)
1626 tcm->tcm_parent = cl->tparent->classid;
1627 else
1628 tcm->tcm_parent = TC_H_ROOT;
1629 tcm->tcm_handle = cl->classid;
1630 tcm->tcm_info = cl->q->handle;
1632 rta = (struct rtattr*)b;
1633 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1634 if (cbq_dump_attr(skb, cl) < 0)
1635 goto rtattr_failure;
1636 rta->rta_len = skb->tail - b;
1637 return skb->len;
1639 rtattr_failure:
1640 skb_trim(skb, b - skb->data);
1641 return -1;
1644 static int
1645 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1646 struct gnet_dump *d)
1648 struct cbq_sched_data *q = qdisc_priv(sch);
1649 struct cbq_class *cl = (struct cbq_class*)arg;
1651 cl->qstats.qlen = cl->q->q.qlen;
1652 cl->xstats.avgidle = cl->avgidle;
1653 cl->xstats.undertime = 0;
1655 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1656 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1658 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1659 #ifdef CONFIG_NET_ESTIMATOR
1660 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1661 #endif
1662 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1663 return -1;
1665 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1668 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1669 struct Qdisc **old)
1671 struct cbq_class *cl = (struct cbq_class*)arg;
1673 if (cl) {
1674 if (new == NULL) {
1675 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1676 cl->classid)) == NULL)
1677 return -ENOBUFS;
1678 } else {
1679 #ifdef CONFIG_NET_CLS_POLICE
1680 if (cl->police == TC_POLICE_RECLASSIFY)
1681 new->reshape_fail = cbq_reshape_fail;
1682 #endif
1684 sch_tree_lock(sch);
1685 *old = xchg(&cl->q, new);
1686 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1687 qdisc_reset(*old);
1688 sch_tree_unlock(sch);
1690 return 0;
1692 return -ENOENT;
1695 static struct Qdisc *
1696 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1698 struct cbq_class *cl = (struct cbq_class*)arg;
1700 return cl ? cl->q : NULL;
1703 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1705 struct cbq_class *cl = (struct cbq_class *)arg;
1707 if (cl->q->q.qlen == 0)
1708 cbq_deactivate_class(cl);
1711 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1713 struct cbq_sched_data *q = qdisc_priv(sch);
1714 struct cbq_class *cl = cbq_class_lookup(q, classid);
1716 if (cl) {
1717 cl->refcnt++;
1718 return (unsigned long)cl;
1720 return 0;
1723 static void cbq_destroy_filters(struct cbq_class *cl)
1725 struct tcf_proto *tp;
1727 while ((tp = cl->filter_list) != NULL) {
1728 cl->filter_list = tp->next;
1729 tcf_destroy(tp);
1733 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1735 struct cbq_sched_data *q = qdisc_priv(sch);
1737 BUG_TRAP(!cl->filters);
1739 cbq_destroy_filters(cl);
1740 qdisc_destroy(cl->q);
1741 qdisc_put_rtab(cl->R_tab);
1742 #ifdef CONFIG_NET_ESTIMATOR
1743 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1744 #endif
1745 if (cl != &q->link)
1746 kfree(cl);
1749 static void
1750 cbq_destroy(struct Qdisc* sch)
1752 struct cbq_sched_data *q = qdisc_priv(sch);
1753 struct cbq_class *cl;
1754 unsigned h;
1756 #ifdef CONFIG_NET_CLS_POLICE
1757 q->rx_class = NULL;
1758 #endif
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leaves, which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
1764 for (h = 0; h < 16; h++)
1765 for (cl = q->classes[h]; cl; cl = cl->next)
1766 cbq_destroy_filters(cl);
1768 for (h = 0; h < 16; h++) {
1769 struct cbq_class *next;
1771 for (cl = q->classes[h]; cl; cl = next) {
1772 next = cl->next;
1773 cbq_destroy_class(sch, cl);
1778 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1780 struct cbq_class *cl = (struct cbq_class*)arg;
1782 if (--cl->refcnt == 0) {
1783 #ifdef CONFIG_NET_CLS_POLICE
1784 struct cbq_sched_data *q = qdisc_priv(sch);
1786 spin_lock_bh(&sch->dev->queue_lock);
1787 if (q->rx_class == cl)
1788 q->rx_class = NULL;
1789 spin_unlock_bh(&sch->dev->queue_lock);
1790 #endif
1792 cbq_destroy_class(sch, cl);
1796 static int
1797 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1798 unsigned long *arg)
1800 int err;
1801 struct cbq_sched_data *q = qdisc_priv(sch);
1802 struct cbq_class *cl = (struct cbq_class*)*arg;
1803 struct rtattr *opt = tca[TCA_OPTIONS-1];
1804 struct rtattr *tb[TCA_CBQ_MAX];
1805 struct cbq_class *parent;
1806 struct qdisc_rate_table *rtab = NULL;
1808 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1809 return -EINVAL;
1811 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1812 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1813 return -EINVAL;
1815 if (tb[TCA_CBQ_FOPT-1] &&
1816 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1817 return -EINVAL;
1819 if (tb[TCA_CBQ_RATE-1] &&
1820 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1821 return -EINVAL;
1823 if (tb[TCA_CBQ_LSSOPT-1] &&
1824 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1825 return -EINVAL;
1827 if (tb[TCA_CBQ_WRROPT-1] &&
1828 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1829 return -EINVAL;
1831 #ifdef CONFIG_NET_CLS_POLICE
1832 if (tb[TCA_CBQ_POLICE-1] &&
1833 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1834 return -EINVAL;
1835 #endif
1837 if (cl) {
1838 /* Check parent */
1839 if (parentid) {
1840 if (cl->tparent && cl->tparent->classid != parentid)
1841 return -EINVAL;
1842 if (!cl->tparent && parentid != TC_H_ROOT)
1843 return -EINVAL;
1846 if (tb[TCA_CBQ_RATE-1]) {
1847 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1848 if (rtab == NULL)
1849 return -EINVAL;
1852 /* Change class parameters */
1853 sch_tree_lock(sch);
1855 if (cl->next_alive != NULL)
1856 cbq_deactivate_class(cl);
1858 if (rtab) {
1859 rtab = xchg(&cl->R_tab, rtab);
1860 qdisc_put_rtab(rtab);
1863 if (tb[TCA_CBQ_LSSOPT-1])
1864 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1866 if (tb[TCA_CBQ_WRROPT-1]) {
1867 cbq_rmprio(q, cl);
1868 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1871 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1872 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1874 #ifdef CONFIG_NET_CLS_POLICE
1875 if (tb[TCA_CBQ_POLICE-1])
1876 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1877 #endif
1879 if (tb[TCA_CBQ_FOPT-1])
1880 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1882 if (cl->q->q.qlen)
1883 cbq_activate_class(cl);
1885 sch_tree_unlock(sch);
1887 #ifdef CONFIG_NET_ESTIMATOR
1888 if (tca[TCA_RATE-1])
1889 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1890 cl->stats_lock, tca[TCA_RATE-1]);
1891 #endif
1892 return 0;
1895 if (parentid == TC_H_ROOT)
1896 return -EINVAL;
1898 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1899 tb[TCA_CBQ_LSSOPT-1] == NULL)
1900 return -EINVAL;
1902 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1903 if (rtab == NULL)
1904 return -EINVAL;
1906 if (classid) {
1907 err = -EINVAL;
1908 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1909 goto failure;
1910 } else {
1911 int i;
1912 classid = TC_H_MAKE(sch->handle,0x8000);
1914 for (i=0; i<0x8000; i++) {
1915 if (++q->hgenerator >= 0x8000)
1916 q->hgenerator = 1;
1917 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1918 break;
1920 err = -ENOSR;
1921 if (i >= 0x8000)
1922 goto failure;
1923 classid = classid|q->hgenerator;
1926 parent = &q->link;
1927 if (parentid) {
1928 parent = cbq_class_lookup(q, parentid);
1929 err = -EINVAL;
1930 if (parent == NULL)
1931 goto failure;
1934 err = -ENOBUFS;
1935 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1936 if (cl == NULL)
1937 goto failure;
1938 cl->R_tab = rtab;
1939 rtab = NULL;
1940 cl->refcnt = 1;
1941 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1942 cl->q = &noop_qdisc;
1943 cl->classid = classid;
1944 cl->tparent = parent;
1945 cl->qdisc = sch;
1946 cl->allot = parent->allot;
1947 cl->quantum = cl->allot;
1948 cl->weight = cl->R_tab->rate.rate;
1949 cl->stats_lock = &sch->dev->queue_lock;
1951 sch_tree_lock(sch);
1952 cbq_link_class(cl);
1953 cl->borrow = cl->tparent;
1954 if (cl->tparent != &q->link)
1955 cl->share = cl->tparent;
1956 cbq_adjust_levels(parent);
1957 cl->minidle = -0x7FFFFFFF;
1958 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1959 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1960 if (cl->ewma_log==0)
1961 cl->ewma_log = q->link.ewma_log;
1962 if (cl->maxidle==0)
1963 cl->maxidle = q->link.maxidle;
1964 if (cl->avpkt==0)
1965 cl->avpkt = q->link.avpkt;
1966 cl->overlimit = cbq_ovl_classic;
1967 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1968 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1969 #ifdef CONFIG_NET_CLS_POLICE
1970 if (tb[TCA_CBQ_POLICE-1])
1971 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1972 #endif
1973 if (tb[TCA_CBQ_FOPT-1])
1974 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1975 sch_tree_unlock(sch);
1977 #ifdef CONFIG_NET_ESTIMATOR
1978 if (tca[TCA_RATE-1])
1979 gen_new_estimator(&cl->bstats, &cl->rate_est,
1980 cl->stats_lock, tca[TCA_RATE-1]);
1981 #endif
1983 *arg = (unsigned long)cl;
1984 return 0;
1986 failure:
1987 qdisc_put_rtab(rtab);
1988 return err;
1991 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1993 struct cbq_sched_data *q = qdisc_priv(sch);
1994 struct cbq_class *cl = (struct cbq_class*)arg;
1995 unsigned int qlen;
1997 if (cl->filters || cl->children || cl == &q->link)
1998 return -EBUSY;
2000 sch_tree_lock(sch);
2002 qlen = cl->q->q.qlen;
2003 qdisc_reset(cl->q);
2004 qdisc_tree_decrease_qlen(cl->q, qlen);
2006 if (cl->next_alive)
2007 cbq_deactivate_class(cl);
2009 if (q->tx_borrowed == cl)
2010 q->tx_borrowed = q->tx_class;
2011 if (q->tx_class == cl) {
2012 q->tx_class = NULL;
2013 q->tx_borrowed = NULL;
2015 #ifdef CONFIG_NET_CLS_POLICE
2016 if (q->rx_class == cl)
2017 q->rx_class = NULL;
2018 #endif
2020 cbq_unlink_class(cl);
2021 cbq_adjust_levels(cl->tparent);
2022 cl->defmap = 0;
2023 cbq_sync_defmap(cl);
2025 cbq_rmprio(q, cl);
2026 sch_tree_unlock(sch);
2028 if (--cl->refcnt == 0)
2029 cbq_destroy_class(sch, cl);
2031 return 0;
2034 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2036 struct cbq_sched_data *q = qdisc_priv(sch);
2037 struct cbq_class *cl = (struct cbq_class *)arg;
2039 if (cl == NULL)
2040 cl = &q->link;
2042 return &cl->filter_list;
2045 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2046 u32 classid)
2048 struct cbq_sched_data *q = qdisc_priv(sch);
2049 struct cbq_class *p = (struct cbq_class*)parent;
2050 struct cbq_class *cl = cbq_class_lookup(q, classid);
2052 if (cl) {
2053 if (p && p->level <= cl->level)
2054 return 0;
2055 cl->filters++;
2056 return (unsigned long)cl;
2058 return 0;
2061 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2063 struct cbq_class *cl = (struct cbq_class*)arg;
2065 cl->filters--;
2068 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2070 struct cbq_sched_data *q = qdisc_priv(sch);
2071 unsigned h;
2073 if (arg->stop)
2074 return;
2076 for (h = 0; h < 16; h++) {
2077 struct cbq_class *cl;
2079 for (cl = q->classes[h]; cl; cl = cl->next) {
2080 if (arg->count < arg->skip) {
2081 arg->count++;
2082 continue;
2084 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2085 arg->stop = 1;
2086 return;
2088 arg->count++;
2093 static struct Qdisc_class_ops cbq_class_ops = {
2094 .graft = cbq_graft,
2095 .leaf = cbq_leaf,
2096 .qlen_notify = cbq_qlen_notify,
2097 .get = cbq_get,
2098 .put = cbq_put,
2099 .change = cbq_change_class,
2100 .delete = cbq_delete,
2101 .walk = cbq_walk,
2102 .tcf_chain = cbq_find_tcf,
2103 .bind_tcf = cbq_bind_filter,
2104 .unbind_tcf = cbq_unbind_filter,
2105 .dump = cbq_dump_class,
2106 .dump_stats = cbq_dump_class_stats,
2109 static struct Qdisc_ops cbq_qdisc_ops = {
2110 .next = NULL,
2111 .cl_ops = &cbq_class_ops,
2112 .id = "cbq",
2113 .priv_size = sizeof(struct cbq_sched_data),
2114 .enqueue = cbq_enqueue,
2115 .dequeue = cbq_dequeue,
2116 .requeue = cbq_requeue,
2117 .drop = cbq_drop,
2118 .init = cbq_init,
2119 .reset = cbq_reset,
2120 .destroy = cbq_destroy,
2121 .change = NULL,
2122 .dump = cbq_dump,
2123 .dump_stats = cbq_dump_stats,
2124 .owner = THIS_MODULE,
2127 static int __init cbq_module_init(void)
2129 return register_qdisc(&cbq_qdisc_ops);
2131 static void __exit cbq_module_exit(void)
2133 unregister_qdisc(&cbq_qdisc_ops);
2135 module_init(cbq_module_init)
2136 module_exit(cbq_module_exit)
2137 MODULE_LICENSE("GPL");