net/sched/sch_sfq.c

   1 /*
   2  * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  */
  11
  12 #include <linux/module.h>
  13 #include <linux/types.h>
  14 #include <linux/kernel.h>
  15 #include <linux/jiffies.h>
  16 #include <linux/string.h>
  17 #include <linux/in.h>
  18 #include <linux/errno.h>
  19 #include <linux/init.h>
  20 #include <linux/skbuff.h>
  21 #include <linux/jhash.h>
  22 #include <linux/slab.h>
  23 #include <linux/vmalloc.h>
  24 #include <net/netlink.h>
  25 #include <net/pkt_sched.h>
  26 #include <net/flow_keys.h>
  27
  28
  29 /*      Stochastic Fairness Queuing algorithm.
  30         =======================================
  31
  32         Source:
  33         Paul E. McKenney "Stochastic Fairness Queuing",
  34         IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
  35
  36         Paul E. McKenney "Stochastic Fairness Queuing",
  37         "Interworking: Research and Experience", v.2, 1991, p.113-131.
  38
  39
  40         See also:
  41         M. Shreedhar and George Varghese "Efficient Fair
  42         Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
  43
  44
  45         This is not the thing that is usually called (W)FQ nowadays.
  46         It does not use any timestamp mechanism, but instead
  47         processes queues in round-robin order.
  48
  49         ADVANTAGE:
  50
  51         - It is very cheap. Both CPU and memory requirements are minimal.
  52
  53         DRAWBACKS:
  54
  55         - "Stochastic" -> It is not 100% fair.
  56         When hash collisions occur, several flows are considered as one.
  57
  58         - "Round-robin" -> It introduces larger delays than virtual clock
  59         based schemes, and should not be used for isolating interactive
  60         traffic from non-interactive. It means, that this scheduler
  61         should be used as leaf of CBQ or P3, which put interactive traffic
  62         to higher priority band.
  63
  64         We still need true WFQ for top level CSZ, but using WFQ
  65         for the best effort traffic is absolutely pointless:
  66         SFQ is superior for this purpose.
  67
  68         IMPLEMENTATION:
  69         This implementation limits maximal queue length to 128;
  70         max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
  71         The only goal of this restrictions was that all data
  72         fit into one 4K page on 32bit arches.
  73
  74         It is easy to increase these values, but not in flight.  */
  75
  76 #define SFQ_DEPTH               128 /* max number of packets per flow */
  77 #define SFQ_SLOTS               128 /* max number of flows */
  78 #define SFQ_EMPTY_SLOT          255
  79 #define SFQ_DEFAULT_HASH_DIVISOR 1024
  80
  81 /* We use 16 bits to store allot, and want to handle packets up to 64K
  82  * Scale allot by 8 (1<<3) so that no overflow occurs.
  83  */
  84 #define SFQ_ALLOT_SHIFT         3
  85 #define SFQ_ALLOT_SIZE(X)       DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
  86
  87 /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
  88 typedef unsigned char sfq_index;
  89
  90 /*
  91  * We dont use pointers to save space.
  92  * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
  93  * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
  94  * are 'pointers' to dep[] array
  95  */
  96 struct sfq_head {
  97         sfq_index       next;
  98         sfq_index       prev;
  99 };
 100
 101 struct sfq_slot {
 102         struct sk_buff  *skblist_next;
 103         struct sk_buff  *skblist_prev;
 104         sfq_index       qlen; /* number of skbs in skblist */
 105         sfq_index       next; /* next slot in sfq chain */
 106         struct sfq_head dep; /* anchor in dep[] chains */
 107         unsigned short  hash; /* hash value (index in ht[]) */
 108         short           allot; /* credit for this slot */
 109 };
 110
 111 struct sfq_sched_data {
 112 /* Parameters */
 113         int             perturb_period;
 114         unsigned int    quantum;        /* Allotment per round: MUST BE >= MTU */
 115         int             limit;
 116         unsigned int    divisor;        /* number of slots in hash table */
 117 /* Variables */
 118         struct tcf_proto *filter_list;
 119         struct timer_list perturb_timer;
 120         u32             perturbation;
 121         sfq_index       cur_depth;      /* depth of longest slot */
 122         unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 123         struct sfq_slot *tail;          /* current slot in round */
 124         sfq_index       *ht;            /* Hash table (divisor slots) */
 125         struct sfq_slot slots[SFQ_SLOTS];
 126         struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
 127 };
 128
 129 /*
 130  * sfq_head are either in a sfq_slot or in dep[] array
 131  */
 132 static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
 133 {
 134         if (val < SFQ_SLOTS)
 135                 return &q->slots[val].dep;
 136         return &q->dep[val - SFQ_SLOTS];
 137 }
 138
 139 /*
 140  * In order to be able to quickly rehash our queue when timer changes
 141  * q->perturbation, we store flow_keys in skb->cb[]
 142  */
 143 struct sfq_skb_cb {
 144        struct flow_keys        keys;
 145 };
 146
 147 static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
 148 {
 149        BUILD_BUG_ON(sizeof(skb->cb) <
 150                sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
 151        return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
 152 }
 153
 154 static unsigned int sfq_hash(const struct sfq_sched_data *q,
 155                              const struct sk_buff *skb)
 156 {
 157         const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
 158         unsigned int hash;
 159
 160         hash = jhash_3words((__force u32)keys->dst,
 161                             (__force u32)keys->src ^ keys->ip_proto,
 162                             (__force u32)keys->ports, q->perturbation);
 163         return hash & (q->divisor - 1);
 164 }
 165
 166 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 167                                  int *qerr)
 168 {
 169         struct sfq_sched_data *q = qdisc_priv(sch);
 170         struct tcf_result res;
 171         int result;
 172
 173         if (TC_H_MAJ(skb->priority) == sch->handle &&
 174             TC_H_MIN(skb->priority) > 0 &&
 175             TC_H_MIN(skb->priority) <= q->divisor)
 176                 return TC_H_MIN(skb->priority);
 177
 178         if (!q->filter_list) {
 179                 skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
 180                 return sfq_hash(q, skb) + 1;
 181         }
 182
 183         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 184         result = tc_classify(skb, q->filter_list, &res);
 185         if (result >= 0) {
 186 #ifdef CONFIG_NET_CLS_ACT
 187                 switch (result) {
 188                 case TC_ACT_STOLEN:
 189                 case TC_ACT_QUEUED:
 190                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 191                 case TC_ACT_SHOT:
 192                         return 0;
 193                 }
 194 #endif
 195                 if (TC_H_MIN(res.classid) <= q->divisor)
 196                         return TC_H_MIN(res.classid);
 197         }
 198         return 0;
 199 }
 200
 201 /*
 202  * x : slot number [0 .. SFQ_SLOTS - 1]
 203  */
 204 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 205 {
 206         sfq_index p, n;
 207         int qlen = q->slots[x].qlen;
 208
 209         p = qlen + SFQ_SLOTS;
 210         n = q->dep[qlen].next;
 211
 212         q->slots[x].dep.next = n;
 213         q->slots[x].dep.prev = p;
 214
 215         q->dep[qlen].next = x;          /* sfq_dep_head(q, p)->next = x */
 216         sfq_dep_head(q, n)->prev = x;
 217 }
 218
 219 #define sfq_unlink(q, x, n, p)                  \
 220         n = q->slots[x].dep.next;               \
 221         p = q->slots[x].dep.prev;               \
 222         sfq_dep_head(q, p)->next = n;           \
 223         sfq_dep_head(q, n)->prev = p
 224
 225
 226 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 227 {
 228         sfq_index p, n;
 229         int d;
 230
 231         sfq_unlink(q, x, n, p);
 232
 233         d = q->slots[x].qlen--;
 234         if (n == p && q->cur_depth == d)
 235                 q->cur_depth--;
 236         sfq_link(q, x);
 237 }
 238
 239 static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 240 {
 241         sfq_index p, n;
 242         int d;
 243
 244         sfq_unlink(q, x, n, p);
 245
 246         d = ++q->slots[x].qlen;
 247         if (q->cur_depth < d)
 248                 q->cur_depth = d;
 249         sfq_link(q, x);
 250 }
 251
 252 /* helper functions : might be changed when/if skb use a standard list_head */
 253
 254 /* remove one skb from tail of slot queue */
 255 static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
 256 {
 257         struct sk_buff *skb = slot->skblist_prev;
 258
 259         slot->skblist_prev = skb->prev;
 260         skb->prev->next = (struct sk_buff *)slot;
 261         skb->next = skb->prev = NULL;
 262         return skb;
 263 }
 264
 265 /* remove one skb from head of slot queue */
 266 static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
 267 {
 268         struct sk_buff *skb = slot->skblist_next;
 269
 270         slot->skblist_next = skb->next;
 271         skb->next->prev = (struct sk_buff *)slot;
 272         skb->next = skb->prev = NULL;
 273         return skb;
 274 }
 275
 276 static inline void slot_queue_init(struct sfq_slot *slot)
 277 {
 278         slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
 279 }
 280
 281 /* add skb to slot queue (tail add) */
 282 static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
 283 {
 284         skb->prev = slot->skblist_prev;
 285         skb->next = (struct sk_buff *)slot;
 286         slot->skblist_prev->next = skb;
 287         slot->skblist_prev = skb;
 288 }
 289
 290 #define slot_queue_walk(slot, skb)              \
 291         for (skb = slot->skblist_next;          \
 292              skb != (struct sk_buff *)slot;     \
 293              skb = skb->next)
 294
 295 static unsigned int sfq_drop(struct Qdisc *sch)
 296 {
 297         struct sfq_sched_data *q = qdisc_priv(sch);
 298         sfq_index x, d = q->cur_depth;
 299         struct sk_buff *skb;
 300         unsigned int len;
 301         struct sfq_slot *slot;
 302
 303         /* Queue is full! Find the longest slot and drop tail packet from it */
 304         if (d > 1) {
 305                 x = q->dep[d].next;
 306                 slot = &q->slots[x];
 307 drop:
 308                 skb = slot_dequeue_tail(slot);
 309                 len = qdisc_pkt_len(skb);
 310                 sfq_dec(q, x);
 311                 kfree_skb(skb);
 312                 sch->q.qlen--;
 313                 sch->qstats.drops++;
 314                 sch->qstats.backlog -= len;
 315                 return len;
 316         }
 317
 318         if (d == 1) {
 319                 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
 320                 x = q->tail->next;
 321                 slot = &q->slots[x];
 322                 q->tail->next = slot->next;
 323                 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 324                 goto drop;
 325         }
 326
 327         return 0;
 328 }
 329
 330 static int
 331 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 332 {
 333         struct sfq_sched_data *q = qdisc_priv(sch);
 334         unsigned int hash;
 335         sfq_index x, qlen;
 336         struct sfq_slot *slot;
 337         int uninitialized_var(ret);
 338
 339         hash = sfq_classify(skb, sch, &ret);
 340         if (hash == 0) {
 341                 if (ret & __NET_XMIT_BYPASS)
 342                         sch->qstats.drops++;
 343                 kfree_skb(skb);
 344                 return ret;
 345         }
 346         hash--;
 347
 348         x = q->ht[hash];
 349         slot = &q->slots[x];
 350         if (x == SFQ_EMPTY_SLOT) {
 351                 x = q->dep[0].next; /* get a free slot */
 352                 q->ht[hash] = x;
 353                 slot = &q->slots[x];
 354                 slot->hash = hash;
 355         }
 356
 357         /* If selected queue has length q->limit, do simple tail drop,
 358          * i.e. drop _this_ packet.
 359          */
 360         if (slot->qlen >= q->limit)
 361                 return qdisc_drop(skb, sch);
 362
 363         sch->qstats.backlog += qdisc_pkt_len(skb);
 364         slot_queue_add(slot, skb);
 365         sfq_inc(q, x);
 366         if (slot->qlen == 1) {          /* The flow is new */
 367                 if (q->tail == NULL) {  /* It is the first flow */
 368                         slot->next = x;
 369                         q->tail = slot;
 370                 } else {
 371                         slot->next = q->tail->next;
 372                         q->tail->next = x;
 373                 }
 374                 slot->allot = q->scaled_quantum;
 375         }
 376         if (++sch->q.qlen <= q->limit)
 377                 return NET_XMIT_SUCCESS;
 378
 379         qlen = slot->qlen;
 380         sfq_drop(sch);
 381         /* Return Congestion Notification only if we dropped a packet
 382          * from this flow.
 383          */
 384         if (qlen != slot->qlen)
 385                 return NET_XMIT_CN;
 386
 387         /* As we dropped a packet, better let upper stack know this */
 388         qdisc_tree_decrease_qlen(sch, 1);
 389         return NET_XMIT_SUCCESS;
 390 }
 391
 392 static struct sk_buff *
 393 sfq_dequeue(struct Qdisc *sch)
 394 {
 395         struct sfq_sched_data *q = qdisc_priv(sch);
 396         struct sk_buff *skb;
 397         sfq_index a, next_a;
 398         struct sfq_slot *slot;
 399
 400         /* No active slots */
 401         if (q->tail == NULL)
 402                 return NULL;
 403
 404 next_slot:
 405         a = q->tail->next;
 406         slot = &q->slots[a];
 407         if (slot->allot <= 0) {
 408                 q->tail = slot;
 409                 slot->allot += q->scaled_quantum;
 410                 goto next_slot;
 411         }
 412         skb = slot_dequeue_head(slot);
 413         sfq_dec(q, a);
 414         qdisc_bstats_update(sch, skb);
 415         sch->q.qlen--;
 416         sch->qstats.backlog -= qdisc_pkt_len(skb);
 417
 418         /* Is the slot empty? */
 419         if (slot->qlen == 0) {
 420                 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 421                 next_a = slot->next;
 422                 if (a == next_a) {
 423                         q->tail = NULL; /* no more active slots */
 424                         return skb;
 425                 }
 426                 q->tail->next = next_a;
 427         } else {
 428                 slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
 429         }
 430         return skb;
 431 }
 432
 433 static void
 434 sfq_reset(struct Qdisc *sch)
 435 {
 436         struct sk_buff *skb;
 437
 438         while ((skb = sfq_dequeue(sch)) != NULL)
 439                 kfree_skb(skb);
 440 }
 441
 442 /*
 443  * When q->perturbation is changed, we rehash all queued skbs
 444  * to avoid OOO (Out Of Order) effects.
 445  * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
 446  * counters.
 447  */
 448 static void sfq_rehash(struct sfq_sched_data *q)
 449 {
 450         struct sk_buff *skb;
 451         int i;
 452         struct sfq_slot *slot;
 453         struct sk_buff_head list;
 454
 455         __skb_queue_head_init(&list);
 456
 457         for (i = 0; i < SFQ_SLOTS; i++) {
 458                 slot = &q->slots[i];
 459                 if (!slot->qlen)
 460                         continue;
 461                 while (slot->qlen) {
 462                         skb = slot_dequeue_head(slot);
 463                         sfq_dec(q, i);
 464                         __skb_queue_tail(&list, skb);
 465                 }
 466                 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 467         }
 468         q->tail = NULL;
 469
 470         while ((skb = __skb_dequeue(&list)) != NULL) {
 471                 unsigned int hash = sfq_hash(q, skb);
 472                 sfq_index x = q->ht[hash];
 473
 474                 slot = &q->slots[x];
 475                 if (x == SFQ_EMPTY_SLOT) {
 476                         x = q->dep[0].next; /* get a free slot */
 477                         q->ht[hash] = x;
 478                         slot = &q->slots[x];
 479                         slot->hash = hash;
 480                 }
 481                 slot_queue_add(slot, skb);
 482                 sfq_inc(q, x);
 483                 if (slot->qlen == 1) {          /* The flow is new */
 484                         if (q->tail == NULL) {  /* It is the first flow */
 485                                 slot->next = x;
 486                         } else {
 487                                 slot->next = q->tail->next;
 488                                 q->tail->next = x;
 489                         }
 490                         q->tail = slot;
 491                         slot->allot = q->scaled_quantum;
 492                 }
 493         }
 494 }
 495
 496 static void sfq_perturbation(unsigned long arg)
 497 {
 498         struct Qdisc *sch = (struct Qdisc *)arg;
 499         struct sfq_sched_data *q = qdisc_priv(sch);
 500         spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 501
 502         spin_lock(root_lock);
 503         q->perturbation = net_random();
 504         if (!q->filter_list && q->tail)
 505                 sfq_rehash(q);
 506         spin_unlock(root_lock);
 507
 508         if (q->perturb_period)
 509                 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
 510 }
 511
 512 static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 513 {
 514         struct sfq_sched_data *q = qdisc_priv(sch);
 515         struct tc_sfq_qopt *ctl = nla_data(opt);
 516         unsigned int qlen;
 517
 518         if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 519                 return -EINVAL;
 520
 521         if (ctl->divisor &&
 522             (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 523                 return -EINVAL;
 524
 525         sch_tree_lock(sch);
 526         q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
 527         q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 528         q->perturb_period = ctl->perturb_period * HZ;
 529         if (ctl->limit)
 530                 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
 531         if (ctl->divisor)
 532                 q->divisor = ctl->divisor;
 533         qlen = sch->q.qlen;
 534         while (sch->q.qlen > q->limit)
 535                 sfq_drop(sch);
 536         qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
 537
 538         del_timer(&q->perturb_timer);
 539         if (q->perturb_period) {
 540                 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
 541                 q->perturbation = net_random();
 542         }
 543         sch_tree_unlock(sch);
 544         return 0;
 545 }
 546
 547 static void *sfq_alloc(size_t sz)
 548 {
 549         void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
 550
 551         if (!ptr)
 552                 ptr = vmalloc(sz);
 553         return ptr;
 554 }
 555
 556 static void sfq_free(void *addr)
 557 {
 558         if (addr) {
 559                 if (is_vmalloc_addr(addr))
 560                         vfree(addr);
 561                 else
 562                         kfree(addr);
 563         }
 564 }
 565
 566 static void sfq_destroy(struct Qdisc *sch)
 567 {
 568         struct sfq_sched_data *q = qdisc_priv(sch);
 569
 570         tcf_destroy_chain(&q->filter_list);
 571         q->perturb_period = 0;
 572         del_timer_sync(&q->perturb_timer);
 573         sfq_free(q->ht);
 574 }
 575
 576 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 577 {
 578         struct sfq_sched_data *q = qdisc_priv(sch);
 579         int i;
 580
 581         q->perturb_timer.function = sfq_perturbation;
 582         q->perturb_timer.data = (unsigned long)sch;
 583         init_timer_deferrable(&q->perturb_timer);
 584
 585         for (i = 0; i < SFQ_DEPTH; i++) {
 586                 q->dep[i].next = i + SFQ_SLOTS;
 587                 q->dep[i].prev = i + SFQ_SLOTS;
 588         }
 589
 590         q->limit = SFQ_DEPTH - 1;
 591         q->cur_depth = 0;
 592         q->tail = NULL;
 593         q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
 594         if (opt == NULL) {
 595                 q->quantum = psched_mtu(qdisc_dev(sch));
 596                 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 597                 q->perturb_period = 0;
 598                 q->perturbation = net_random();
 599         } else {
 600                 int err = sfq_change(sch, opt);
 601                 if (err)
 602                         return err;
 603         }
 604
 605         q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
 606         if (!q->ht) {
 607                 sfq_destroy(sch);
 608                 return -ENOMEM;
 609         }
 610         for (i = 0; i < q->divisor; i++)
 611                 q->ht[i] = SFQ_EMPTY_SLOT;
 612
 613         for (i = 0; i < SFQ_SLOTS; i++) {
 614                 slot_queue_init(&q->slots[i]);
 615                 sfq_link(q, i);
 616         }
 617         if (q->limit >= 1)
 618                 sch->flags |= TCQ_F_CAN_BYPASS;
 619         else
 620                 sch->flags &= ~TCQ_F_CAN_BYPASS;
 621         return 0;
 622 }
 623
 624 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 625 {
 626         struct sfq_sched_data *q = qdisc_priv(sch);
 627         unsigned char *b = skb_tail_pointer(skb);
 628         struct tc_sfq_qopt opt;
 629
 630         opt.quantum = q->quantum;
 631         opt.perturb_period = q->perturb_period / HZ;
 632
 633         opt.limit = q->limit;
 634         opt.divisor = q->divisor;
 635         opt.flows = q->limit;
 636
 637         NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 638
 639         return skb->len;
 640
 641 nla_put_failure:
 642         nlmsg_trim(skb, b);
 643         return -1;
 644 }
 645
 646 static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
 647 {
 648         return NULL;
 649 }
 650
 651 static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
 652 {
 653         return 0;
 654 }
 655
 656 static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 657                               u32 classid)
 658 {
 659         /* we cannot bypass queue discipline anymore */
 660         sch->flags &= ~TCQ_F_CAN_BYPASS;
 661         return 0;
 662 }
 663
 664 static void sfq_put(struct Qdisc *q, unsigned long cl)
 665 {
 666 }
 667
 668 static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
 669 {
 670         struct sfq_sched_data *q = qdisc_priv(sch);
 671
 672         if (cl)
 673                 return NULL;
 674         return &q->filter_list;
 675 }
 676
 677 static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
 678                           struct sk_buff *skb, struct tcmsg *tcm)
 679 {
 680         tcm->tcm_handle |= TC_H_MIN(cl);
 681         return 0;
 682 }
 683
 684 static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 685                                 struct gnet_dump *d)
 686 {
 687         struct sfq_sched_data *q = qdisc_priv(sch);
 688         sfq_index idx = q->ht[cl - 1];
 689         struct gnet_stats_queue qs = { 0 };
 690         struct tc_sfq_xstats xstats = { 0 };
 691         struct sk_buff *skb;
 692
 693         if (idx != SFQ_EMPTY_SLOT) {
 694                 const struct sfq_slot *slot = &q->slots[idx];
 695
 696                 xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 697                 qs.qlen = slot->qlen;
 698                 slot_queue_walk(slot, skb)
 699                         qs.backlog += qdisc_pkt_len(skb);
 700         }
 701         if (gnet_stats_copy_queue(d, &qs) < 0)
 702                 return -1;
 703         return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
 704 }
 705
 706 static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 707 {
 708         struct sfq_sched_data *q = qdisc_priv(sch);
 709         unsigned int i;
 710
 711         if (arg->stop)
 712                 return;
 713
 714         for (i = 0; i < q->divisor; i++) {
 715                 if (q->ht[i] == SFQ_EMPTY_SLOT ||
 716                     arg->count < arg->skip) {
 717                         arg->count++;
 718                         continue;
 719                 }
 720                 if (arg->fn(sch, i + 1, arg) < 0) {
 721                         arg->stop = 1;
 722                         break;
 723                 }
 724                 arg->count++;
 725         }
 726 }
 727
 728 static const struct Qdisc_class_ops sfq_class_ops = {
 729         .leaf           =       sfq_leaf,
 730         .get            =       sfq_get,
 731         .put            =       sfq_put,
 732         .tcf_chain      =       sfq_find_tcf,
 733         .bind_tcf       =       sfq_bind,
 734         .unbind_tcf     =       sfq_put,
 735         .dump           =       sfq_dump_class,
 736         .dump_stats     =       sfq_dump_class_stats,
 737         .walk           =       sfq_walk,
 738 };
 739
 740 static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 741         .cl_ops         =       &sfq_class_ops,
 742         .id             =       "sfq",
 743         .priv_size      =       sizeof(struct sfq_sched_data),
 744         .enqueue        =       sfq_enqueue,
 745         .dequeue        =       sfq_dequeue,
 746         .peek           =       qdisc_peek_dequeued,
 747         .drop           =       sfq_drop,
 748         .init           =       sfq_init,
 749         .reset          =       sfq_reset,
 750         .destroy        =       sfq_destroy,
 751         .change         =       NULL,
 752         .dump           =       sfq_dump,
 753         .owner          =       THIS_MODULE,
 754 };
 755
 756 static int __init sfq_module_init(void)
 757 {
 758         return register_qdisc(&sfq_qdisc_ops);
 759 }
 760 static void __exit sfq_module_exit(void)
 761 {
 762         unregister_qdisc(&sfq_qdisc_ops);
 763 }
 764 module_init(sfq_module_init)
 765 module_exit(sfq_module_exit)
 766 MODULE_LICENSE("GPL");