net/sched/sch_netem.c

   1 /*
   2  * net/sched/sch_netem.c        Network emulator
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License.
   8  *
   9  *              Many of the algorithms and ideas for this came from
  10  *              NIST Net which is not copyrighted.
  11  *
  12  * Authors:     Stephen Hemminger <shemminger@osdl.org>
  13  *              Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
  14  */
  15
  16 #include <linux/mm.h>
  17 #include <linux/module.h>
  18 #include <linux/slab.h>
  19 #include <linux/types.h>
  20 #include <linux/kernel.h>
  21 #include <linux/errno.h>
  22 #include <linux/skbuff.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/rtnetlink.h>
  25 #include <linux/reciprocal_div.h>
  26
  27 #include <net/netlink.h>
  28 #include <net/pkt_sched.h>
  29 #include <net/inet_ecn.h>
  30
  31 #define VERSION "1.3"
  32
  33 /*      Network Emulation Queuing algorithm.
  34         ====================================
  35
   36         Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
   37                  Network Emulation Tool"
  38                  [2] Luigi Rizzo, DummyNet for FreeBSD
  39
  40          ----------------------------------------------------------------
  41
  42          This started out as a simple way to delay outgoing packets to
  43          test TCP but has grown to include most of the functionality
  44          of a full blown network emulator like NISTnet. It can delay
  45          packets and add random jitter (and correlation). The random
  46          distribution can be loaded from a table as well to provide
  47          normal, Pareto, or experimental curves. Packet loss,
  48          duplication, and reordering can also be emulated.
  49
  50          This qdisc does not do classification that can be handled in
  51          layering other disciplines.  It does not need to do bandwidth
  52          control either since that can be handled by using token
  53          bucket or other rate control.
  54
  55      Correlated Loss Generator models
  56
   57         Added generation of correlated loss according to a 4-state
   58         Markov model and to the "Gilbert-Elliot" model.
  59
  60         References:
  61         [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
  62         [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
  63         and intuitive loss model for packet networks and its implementation
  64         in the Netem module in the Linux kernel", available in [1]
  65
   66         Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
  67                  Fabio Ludovici <fabio.ludovici at yahoo.it>
  68 */
  69
  70 struct netem_sched_data {
  71         /* internal t(ime)fifo qdisc uses sch->q and sch->limit */
  72
  73         /* optional qdisc for classful handling (NULL at netem init) */
  74         struct Qdisc    *qdisc;
  75
  76         struct qdisc_watchdog watchdog;
  77
  78         psched_tdiff_t latency;
  79         psched_tdiff_t jitter;
  80
  81         u32 loss;
  82         u32 ecn;
  83         u32 limit;
  84         u32 counter;
  85         u32 gap;
  86         u32 duplicate;
  87         u32 reorder;
  88         u32 corrupt;
  89         u32 rate;
  90         s32 packet_overhead;
  91         u32 cell_size;
  92         u32 cell_size_reciprocal;
  93         s32 cell_overhead;
  94
  95         struct crndstate {
  96                 u32 last;
  97                 u32 rho;
  98         } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
  99
 100         struct disttable {
 101                 u32  size;
 102                 s16 table[0];
 103         } *delay_dist;
 104
 105         enum  {
 106                 CLG_RANDOM,
 107                 CLG_4_STATES,
 108                 CLG_GILB_ELL,
 109         } loss_model;
 110
 111         /* Correlated Loss Generation models */
 112         struct clgstate {
 113                 /* state of the Markov chain */
 114                 u8 state;
 115
 116                 /* 4-states and Gilbert-Elliot models */
 117                 u32 a1; /* p13 for 4-states or p for GE */
 118                 u32 a2; /* p31 for 4-states or r for GE */
 119                 u32 a3; /* p32 for 4-states or h for GE */
 120                 u32 a4; /* p14 for 4-states or 1-k for GE */
 121                 u32 a5; /* p23 used only in 4-states */
 122         } clg;
 123
 124 };
 125
 126 /* Time stamp put into socket buffer control block
 127  * Only valid when skbs are in our internal t(ime)fifo queue.
 128  */
 129 struct netem_skb_cb {
 130         psched_time_t   time_to_send;
 131 };
 132
 133 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 134 {
 135         qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
 136         return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 137 }
 138
 139 /* init_crandom - initialize correlated random number generator
 140  * Use entropy source for initial seed.
 141  */
 142 static void init_crandom(struct crndstate *state, unsigned long rho)
 143 {
 144         state->rho = rho;
 145         state->last = net_random();
 146 }
 147
 148 /* get_crandom - correlated random number generator
 149  * Next number depends on last value.
 150  * rho is scaled to avoid floating point.
 151  */
 152 static u32 get_crandom(struct crndstate *state)
 153 {
 154         u64 value, rho;
 155         unsigned long answer;
 156
 157         if (state->rho == 0)    /* no correlation */
 158                 return net_random();
 159
 160         value = net_random();
 161         rho = (u64)state->rho + 1;
 162         answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
 163         state->last = answer;
 164         return answer;
 165 }
 166
 167 /* loss_4state - 4-state model loss generator
 168  * Generates losses according to the 4-state Markov chain adopted in
 169  * the GI (General and Intuitive) loss model.
 170  */
 171 static bool loss_4state(struct netem_sched_data *q)
 172 {
 173         struct clgstate *clg = &q->clg;
 174         u32 rnd = net_random();
 175
 176         /*
 177          * Makes a comparison between rnd and the transition
 178          * probabilities outgoing from the current state, then decides the
 179          * next state and if the next packet has to be transmitted or lost.
 180          * The four states correspond to:
 181          *   1 => successfully transmitted packets within a gap period
 182          *   4 => isolated losses within a gap period
 183          *   3 => lost packets within a burst period
 184          *   2 => successfully transmitted packets within a burst period
 185          */
 186         switch (clg->state) {
 187         case 1:
 188                 if (rnd < clg->a4) {
 189                         clg->state = 4;
 190                         return true;
 191                 } else if (clg->a4 < rnd && rnd < clg->a1) {
 192                         clg->state = 3;
 193                         return true;
 194                 } else if (clg->a1 < rnd)
 195                         clg->state = 1;
 196
 197                 break;
 198         case 2:
 199                 if (rnd < clg->a5) {
 200                         clg->state = 3;
 201                         return true;
 202                 } else
 203                         clg->state = 2;
 204
 205                 break;
 206         case 3:
 207                 if (rnd < clg->a3)
 208                         clg->state = 2;
 209                 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
 210                         clg->state = 1;
 211                         return true;
 212                 } else if (clg->a2 + clg->a3 < rnd) {
 213                         clg->state = 3;
 214                         return true;
 215                 }
 216                 break;
 217         case 4:
 218                 clg->state = 1;
 219                 break;
 220         }
 221
 222         return false;
 223 }
 224
 225 /* loss_gilb_ell - Gilbert-Elliot model loss generator
 226  * Generates losses according to the Gilbert-Elliot loss model or
 227  * its special cases  (Gilbert or Simple Gilbert)
 228  *
 229  * Makes a comparison between random number and the transition
 230  * probabilities outgoing from the current state, then decides the
 231  * next state. A second random number is extracted and the comparison
 232  * with the loss probability of the current state decides if the next
 233  * packet will be transmitted or lost.
 234  */
 235 static bool loss_gilb_ell(struct netem_sched_data *q)
 236 {
 237         struct clgstate *clg = &q->clg;
 238
 239         switch (clg->state) {
 240         case 1:
 241                 if (net_random() < clg->a1)
 242                         clg->state = 2;
 243                 if (net_random() < clg->a4)
 244                         return true;
 245         case 2:
 246                 if (net_random() < clg->a2)
 247                         clg->state = 1;
 248                 if (clg->a3 > net_random())
 249                         return true;
 250         }
 251
 252         return false;
 253 }
 254
 255 static bool loss_event(struct netem_sched_data *q)
 256 {
 257         switch (q->loss_model) {
 258         case CLG_RANDOM:
 259                 /* Random packet drop 0 => none, ~0 => all */
 260                 return q->loss && q->loss >= get_crandom(&q->loss_cor);
 261
 262         case CLG_4_STATES:
 263                 /* 4state loss model algorithm (used also for GI model)
 264                 * Extracts a value from the markov 4 state loss generator,
 265                 * if it is 1 drops a packet and if needed writes the event in
 266                 * the kernel logs
 267                 */
 268                 return loss_4state(q);
 269
 270         case CLG_GILB_ELL:
 271                 /* Gilbert-Elliot loss model algorithm
 272                 * Extracts a value from the Gilbert-Elliot loss generator,
 273                 * if it is 1 drops a packet and if needed writes the event in
 274                 * the kernel logs
 275                 */
 276                 return loss_gilb_ell(q);
 277         }
 278
 279         return false;   /* not reached */
 280 }
 281
 282
 283 /* tabledist - return a pseudo-randomly distributed value with mean mu and
 284  * std deviation sigma.  Uses table lookup to approximate the desired
 285  * distribution, and a uniformly-distributed pseudo-random source.
 286  */
 287 static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 288                                 struct crndstate *state,
 289                                 const struct disttable *dist)
 290 {
 291         psched_tdiff_t x;
 292         long t;
 293         u32 rnd;
 294
 295         if (sigma == 0)
 296                 return mu;
 297
 298         rnd = get_crandom(state);
 299
 300         /* default uniform distribution */
 301         if (dist == NULL)
 302                 return (rnd % (2*sigma)) - sigma + mu;
 303
 304         t = dist->table[rnd % dist->size];
 305         x = (sigma % NETEM_DIST_SCALE) * t;
 306         if (x >= 0)
 307                 x += NETEM_DIST_SCALE/2;
 308         else
 309                 x -= NETEM_DIST_SCALE/2;
 310
 311         return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 312 }
 313
 314 static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
 315 {
 316         u64 ticks;
 317
 318         len += q->packet_overhead;
 319
 320         if (q->cell_size) {
 321                 u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
 322
 323                 if (len > cells * q->cell_size) /* extra cell needed for remainder */
 324                         cells++;
 325                 len = cells * (q->cell_size + q->cell_overhead);
 326         }
 327
 328         ticks = (u64)len * NSEC_PER_SEC;
 329
 330         do_div(ticks, q->rate);
 331         return PSCHED_NS2TICKS(ticks);
 332 }
 333
 334 static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 335 {
 336         struct sk_buff_head *list = &sch->q;
 337         psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
 338         struct sk_buff *skb;
 339
 340         if (likely(skb_queue_len(list) < sch->limit)) {
 341                 skb = skb_peek_tail(list);
 342                 /* Optimize for add at tail */
 343                 if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
 344                         return qdisc_enqueue_tail(nskb, sch);
 345
 346                 skb_queue_reverse_walk(list, skb) {
 347                         if (tnext >= netem_skb_cb(skb)->time_to_send)
 348                                 break;
 349                 }
 350
 351                 __skb_queue_after(list, skb, nskb);
 352                 sch->qstats.backlog += qdisc_pkt_len(nskb);
 353                 return NET_XMIT_SUCCESS;
 354         }
 355
 356         return qdisc_reshape_fail(nskb, sch);
 357 }
 358
 359 /*
 360  * Insert one skb into qdisc.
 361  * Note: parent depends on return value to account for queue length.
 362  *      NET_XMIT_DROP: queue length didn't change.
 363  *      NET_XMIT_SUCCESS: one skb was queued.
 364  */
 365 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 366 {
 367         struct netem_sched_data *q = qdisc_priv(sch);
 368         /* We don't fill cb now as skb_unshare() may invalidate it */
 369         struct netem_skb_cb *cb;
 370         struct sk_buff *skb2;
 371         int ret;
 372         int count = 1;
 373
 374         /* Random duplication */
 375         if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
 376                 ++count;
 377
 378         /* Drop packet? */
 379         if (loss_event(q)) {
 380                 if (q->ecn && INET_ECN_set_ce(skb))
 381                         sch->qstats.drops++; /* mark packet */
 382                 else
 383                         --count;
 384         }
 385         if (count == 0) {
 386                 sch->qstats.drops++;
 387                 kfree_skb(skb);
 388                 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 389         }
 390
 391         skb_orphan(skb);
 392
 393         /*
 394          * If we need to duplicate packet, then re-insert at top of the
 395          * qdisc tree, since parent queuer expects that only one
 396          * skb will be queued.
 397          */
 398         if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
 399                 struct Qdisc *rootq = qdisc_root(sch);
 400                 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 401                 q->duplicate = 0;
 402
 403                 qdisc_enqueue_root(skb2, rootq);
 404                 q->duplicate = dupsave;
 405         }
 406
 407         /*
 408          * Randomized packet corruption.
 409          * Make copy if needed since we are modifying
 410          * If packet is going to be hardware checksummed, then
 411          * do it now in software before we mangle it.
 412          */
 413         if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 414                 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
 415                     (skb->ip_summed == CHECKSUM_PARTIAL &&
 416                      skb_checksum_help(skb)))
 417                         return qdisc_drop(skb, sch);
 418
 419                 skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
 420         }
 421
 422         cb = netem_skb_cb(skb);
 423         if (q->gap == 0 ||              /* not doing reordering */
 424             q->counter < q->gap - 1 ||  /* inside last reordering gap */
 425             q->reorder < get_crandom(&q->reorder_cor)) {
 426                 psched_time_t now;
 427                 psched_tdiff_t delay;
 428
 429                 delay = tabledist(q->latency, q->jitter,
 430                                   &q->delay_cor, q->delay_dist);
 431
 432                 now = psched_get_time();
 433
 434                 if (q->rate) {
 435                         struct sk_buff_head *list = &sch->q;
 436
 437                         delay += packet_len_2_sched_time(skb->len, q);
 438
 439                         if (!skb_queue_empty(list)) {
 440                                 /*
 441                                  * Last packet in queue is reference point (now).
 442                                  * First packet in queue is already in flight,
 443                                  * calculate this time bonus and substract
 444                                  * from delay.
 445                                  */
 446                                 delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
 447                                 now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
 448                         }
 449                 }
 450
 451                 cb->time_to_send = now + delay;
 452                 ++q->counter;
 453                 ret = tfifo_enqueue(skb, sch);
 454         } else {
 455                 /*
 456                  * Do re-ordering by putting one out of N packets at the front
 457                  * of the queue.
 458                  */
 459                 cb->time_to_send = psched_get_time();
 460                 q->counter = 0;
 461
 462                 __skb_queue_head(&sch->q, skb);
 463                 sch->qstats.backlog += qdisc_pkt_len(skb);
 464                 sch->qstats.requeues++;
 465                 ret = NET_XMIT_SUCCESS;
 466         }
 467
 468         if (ret != NET_XMIT_SUCCESS) {
 469                 if (net_xmit_drop_count(ret)) {
 470                         sch->qstats.drops++;
 471                         return ret;
 472                 }
 473         }
 474
 475         return NET_XMIT_SUCCESS;
 476 }
 477
 478 static unsigned int netem_drop(struct Qdisc *sch)
 479 {
 480         struct netem_sched_data *q = qdisc_priv(sch);
 481         unsigned int len;
 482
 483         len = qdisc_queue_drop(sch);
 484         if (!len && q->qdisc && q->qdisc->ops->drop)
 485             len = q->qdisc->ops->drop(q->qdisc);
 486         if (len)
 487                 sch->qstats.drops++;
 488
 489         return len;
 490 }
 491
 492 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 493 {
 494         struct netem_sched_data *q = qdisc_priv(sch);
 495         struct sk_buff *skb;
 496
 497         if (qdisc_is_throttled(sch))
 498                 return NULL;
 499
 500 tfifo_dequeue:
 501         skb = qdisc_peek_head(sch);
 502         if (skb) {
 503                 const struct netem_skb_cb *cb = netem_skb_cb(skb);
 504
 505                 /* if more time remaining? */
 506                 if (cb->time_to_send <= psched_get_time()) {
 507                         __skb_unlink(skb, &sch->q);
 508                         sch->qstats.backlog -= qdisc_pkt_len(skb);
 509
 510 #ifdef CONFIG_NET_CLS_ACT
 511                         /*
 512                          * If it's at ingress let's pretend the delay is
 513                          * from the network (tstamp will be updated).
 514                          */
 515                         if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
 516                                 skb->tstamp.tv64 = 0;
 517 #endif
 518
 519                         if (q->qdisc) {
 520                                 int err = qdisc_enqueue(skb, q->qdisc);
 521
 522                                 if (unlikely(err != NET_XMIT_SUCCESS)) {
 523                                         if (net_xmit_drop_count(err)) {
 524                                                 sch->qstats.drops++;
 525                                                 qdisc_tree_decrease_qlen(sch, 1);
 526                                         }
 527                                 }
 528                                 goto tfifo_dequeue;
 529                         }
 530 deliver:
 531                         qdisc_unthrottled(sch);
 532                         qdisc_bstats_update(sch, skb);
 533                         return skb;
 534                 }
 535
 536                 if (q->qdisc) {
 537                         skb = q->qdisc->ops->dequeue(q->qdisc);
 538                         if (skb)
 539                                 goto deliver;
 540                 }
 541                 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 542         }
 543
 544         if (q->qdisc) {
 545                 skb = q->qdisc->ops->dequeue(q->qdisc);
 546                 if (skb)
 547                         goto deliver;
 548         }
 549         return NULL;
 550 }
 551
 552 static void netem_reset(struct Qdisc *sch)
 553 {
 554         struct netem_sched_data *q = qdisc_priv(sch);
 555
 556         qdisc_reset_queue(sch);
 557         if (q->qdisc)
 558                 qdisc_reset(q->qdisc);
 559         qdisc_watchdog_cancel(&q->watchdog);
 560 }
 561
 562 static void dist_free(struct disttable *d)
 563 {
 564         if (d) {
 565                 if (is_vmalloc_addr(d))
 566                         vfree(d);
 567                 else
 568                         kfree(d);
 569         }
 570 }
 571
 572 /*
 573  * Distribution data is a variable size payload containing
 574  * signed 16 bit values.
 575  */
 576 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 577 {
 578         struct netem_sched_data *q = qdisc_priv(sch);
 579         size_t n = nla_len(attr)/sizeof(__s16);
 580         const __s16 *data = nla_data(attr);
 581         spinlock_t *root_lock;
 582         struct disttable *d;
 583         int i;
 584         size_t s;
 585
 586         if (n > NETEM_DIST_MAX)
 587                 return -EINVAL;
 588
 589         s = sizeof(struct disttable) + n * sizeof(s16);
 590         d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
 591         if (!d)
 592                 d = vmalloc(s);
 593         if (!d)
 594                 return -ENOMEM;
 595
 596         d->size = n;
 597         for (i = 0; i < n; i++)
 598                 d->table[i] = data[i];
 599
 600         root_lock = qdisc_root_sleeping_lock(sch);
 601
 602         spin_lock_bh(root_lock);
 603         swap(q->delay_dist, d);
 604         spin_unlock_bh(root_lock);
 605
 606         dist_free(d);
 607         return 0;
 608 }
 609
 610 static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 611 {
 612         struct netem_sched_data *q = qdisc_priv(sch);
 613         const struct tc_netem_corr *c = nla_data(attr);
 614
 615         init_crandom(&q->delay_cor, c->delay_corr);
 616         init_crandom(&q->loss_cor, c->loss_corr);
 617         init_crandom(&q->dup_cor, c->dup_corr);
 618 }
 619
 620 static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
 621 {
 622         struct netem_sched_data *q = qdisc_priv(sch);
 623         const struct tc_netem_reorder *r = nla_data(attr);
 624
 625         q->reorder = r->probability;
 626         init_crandom(&q->reorder_cor, r->correlation);
 627 }
 628
 629 static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 630 {
 631         struct netem_sched_data *q = qdisc_priv(sch);
 632         const struct tc_netem_corrupt *r = nla_data(attr);
 633
 634         q->corrupt = r->probability;
 635         init_crandom(&q->corrupt_cor, r->correlation);
 636 }
 637
 638 static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
 639 {
 640         struct netem_sched_data *q = qdisc_priv(sch);
 641         const struct tc_netem_rate *r = nla_data(attr);
 642
 643         q->rate = r->rate;
 644         q->packet_overhead = r->packet_overhead;
 645         q->cell_size = r->cell_size;
 646         if (q->cell_size)
 647                 q->cell_size_reciprocal = reciprocal_value(q->cell_size);
 648         q->cell_overhead = r->cell_overhead;
 649 }
 650
 651 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
 652 {
 653         struct netem_sched_data *q = qdisc_priv(sch);
 654         const struct nlattr *la;
 655         int rem;
 656
 657         nla_for_each_nested(la, attr, rem) {
 658                 u16 type = nla_type(la);
 659
 660                 switch(type) {
 661                 case NETEM_LOSS_GI: {
 662                         const struct tc_netem_gimodel *gi = nla_data(la);
 663
 664                         if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
 665                                 pr_info("netem: incorrect gi model size\n");
 666                                 return -EINVAL;
 667                         }
 668
 669                         q->loss_model = CLG_4_STATES;
 670
 671                         q->clg.state = 1;
 672                         q->clg.a1 = gi->p13;
 673                         q->clg.a2 = gi->p31;
 674                         q->clg.a3 = gi->p32;
 675                         q->clg.a4 = gi->p14;
 676                         q->clg.a5 = gi->p23;
 677                         break;
 678                 }
 679
 680                 case NETEM_LOSS_GE: {
 681                         const struct tc_netem_gemodel *ge = nla_data(la);
 682
 683                         if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
 684                                 pr_info("netem: incorrect ge model size\n");
 685                                 return -EINVAL;
 686                         }
 687
 688                         q->loss_model = CLG_GILB_ELL;
 689                         q->clg.state = 1;
 690                         q->clg.a1 = ge->p;
 691                         q->clg.a2 = ge->r;
 692                         q->clg.a3 = ge->h;
 693                         q->clg.a4 = ge->k1;
 694                         break;
 695                 }
 696
 697                 default:
 698                         pr_info("netem: unknown loss type %u\n", type);
 699                         return -EINVAL;
 700                 }
 701         }
 702
 703         return 0;
 704 }
 705
 706 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 707         [TCA_NETEM_CORR]        = { .len = sizeof(struct tc_netem_corr) },
 708         [TCA_NETEM_REORDER]     = { .len = sizeof(struct tc_netem_reorder) },
 709         [TCA_NETEM_CORRUPT]     = { .len = sizeof(struct tc_netem_corrupt) },
 710         [TCA_NETEM_RATE]        = { .len = sizeof(struct tc_netem_rate) },
 711         [TCA_NETEM_LOSS]        = { .type = NLA_NESTED },
 712         [TCA_NETEM_ECN]         = { .type = NLA_U32 },
 713 };
 714
 715 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
 716                       const struct nla_policy *policy, int len)
 717 {
 718         int nested_len = nla_len(nla) - NLA_ALIGN(len);
 719
 720         if (nested_len < 0) {
 721                 pr_info("netem: invalid attributes len %d\n", nested_len);
 722                 return -EINVAL;
 723         }
 724
 725         if (nested_len >= nla_attr_size(0))
 726                 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
 727                                  nested_len, policy);
 728
 729         memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 730         return 0;
 731 }
 732
 733 /* Parse netlink message to set options */
 734 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 735 {
 736         struct netem_sched_data *q = qdisc_priv(sch);
 737         struct nlattr *tb[TCA_NETEM_MAX + 1];
 738         struct tc_netem_qopt *qopt;
 739         int ret;
 740
 741         if (opt == NULL)
 742                 return -EINVAL;
 743
 744         qopt = nla_data(opt);
 745         ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
 746         if (ret < 0)
 747                 return ret;
 748
 749         sch->limit = qopt->limit;
 750
 751         q->latency = qopt->latency;
 752         q->jitter = qopt->jitter;
 753         q->limit = qopt->limit;
 754         q->gap = qopt->gap;
 755         q->counter = 0;
 756         q->loss = qopt->loss;
 757         q->duplicate = qopt->duplicate;
 758
 759         /* for compatibility with earlier versions.
 760          * if gap is set, need to assume 100% probability
 761          */
 762         if (q->gap)
 763                 q->reorder = ~0;
 764
 765         if (tb[TCA_NETEM_CORR])
 766                 get_correlation(sch, tb[TCA_NETEM_CORR]);
 767
 768         if (tb[TCA_NETEM_DELAY_DIST]) {
 769                 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
 770                 if (ret)
 771                         return ret;
 772         }
 773
 774         if (tb[TCA_NETEM_REORDER])
 775                 get_reorder(sch, tb[TCA_NETEM_REORDER]);
 776
 777         if (tb[TCA_NETEM_CORRUPT])
 778                 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 779
 780         if (tb[TCA_NETEM_RATE])
 781                 get_rate(sch, tb[TCA_NETEM_RATE]);
 782
 783         if (tb[TCA_NETEM_ECN])
 784                 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
 785
 786         q->loss_model = CLG_RANDOM;
 787         if (tb[TCA_NETEM_LOSS])
 788                 ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
 789
 790         return ret;
 791 }
 792
 793 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 794 {
 795         struct netem_sched_data *q = qdisc_priv(sch);
 796         int ret;
 797
 798         if (!opt)
 799                 return -EINVAL;
 800
 801         qdisc_watchdog_init(&q->watchdog, sch);
 802
 803         q->loss_model = CLG_RANDOM;
 804         ret = netem_change(sch, opt);
 805         if (ret)
 806                 pr_info("netem: change failed\n");
 807         return ret;
 808 }
 809
 810 static void netem_destroy(struct Qdisc *sch)
 811 {
 812         struct netem_sched_data *q = qdisc_priv(sch);
 813
 814         qdisc_watchdog_cancel(&q->watchdog);
 815         if (q->qdisc)
 816                 qdisc_destroy(q->qdisc);
 817         dist_free(q->delay_dist);
 818 }
 819
 820 static int dump_loss_model(const struct netem_sched_data *q,
 821                            struct sk_buff *skb)
 822 {
 823         struct nlattr *nest;
 824
 825         nest = nla_nest_start(skb, TCA_NETEM_LOSS);
 826         if (nest == NULL)
 827                 goto nla_put_failure;
 828
 829         switch (q->loss_model) {
 830         case CLG_RANDOM:
 831                 /* legacy loss model */
 832                 nla_nest_cancel(skb, nest);
 833                 return 0;       /* no data */
 834
 835         case CLG_4_STATES: {
 836                 struct tc_netem_gimodel gi = {
 837                         .p13 = q->clg.a1,
 838                         .p31 = q->clg.a2,
 839                         .p32 = q->clg.a3,
 840                         .p14 = q->clg.a4,
 841                         .p23 = q->clg.a5,
 842                 };
 843
 844                 if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
 845                         goto nla_put_failure;
 846                 break;
 847         }
 848         case CLG_GILB_ELL: {
 849                 struct tc_netem_gemodel ge = {
 850                         .p = q->clg.a1,
 851                         .r = q->clg.a2,
 852                         .h = q->clg.a3,
 853                         .k1 = q->clg.a4,
 854                 };
 855
 856                 if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
 857                         goto nla_put_failure;
 858                 break;
 859         }
 860         }
 861
 862         nla_nest_end(skb, nest);
 863         return 0;
 864
 865 nla_put_failure:
 866         nla_nest_cancel(skb, nest);
 867         return -1;
 868 }
 869
 870 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 871 {
 872         const struct netem_sched_data *q = qdisc_priv(sch);
 873         struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
 874         struct tc_netem_qopt qopt;
 875         struct tc_netem_corr cor;
 876         struct tc_netem_reorder reorder;
 877         struct tc_netem_corrupt corrupt;
 878         struct tc_netem_rate rate;
 879
 880         qopt.latency = q->latency;
 881         qopt.jitter = q->jitter;
 882         qopt.limit = q->limit;
 883         qopt.loss = q->loss;
 884         qopt.gap = q->gap;
 885         qopt.duplicate = q->duplicate;
 886         if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
 887                 goto nla_put_failure;
 888
 889         cor.delay_corr = q->delay_cor.rho;
 890         cor.loss_corr = q->loss_cor.rho;
 891         cor.dup_corr = q->dup_cor.rho;
 892         if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
 893                 goto nla_put_failure;
 894
 895         reorder.probability = q->reorder;
 896         reorder.correlation = q->reorder_cor.rho;
 897         if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
 898                 goto nla_put_failure;
 899
 900         corrupt.probability = q->corrupt;
 901         corrupt.correlation = q->corrupt_cor.rho;
 902         if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
 903                 goto nla_put_failure;
 904
 905         rate.rate = q->rate;
 906         rate.packet_overhead = q->packet_overhead;
 907         rate.cell_size = q->cell_size;
 908         rate.cell_overhead = q->cell_overhead;
 909         if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
 910                 goto nla_put_failure;
 911
 912         if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
 913                 goto nla_put_failure;
 914
 915         if (dump_loss_model(q, skb) != 0)
 916                 goto nla_put_failure;
 917
 918         return nla_nest_end(skb, nla);
 919
 920 nla_put_failure:
 921         nlmsg_trim(skb, nla);
 922         return -1;
 923 }
 924
 925 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
 926                           struct sk_buff *skb, struct tcmsg *tcm)
 927 {
 928         struct netem_sched_data *q = qdisc_priv(sch);
 929
 930         if (cl != 1 || !q->qdisc)       /* only one class */
 931                 return -ENOENT;
 932
 933         tcm->tcm_handle |= TC_H_MIN(1);
 934         tcm->tcm_info = q->qdisc->handle;
 935
 936         return 0;
 937 }
 938
 939 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 940                      struct Qdisc **old)
 941 {
 942         struct netem_sched_data *q = qdisc_priv(sch);
 943
 944         sch_tree_lock(sch);
 945         *old = q->qdisc;
 946         q->qdisc = new;
 947         if (*old) {
 948                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
 949                 qdisc_reset(*old);
 950         }
 951         sch_tree_unlock(sch);
 952
 953         return 0;
 954 }
 955
 956 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
 957 {
 958         struct netem_sched_data *q = qdisc_priv(sch);
 959         return q->qdisc;
 960 }
 961
 962 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
 963 {
 964         return 1;
 965 }
 966
 967 static void netem_put(struct Qdisc *sch, unsigned long arg)
 968 {
 969 }
 970
 971 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 972 {
 973         if (!walker->stop) {
 974                 if (walker->count >= walker->skip)
 975                         if (walker->fn(sch, 1, walker) < 0) {
 976                                 walker->stop = 1;
 977                                 return;
 978                         }
 979                 walker->count++;
 980         }
 981 }
 982
 983 static const struct Qdisc_class_ops netem_class_ops = {
 984         .graft          =       netem_graft,
 985         .leaf           =       netem_leaf,
 986         .get            =       netem_get,
 987         .put            =       netem_put,
 988         .walk           =       netem_walk,
 989         .dump           =       netem_dump_class,
 990 };
 991
 992 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 993         .id             =       "netem",
 994         .cl_ops         =       &netem_class_ops,
 995         .priv_size      =       sizeof(struct netem_sched_data),
 996         .enqueue        =       netem_enqueue,
 997         .dequeue        =       netem_dequeue,
 998         .peek           =       qdisc_peek_dequeued,
 999         .drop           =       netem_drop,
1000         .init           =       netem_init,
1001         .reset          =       netem_reset,
1002         .destroy        =       netem_destroy,
1003         .change         =       netem_change,
1004         .dump           =       netem_dump,
1005         .owner          =       THIS_MODULE,
1006 };
1007
1008
1009 static int __init netem_module_init(void)
1010 {
1011         pr_info("netem: version " VERSION "\n");
1012         return register_qdisc(&netem_qdisc_ops);
1013 }
1014 static void __exit netem_module_exit(void)
1015 {
1016         unregister_qdisc(&netem_qdisc_ops);
1017 }
1018 module_init(netem_module_init)
1019 module_exit(netem_module_exit)
1020 MODULE_LICENSE("GPL");