Linux-2.6.12-rc2
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / sched / sch_netem.c
blob31c29deb139d3ed49cf1d7e70bd778438ee8541a
1 /*
2 * net/sched/sch_netem.c Network emulator
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Many of the algorithms and ideas for this came from
10 * NIST Net which is not copyrighted.
12 * Authors: Stephen Hemminger <shemminger@osdl.org>
13 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/bitops.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/netdevice.h>
23 #include <linux/skbuff.h>
24 #include <linux/rtnetlink.h>
26 #include <net/pkt_sched.h>
28 /* Network Emulation Queuing algorithm.
29 ====================================
31 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
32 Network Emulation Tool
33 [2] Luigi Rizzo, DummyNet for FreeBSD
35 ----------------------------------------------------------------
37 This started out as a simple way to delay outgoing packets to
38 test TCP but has grown to include most of the functionality
39 of a full blown network emulator like NISTnet. It can delay
40 packets and add random jitter (and correlation). The random
41 distribution can be loaded from a table as well to provide
42 normal, Pareto, or experimental curves. Packet loss,
43 duplication, and reordering can also be emulated.
45 This qdisc does not do classification that can be handled in
46 layering other disciplines. It does not need to do bandwidth
47 control either since that can be handled by using token
48 bucket or other rate control.
50 The simulator is limited by the Linux timer resolution
51 and will create packet bursts on the HZ boundary (1ms).
54 struct netem_sched_data {
55 struct Qdisc *qdisc;
56 struct sk_buff_head delayed;
57 struct timer_list timer;
59 u32 latency;
60 u32 loss;
61 u32 limit;
62 u32 counter;
63 u32 gap;
64 u32 jitter;
65 u32 duplicate;
67 struct crndstate {
68 unsigned long last;
69 unsigned long rho;
70 } delay_cor, loss_cor, dup_cor;
72 struct disttable {
73 u32 size;
74 s16 table[0];
75 } *delay_dist;
78 /* Time stamp put into socket buffer control block */
79 struct netem_skb_cb {
80 psched_time_t time_to_send;
83 /* init_crandom - initialize correlated random number generator
84 * Use entropy source for initial seed.
86 static void init_crandom(struct crndstate *state, unsigned long rho)
88 state->rho = rho;
89 state->last = net_random();
92 /* get_crandom - correlated random number generator
93 * Next number depends on last value.
94 * rho is scaled to avoid floating point.
96 static unsigned long get_crandom(struct crndstate *state)
98 u64 value, rho;
99 unsigned long answer;
101 if (state->rho == 0) /* no correllation */
102 return net_random();
104 value = net_random();
105 rho = (u64)state->rho + 1;
106 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
107 state->last = answer;
108 return answer;
111 /* tabledist - return a pseudo-randomly distributed value with mean mu and
112 * std deviation sigma. Uses table lookup to approximate the desired
113 * distribution, and a uniformly-distributed pseudo-random source.
115 static long tabledist(unsigned long mu, long sigma,
116 struct crndstate *state, const struct disttable *dist)
118 long t, x;
119 unsigned long rnd;
121 if (sigma == 0)
122 return mu;
124 rnd = get_crandom(state);
126 /* default uniform distribution */
127 if (dist == NULL)
128 return (rnd % (2*sigma)) - sigma + mu;
130 t = dist->table[rnd % dist->size];
131 x = (sigma % NETEM_DIST_SCALE) * t;
132 if (x >= 0)
133 x += NETEM_DIST_SCALE/2;
134 else
135 x -= NETEM_DIST_SCALE/2;
137 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
140 /* Put skb in the private delayed queue. */
141 static int delay_skb(struct Qdisc *sch, struct sk_buff *skb)
143 struct netem_sched_data *q = qdisc_priv(sch);
144 struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
145 psched_tdiff_t td;
146 psched_time_t now;
148 PSCHED_GET_TIME(now);
149 td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
150 PSCHED_TADD2(now, td, cb->time_to_send);
152 /* Always queue at tail to keep packets in order */
153 if (likely(q->delayed.qlen < q->limit)) {
154 __skb_queue_tail(&q->delayed, skb);
155 if (!timer_pending(&q->timer)) {
156 q->timer.expires = jiffies + PSCHED_US2JIFFIE(td);
157 add_timer(&q->timer);
159 return NET_XMIT_SUCCESS;
162 kfree_skb(skb);
163 return NET_XMIT_DROP;
166 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
168 struct netem_sched_data *q = qdisc_priv(sch);
169 struct sk_buff *skb2;
170 int ret;
172 pr_debug("netem_enqueue skb=%p @%lu\n", skb, jiffies);
174 /* Random packet drop 0 => none, ~0 => all */
175 if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
176 pr_debug("netem_enqueue: random loss\n");
177 sch->qstats.drops++;
178 kfree_skb(skb);
179 return 0; /* lie about loss so TCP doesn't know */
182 /* Random duplication */
183 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)
184 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
185 pr_debug("netem_enqueue: dup %p\n", skb2);
187 if (delay_skb(sch, skb2)) {
188 sch->q.qlen++;
189 sch->bstats.bytes += skb2->len;
190 sch->bstats.packets++;
191 } else
192 sch->qstats.drops++;
195 /* If doing simple delay then gap == 0 so all packets
196 * go into the delayed holding queue
197 * otherwise if doing out of order only "1 out of gap"
198 * packets will be delayed.
200 if (q->counter < q->gap) {
201 ++q->counter;
202 ret = q->qdisc->enqueue(skb, q->qdisc);
203 } else {
204 q->counter = 0;
205 ret = delay_skb(sch, skb);
208 if (likely(ret == NET_XMIT_SUCCESS)) {
209 sch->q.qlen++;
210 sch->bstats.bytes += skb->len;
211 sch->bstats.packets++;
212 } else
213 sch->qstats.drops++;
215 return ret;
218 /* Requeue packets but don't change time stamp */
219 static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
221 struct netem_sched_data *q = qdisc_priv(sch);
222 int ret;
224 if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
225 sch->q.qlen++;
226 sch->qstats.requeues++;
229 return ret;
232 static unsigned int netem_drop(struct Qdisc* sch)
234 struct netem_sched_data *q = qdisc_priv(sch);
235 unsigned int len;
237 if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
238 sch->q.qlen--;
239 sch->qstats.drops++;
241 return len;
244 /* Dequeue packet.
245 * Move all packets that are ready to send from the delay holding
246 * list to the underlying qdisc, then just call dequeue
248 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
250 struct netem_sched_data *q = qdisc_priv(sch);
251 struct sk_buff *skb;
253 skb = q->qdisc->dequeue(q->qdisc);
254 if (skb)
255 sch->q.qlen--;
256 return skb;
259 static void netem_watchdog(unsigned long arg)
261 struct Qdisc *sch = (struct Qdisc *)arg;
262 struct netem_sched_data *q = qdisc_priv(sch);
263 struct net_device *dev = sch->dev;
264 struct sk_buff *skb;
265 psched_time_t now;
267 pr_debug("netem_watchdog: fired @%lu\n", jiffies);
269 spin_lock_bh(&dev->queue_lock);
270 PSCHED_GET_TIME(now);
272 while ((skb = skb_peek(&q->delayed)) != NULL) {
273 const struct netem_skb_cb *cb
274 = (const struct netem_skb_cb *)skb->cb;
275 long delay
276 = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
277 pr_debug("netem_watchdog: skb %p@%lu %ld\n",
278 skb, jiffies, delay);
280 /* if more time remaining? */
281 if (delay > 0) {
282 mod_timer(&q->timer, jiffies + delay);
283 break;
285 __skb_unlink(skb, &q->delayed);
287 if (q->qdisc->enqueue(skb, q->qdisc)) {
288 sch->q.qlen--;
289 sch->qstats.drops++;
292 qdisc_run(dev);
293 spin_unlock_bh(&dev->queue_lock);
296 static void netem_reset(struct Qdisc *sch)
298 struct netem_sched_data *q = qdisc_priv(sch);
300 qdisc_reset(q->qdisc);
301 skb_queue_purge(&q->delayed);
303 sch->q.qlen = 0;
304 del_timer_sync(&q->timer);
307 static int set_fifo_limit(struct Qdisc *q, int limit)
309 struct rtattr *rta;
310 int ret = -ENOMEM;
312 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
313 if (rta) {
314 rta->rta_type = RTM_NEWQDISC;
315 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
316 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
318 ret = q->ops->change(q, rta);
319 kfree(rta);
321 return ret;
325 * Distribution data is a variable size payload containing
326 * signed 16 bit values.
328 static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
330 struct netem_sched_data *q = qdisc_priv(sch);
331 unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
332 const __s16 *data = RTA_DATA(attr);
333 struct disttable *d;
334 int i;
336 if (n > 65536)
337 return -EINVAL;
339 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
340 if (!d)
341 return -ENOMEM;
343 d->size = n;
344 for (i = 0; i < n; i++)
345 d->table[i] = data[i];
347 spin_lock_bh(&sch->dev->queue_lock);
348 d = xchg(&q->delay_dist, d);
349 spin_unlock_bh(&sch->dev->queue_lock);
351 kfree(d);
352 return 0;
355 static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
357 struct netem_sched_data *q = qdisc_priv(sch);
358 const struct tc_netem_corr *c = RTA_DATA(attr);
360 if (RTA_PAYLOAD(attr) != sizeof(*c))
361 return -EINVAL;
363 init_crandom(&q->delay_cor, c->delay_corr);
364 init_crandom(&q->loss_cor, c->loss_corr);
365 init_crandom(&q->dup_cor, c->dup_corr);
366 return 0;
369 static int netem_change(struct Qdisc *sch, struct rtattr *opt)
371 struct netem_sched_data *q = qdisc_priv(sch);
372 struct tc_netem_qopt *qopt;
373 int ret;
375 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
376 return -EINVAL;
378 qopt = RTA_DATA(opt);
379 ret = set_fifo_limit(q->qdisc, qopt->limit);
380 if (ret) {
381 pr_debug("netem: can't set fifo limit\n");
382 return ret;
385 q->latency = qopt->latency;
386 q->jitter = qopt->jitter;
387 q->limit = qopt->limit;
388 q->gap = qopt->gap;
389 q->loss = qopt->loss;
390 q->duplicate = qopt->duplicate;
392 /* Handle nested options after initial queue options.
393 * Should have put all options in nested format but too late now.
395 if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
396 struct rtattr *tb[TCA_NETEM_MAX];
397 if (rtattr_parse(tb, TCA_NETEM_MAX,
398 RTA_DATA(opt) + sizeof(*qopt),
399 RTA_PAYLOAD(opt) - sizeof(*qopt)))
400 return -EINVAL;
402 if (tb[TCA_NETEM_CORR-1]) {
403 ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
404 if (ret)
405 return ret;
408 if (tb[TCA_NETEM_DELAY_DIST-1]) {
409 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
410 if (ret)
411 return ret;
416 return 0;
419 static int netem_init(struct Qdisc *sch, struct rtattr *opt)
421 struct netem_sched_data *q = qdisc_priv(sch);
422 int ret;
424 if (!opt)
425 return -EINVAL;
427 skb_queue_head_init(&q->delayed);
428 init_timer(&q->timer);
429 q->timer.function = netem_watchdog;
430 q->timer.data = (unsigned long) sch;
431 q->counter = 0;
433 q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
434 if (!q->qdisc) {
435 pr_debug("netem: qdisc create failed\n");
436 return -ENOMEM;
439 ret = netem_change(sch, opt);
440 if (ret) {
441 pr_debug("netem: change failed\n");
442 qdisc_destroy(q->qdisc);
444 return ret;
447 static void netem_destroy(struct Qdisc *sch)
449 struct netem_sched_data *q = qdisc_priv(sch);
451 del_timer_sync(&q->timer);
452 qdisc_destroy(q->qdisc);
453 kfree(q->delay_dist);
456 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
458 const struct netem_sched_data *q = qdisc_priv(sch);
459 unsigned char *b = skb->tail;
460 struct rtattr *rta = (struct rtattr *) b;
461 struct tc_netem_qopt qopt;
462 struct tc_netem_corr cor;
464 qopt.latency = q->latency;
465 qopt.jitter = q->jitter;
466 qopt.limit = q->limit;
467 qopt.loss = q->loss;
468 qopt.gap = q->gap;
469 qopt.duplicate = q->duplicate;
470 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
472 cor.delay_corr = q->delay_cor.rho;
473 cor.loss_corr = q->loss_cor.rho;
474 cor.dup_corr = q->dup_cor.rho;
475 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
476 rta->rta_len = skb->tail - b;
478 return skb->len;
480 rtattr_failure:
481 skb_trim(skb, b - skb->data);
482 return -1;
485 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
486 struct sk_buff *skb, struct tcmsg *tcm)
488 struct netem_sched_data *q = qdisc_priv(sch);
490 if (cl != 1) /* only one class */
491 return -ENOENT;
493 tcm->tcm_handle |= TC_H_MIN(1);
494 tcm->tcm_info = q->qdisc->handle;
496 return 0;
499 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
500 struct Qdisc **old)
502 struct netem_sched_data *q = qdisc_priv(sch);
504 if (new == NULL)
505 new = &noop_qdisc;
507 sch_tree_lock(sch);
508 *old = xchg(&q->qdisc, new);
509 qdisc_reset(*old);
510 sch->q.qlen = 0;
511 sch_tree_unlock(sch);
513 return 0;
516 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
518 struct netem_sched_data *q = qdisc_priv(sch);
519 return q->qdisc;
522 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
524 return 1;
527 static void netem_put(struct Qdisc *sch, unsigned long arg)
531 static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
532 struct rtattr **tca, unsigned long *arg)
534 return -ENOSYS;
537 static int netem_delete(struct Qdisc *sch, unsigned long arg)
539 return -ENOSYS;
542 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
544 if (!walker->stop) {
545 if (walker->count >= walker->skip)
546 if (walker->fn(sch, 1, walker) < 0) {
547 walker->stop = 1;
548 return;
550 walker->count++;
554 static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
556 return NULL;
559 static struct Qdisc_class_ops netem_class_ops = {
560 .graft = netem_graft,
561 .leaf = netem_leaf,
562 .get = netem_get,
563 .put = netem_put,
564 .change = netem_change_class,
565 .delete = netem_delete,
566 .walk = netem_walk,
567 .tcf_chain = netem_find_tcf,
568 .dump = netem_dump_class,
571 static struct Qdisc_ops netem_qdisc_ops = {
572 .id = "netem",
573 .cl_ops = &netem_class_ops,
574 .priv_size = sizeof(struct netem_sched_data),
575 .enqueue = netem_enqueue,
576 .dequeue = netem_dequeue,
577 .requeue = netem_requeue,
578 .drop = netem_drop,
579 .init = netem_init,
580 .reset = netem_reset,
581 .destroy = netem_destroy,
582 .change = netem_change,
583 .dump = netem_dump,
584 .owner = THIS_MODULE,
588 static int __init netem_module_init(void)
590 return register_qdisc(&netem_qdisc_ops);
592 static void __exit netem_module_exit(void)
594 unregister_qdisc(&netem_qdisc_ops);
596 module_init(netem_module_init)
597 module_exit(netem_module_exit)
598 MODULE_LICENSE("GPL");