Import 2.2.4pre6
[davej-history.git] / net / sched / estimator.c
bloba35a9168e3d9aecada71e337a0b8e5f81131cd13
1 /*
2 * net/sched/estimator.c Simple rate estimator.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 #include <asm/uaccess.h>
13 #include <asm/system.h>
14 #include <asm/bitops.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/string.h>
19 #include <linux/mm.h>
20 #include <linux/socket.h>
21 #include <linux/sockios.h>
22 #include <linux/in.h>
23 #include <linux/errno.h>
24 #include <linux/interrupt.h>
25 #include <linux/netdevice.h>
26 #include <linux/skbuff.h>
27 #include <linux/rtnetlink.h>
28 #include <linux/init.h>
29 #include <linux/proc_fs.h>
30 #include <net/sock.h>
31 #include <net/pkt_sched.h>
34 This code is NOT intended to be used for statistics collection,
35 its purpose is to provide a base for statistical multiplexing
36 for controlled load service.
37 If you need only statistics, run a user level daemon which
38 periodically reads byte counters.
40 Unfortunately, rate estimation is not a very easy task.
41 F.e. I did not find a simple way to estimate the current peak rate
42 and even failed to formulate the problem 8)8)
44 So I preferred not to build an estimator into the scheduler,
45 but run this task separately.
46 Ideally, it should be kernel thread(s), but for now it runs
47 from timers, which puts apparent top bounds on the number of rated
48 flows, has minimal overhead on small, but is enough
49 to handle controlled load service, sets of aggregates.
51 We measure rate over A=(1<<interval) seconds and evaluate EWMA:
53 avrate = avrate*(1-W) + rate*W
55 where W is chosen as negative power of 2: W = 2^(-ewma_log)
57 The resulting time constant is:
59 T = A/(-ln(1-W))
62 NOTES.
64 * The stored value for avbps is scaled by 2^5, so that maximal
65 rate is ~1Gbit, avpps is scaled by 2^10.
67 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
68 for HZ=100 and HZ=1024 8)), maximal interval
69 is (HZ/4)*2^EST_MAX_INTERVAL = 8sec. Shorter intervals
70 are too expensive, longer ones can be implemented
71 at user level painlessly.
/* The estimator timers tick every (HZ/4)<<idx jiffies (see est_timer),
 * so HZ must be divisible by 4 for the intervals to be exact.
 */
#if (HZ%4) != 0
#error Bad HZ value.
#endif

/* Interval index runs 0..EST_MAX_INTERVAL: periods from HZ/4 (250 msec)
 * up to (HZ/4)*2^5 (8 sec), doubling per index.
 */
#define EST_MAX_INTERVAL	5
/* One rate estimator, chained on the per-interval list in elist[]. */
struct qdisc_estimator
{
	struct qdisc_estimator	*next;		/* next estimator on the same interval */
	struct tc_stats		*stats;		/* counters sampled; bps/pps written back */
	unsigned		interval;	/* index into elist[], 0..EST_MAX_INTERVAL */
	int			ewma_log;	/* smoothing: W = 2^(-ewma_log) */
	u64			last_bytes;	/* stats->bytes at the previous tick */
	u32			last_packets;	/* stats->packets at the previous tick */
	u32			avpps;		/* EWMA packet rate, fixed point scaled by 2^10 */
	u32			avbps;		/* EWMA byte rate, fixed point scaled by 2^5 */
};
/* Per-interval bucket: one timer drives the list of all estimators
 * sharing that sampling interval.
 */
struct qdisc_estimator_head
{
	struct timer_list	timer;		/* periodic tick, period (HZ/4)<<idx */
	struct qdisc_estimator	*list;		/* estimators on this interval */
};

static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
100 static void est_timer(unsigned long arg)
102 int idx = (int)arg;
103 struct qdisc_estimator *e;
105 for (e = elist[idx].list; e; e = e->next) {
106 u64 nbytes = e->stats->bytes;
107 u32 npackets = e->stats->packets;
108 u32 rate;
110 rate = (nbytes - e->last_bytes)<<(7 - idx);
111 e->last_bytes = nbytes;
112 e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
113 e->stats->bps = (e->avbps+0xF)>>5;
115 rate = (npackets - e->last_packets)<<(12 - idx);
116 e->last_packets = npackets;
117 e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
118 e->stats->pps = (e->avpps+0x1FF)>>10;
121 elist[idx].timer.expires = jiffies + ((HZ/4)<<idx);
122 add_timer(&elist[idx].timer);
125 int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt)
127 struct qdisc_estimator *est;
128 struct tc_estimator *parm = RTA_DATA(opt);
130 if (RTA_PAYLOAD(opt) < sizeof(*parm))
131 return -EINVAL;
133 if (parm->interval < -2 || parm->interval > 3)
134 return -EINVAL;
136 est = kmalloc(sizeof(*est), GFP_KERNEL);
137 if (est == NULL)
138 return -ENOBUFS;
140 memset(est, 0, sizeof(*est));
141 est->interval = parm->interval + 2;
142 est->stats = stats;
143 est->ewma_log = parm->ewma_log;
144 est->last_bytes = stats->bytes;
145 est->avbps = stats->bps<<5;
146 est->last_packets = stats->packets;
147 est->avpps = stats->pps<<10;
149 est->next = elist[est->interval].list;
150 if (est->next == NULL) {
151 init_timer(&elist[est->interval].timer);
152 elist[est->interval].timer.data = est->interval;
153 elist[est->interval].timer.expires = jiffies + ((HZ/4)<<est->interval);
154 elist[est->interval].timer.function = est_timer;
155 add_timer(&elist[est->interval].timer);
157 elist[est->interval].list = est;
158 return 0;
161 void qdisc_kill_estimator(struct tc_stats *stats)
163 int idx;
164 struct qdisc_estimator *est, **pest;
166 for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
167 int killed = 0;
168 pest = &elist[idx].list;
169 while ((est=*pest) != NULL) {
170 if (est->stats != stats) {
171 pest = &est->next;
172 continue;
174 net_serialize_enter();
175 *pest = est->next;
176 net_serialize_leave();
177 kfree(est);
178 killed++;
180 if (killed && elist[idx].list == NULL)
181 del_timer(&elist[idx].timer);