2 * net/sched/estimator.c Simple rate estimator.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 #include <asm/uaccess.h>
13 #include <asm/system.h>
14 #include <linux/bitops.h>
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/kernel.h>
18 #include <linux/jiffies.h>
19 #include <linux/string.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/netdevice.h>
27 #include <linux/skbuff.h>
28 #include <linux/rtnetlink.h>
29 #include <linux/init.h>
31 #include <net/pkt_sched.h>
34 This code is NOT intended to be used for statistics collection,
35 its purpose is to provide a base for statistical multiplexing
36 for controlled load service.
37 If you need only statistics, run a user level daemon which
38 periodically reads byte counters.
40 Unfortunately, rate estimation is not a very easy task.
41 F.e. I did not find a simple way to estimate the current peak rate
42 and even failed to formulate the problem 8)8)
44 So I preferred not to built an estimator into the scheduler,
45 but run this task separately.
46 Ideally, it should be kernel thread(s), but for now it runs
47 from timers, which puts apparent top bounds on the number of rated
48 flows, has minimal overhead on small, but is enough
49 to handle controlled load service, sets of aggregates.
51 We measure rate over A=(1<<interval) seconds and evaluate EWMA:
53 avrate = avrate*(1-W) + rate*W
55 where W is chosen as negative power of 2: W = 2^(-ewma_log)
57 The resulting time constant is:
64 * The stored value for avbps is scaled by 2^5, so that maximal
65 rate is ~1Gbit, avpps is scaled by 2^10.
67 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
68 for HZ=100 and HZ=1024 8)), maximal interval
69 is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
70 are too expensive, longer ones can be implemented
71 at user level painlessly.
74 #define EST_MAX_INTERVAL 5
76 struct qdisc_estimator
78 struct qdisc_estimator
*next
;
79 struct tc_stats
*stats
;
80 spinlock_t
*stats_lock
;
89 struct qdisc_estimator_head
91 struct timer_list timer
;
92 struct qdisc_estimator
*list
;
95 static struct qdisc_estimator_head elist
[EST_MAX_INTERVAL
+1];
97 /* Estimator array lock */
98 static DEFINE_RWLOCK(est_lock
);
100 static void est_timer(unsigned long arg
)
103 struct qdisc_estimator
*e
;
105 read_lock(&est_lock
);
106 for (e
= elist
[idx
].list
; e
; e
= e
->next
) {
107 struct tc_stats
*st
= e
->stats
;
112 spin_lock(e
->stats_lock
);
114 npackets
= st
->packets
;
115 rate
= (nbytes
- e
->last_bytes
)<<(7 - idx
);
116 e
->last_bytes
= nbytes
;
117 e
->avbps
+= ((long)rate
- (long)e
->avbps
) >> e
->ewma_log
;
118 st
->bps
= (e
->avbps
+0xF)>>5;
120 rate
= (npackets
- e
->last_packets
)<<(12 - idx
);
121 e
->last_packets
= npackets
;
122 e
->avpps
+= ((long)rate
- (long)e
->avpps
) >> e
->ewma_log
;
123 e
->stats
->pps
= (e
->avpps
+0x1FF)>>10;
124 spin_unlock(e
->stats_lock
);
127 mod_timer(&elist
[idx
].timer
, jiffies
+ ((HZ
<<idx
)/4));
128 read_unlock(&est_lock
);
131 int qdisc_new_estimator(struct tc_stats
*stats
, spinlock_t
*stats_lock
, struct rtattr
*opt
)
133 struct qdisc_estimator
*est
;
134 struct tc_estimator
*parm
= RTA_DATA(opt
);
136 if (RTA_PAYLOAD(opt
) < sizeof(*parm
))
139 if (parm
->interval
< -2 || parm
->interval
> 3)
142 est
= kmalloc(sizeof(*est
), GFP_KERNEL
);
146 memset(est
, 0, sizeof(*est
));
147 est
->interval
= parm
->interval
+ 2;
149 est
->stats_lock
= stats_lock
;
150 est
->ewma_log
= parm
->ewma_log
;
151 est
->last_bytes
= stats
->bytes
;
152 est
->avbps
= stats
->bps
<<5;
153 est
->last_packets
= stats
->packets
;
154 est
->avpps
= stats
->pps
<<10;
156 est
->next
= elist
[est
->interval
].list
;
157 if (est
->next
== NULL
) {
158 init_timer(&elist
[est
->interval
].timer
);
159 elist
[est
->interval
].timer
.data
= est
->interval
;
160 elist
[est
->interval
].timer
.expires
= jiffies
+ ((HZ
<<est
->interval
)/4);
161 elist
[est
->interval
].timer
.function
= est_timer
;
162 add_timer(&elist
[est
->interval
].timer
);
164 write_lock_bh(&est_lock
);
165 elist
[est
->interval
].list
= est
;
166 write_unlock_bh(&est_lock
);
170 void qdisc_kill_estimator(struct tc_stats
*stats
)
173 struct qdisc_estimator
*est
, **pest
;
175 for (idx
=0; idx
<= EST_MAX_INTERVAL
; idx
++) {
177 pest
= &elist
[idx
].list
;
178 while ((est
=*pest
) != NULL
) {
179 if (est
->stats
!= stats
) {
184 write_lock_bh(&est_lock
);
186 write_unlock_bh(&est_lock
);
191 if (killed
&& elist
[idx
].list
== NULL
)
192 del_timer(&elist
[idx
].timer
);
196 EXPORT_SYMBOL(qdisc_kill_estimator
);
197 EXPORT_SYMBOL(qdisc_new_estimator
);