2 * net/sched/estimator.c Simple rate estimator.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 #include <asm/uaccess.h>
13 #include <asm/system.h>
14 #include <asm/bitops.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/string.h>
20 #include <linux/socket.h>
21 #include <linux/sockios.h>
23 #include <linux/errno.h>
24 #include <linux/interrupt.h>
25 #include <linux/netdevice.h>
26 #include <linux/skbuff.h>
27 #include <linux/rtnetlink.h>
28 #include <linux/init.h>
29 #include <linux/proc_fs.h>
31 #include <net/pkt_sched.h>
34 This code is NOT intended to be used for statistics collection,
35 its purpose is to provide a base for statistical multiplexing
36 for controlled load service.
37 If you need only statistics, run a user level daemon which
38 periodically reads byte counters.
40 Unfortunately, rate estimation is not a very easy task.
41 F.e. I did not find a simple way to estimate the current peak rate
42 and even failed to formulate the problem 8)8)
44 So I preferred not to built an estimator into the scheduler,
45 but run this task separately.
46 Ideally, it should be kernel thread(s), but for now it runs
47 from timers, which puts apparent top bounds on the number of rated
48 flows, has minimal overhead on small, but is enough
49 to handle controlled load service, sets of aggregates.
51 We measure rate over A=(1<<interval) seconds and evaluate EWMA:
53 avrate = avrate*(1-W) + rate*W
55 where W is chosen as negative power of 2: W = 2^(-ewma_log)
57 The resulting time constant is:
64 * The stored value for avbps is scaled by 2^5, so that maximal
65 rate is ~1Gbit, avpps is scaled by 2^10.
67 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
68 for HZ=100 and HZ=1024 8)), maximal interval
69 is (HZ/4)*2^EST_MAX_INTERVAL = 8sec. Shorter intervals
70 are too expensive, longer ones can be implemented
71 at user level painlessly.
78 #define EST_MAX_INTERVAL 5
80 struct qdisc_estimator
82 struct qdisc_estimator
*next
;
83 struct tc_stats
*stats
;
92 struct qdisc_estimator_head
94 struct timer_list timer
;
95 struct qdisc_estimator
*list
;
98 static struct qdisc_estimator_head elist
[EST_MAX_INTERVAL
+1];
100 static void est_timer(unsigned long arg
)
103 struct qdisc_estimator
*e
;
105 for (e
= elist
[idx
].list
; e
; e
= e
->next
) {
106 u64 nbytes
= e
->stats
->bytes
;
107 u32 npackets
= e
->stats
->packets
;
110 rate
= (nbytes
- e
->last_bytes
)<<(7 - idx
);
111 e
->last_bytes
= nbytes
;
112 e
->avbps
+= ((long)rate
- (long)e
->avbps
) >> e
->ewma_log
;
113 e
->stats
->bps
= (e
->avbps
+0xF)>>5;
115 rate
= (npackets
- e
->last_packets
)<<(12 - idx
);
116 e
->last_packets
= npackets
;
117 e
->avpps
+= ((long)rate
- (long)e
->avpps
) >> e
->ewma_log
;
118 e
->stats
->pps
= (e
->avpps
+0x1FF)>>10;
121 elist
[idx
].timer
.expires
= jiffies
+ ((HZ
/4)<<idx
);
122 add_timer(&elist
[idx
].timer
);
125 int qdisc_new_estimator(struct tc_stats
*stats
, struct rtattr
*opt
)
127 struct qdisc_estimator
*est
;
128 struct tc_estimator
*parm
= RTA_DATA(opt
);
130 if (RTA_PAYLOAD(opt
) < sizeof(*parm
))
133 if (parm
->interval
< -2 || parm
->interval
> 3)
136 est
= kmalloc(sizeof(*est
), GFP_KERNEL
);
140 memset(est
, 0, sizeof(*est
));
141 est
->interval
= parm
->interval
+ 2;
143 est
->ewma_log
= parm
->ewma_log
;
144 est
->last_bytes
= stats
->bytes
;
145 est
->avbps
= stats
->bps
<<5;
146 est
->last_packets
= stats
->packets
;
147 est
->avpps
= stats
->pps
<<10;
149 est
->next
= elist
[est
->interval
].list
;
150 if (est
->next
== NULL
) {
151 init_timer(&elist
[est
->interval
].timer
);
152 elist
[est
->interval
].timer
.data
= est
->interval
;
153 elist
[est
->interval
].timer
.expires
= jiffies
+ ((HZ
/4)<<est
->interval
);
154 elist
[est
->interval
].timer
.function
= est_timer
;
155 add_timer(&elist
[est
->interval
].timer
);
157 elist
[est
->interval
].list
= est
;
161 void qdisc_kill_estimator(struct tc_stats
*stats
)
164 struct qdisc_estimator
*est
, **pest
;
166 for (idx
=0; idx
<= EST_MAX_INTERVAL
; idx
++) {
168 pest
= &elist
[idx
].list
;
169 while ((est
=*pest
) != NULL
) {
170 if (est
->stats
!= stats
) {
174 net_serialize_enter();
176 net_serialize_leave();
180 if (killed
&& elist
[idx
].list
== NULL
)
181 del_timer(&elist
[idx
].timer
);