4 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
8 * The floating proportion is a time derivative with an exponentially decaying
11 * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
13 * Where j is an element from {prop_local}, x_{j} is j's number of events,
14 * and i the time period over which the differential is taken. So d/dt_{-i} is
15 * the differential over the i-th last period.
17 * The decaying history gives smooth transitions. The time differential carries
18 * the notion of speed.
20 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
22 * \Sum_{i=0} 1/2^(1+i) = 1
24 * Furthermore, if we measure time (t) in the same events as x; so that:
32 * Writing this in an iterative fashion we get (dropping the 'd's):
34 * if (++x_{j}, ++t > period)
43 * We optimize away the '/= 2' for the global time delta by noting that:
45 * if (++t > period) t /= 2:
47 * Can be approximated by:
49 * period/2 + (++t % (period/2))
51 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
52 * binary operations and wraparound artefacts disappear. ]
54 * Also note that this yields a natural counter of the elapsed periods:
58 * [ Its monotonically increasing property can be applied to mitigate the wrap-
61 * This allows us to do away with the loop over all prop_locals on each period
62 * expiration. By remembering the period count under which it was last accessed
63 * as c_{j}, we can obtain the number of 'missed' cycles from:
67 * We can then lazily catch up to the global period count every time we are
68 * going to use x_{j}, by doing:
70 * x_{j} /= 2^(c - c_{j}), c_{j} = c
73 #include <linux/proportions.h>
74 #include <linux/rcupdate.h>
/*
 * prop_descriptor_init - set up a proportion descriptor.
 * @pd:    descriptor to initialise
 * @shift: requested period shift; values above PROP_MAX_SHIFT are clamped
 *         to PROP_MAX_SHIFT
 *
 * Stores the clamped shift in pd->pg[0], initialises pd->mutex, and sets up
 * the events counters of pd->pg[0] and pd->pg[1] with an initial value of 0.
 *
 * NOTE(review): this listing omits some statements; the declaration of err,
 * the checks on it and the return statement are not visible.  The
 * percpu_counter_destroy() of pg[0].events presumably runs only when the
 * initialisation of pg[1].events fails - confirm against the complete file.
 */
76 int prop_descriptor_init(struct prop_descriptor
*pd
, int shift
)
80 if (shift
> PROP_MAX_SHIFT
)
81 shift
= PROP_MAX_SHIFT
;
84 pd
->pg
[0].shift
= shift
;
85 mutex_init(&pd
->mutex
);
86 err
= percpu_counter_init_irq(&pd
->pg
[0].events
, 0);
90 err
= percpu_counter_init_irq(&pd
->pg
[1].events
, 0);
92 percpu_counter_destroy(&pd
->pg
[0].events
);
99 * We have two copies, and flip between them to make it seem like an atomic
100 * update. The update is not really atomic wrt the events counter, but
101 * it is internally consistent with the bit layout depending on shift.
103 * We copy the events count, move the bits around and flip the index.
/*
 * prop_change_shift - switch the descriptor to a new period shift.
 * @pd:    descriptor to update
 * @shift: new shift; values above PROP_MAX_SHIFT are clamped
 *
 * Runs under pd->mutex.  The inactive copy (pd->index ^ 1) receives the new
 * shift and an events count taken from percpu_counter_sum() of the active
 * copy; the sum and the percpu_counter_set() happen with local interrupts
 * disabled.
 *
 * NOTE(review): this listing omits some statements.  How offset is applied
 * to events, the write barrier that the inline note refers to, and the
 * update of pd->index are not visible here - confirm against the complete
 * file.
 */
105 void prop_change_shift(struct prop_descriptor
*pd
, int shift
)
112 if (shift
> PROP_MAX_SHIFT
)
113 shift
= PROP_MAX_SHIFT
;
115 mutex_lock(&pd
->mutex
);
/* index selects the copy that is currently NOT in use */
117 index
= pd
->index
^ 1;
/* difference between the old and the new shift */
118 offset
= pd
->pg
[pd
->index
].shift
- shift
;
122 pd
->pg
[index
].shift
= shift
;
124 local_irq_save(flags
);
125 events
= percpu_counter_sum(&pd
->pg
[pd
->index
].events
);
130 percpu_counter_set(&pd
->pg
[index
].events
, events
);
133 * ensure the new pg is fully written before the switch
137 local_irq_restore(flags
);
142 mutex_unlock(&pd
->mutex
);
146 * wrap the access to the data in an rcu_read_lock() section;
147 * this is used to track the active references.
/*
 * prop_get_global - obtain a pointer to one of the two pd->pg[] copies.
 * @pd: descriptor to read from
 *
 * Returns &pd->pg[index].
 *
 * NOTE(review): this listing omits the statements that compute index and
 * any locking or barrier calls.  The inline note names vcd_flip(), which is
 * not defined in the visible source; the matching writer is presumably
 * prop_change_shift() - confirm.
 */
149 static struct prop_global
*prop_get_global(struct prop_descriptor
*pd
)
156 * match the wmb from vcd_flip()
159 return &pd
->pg
[index
];
/*
 * prop_put_global - release a pointer obtained from prop_get_global().
 * @pd: descriptor the pointer came from
 * @pg: pointer previously returned by prop_get_global()
 *
 * NOTE(review): only the signature is visible in this listing; presumably
 * this ends the section that prop_get_global() begins - confirm against the
 * complete file.
 */
162 static void prop_put_global(struct prop_descriptor
*pd
, struct prop_global
*pg
)
/*
 * prop_adjust_shift - rescale a local period value to a new shift.
 * @pl_shift:   in/out, the shift the local state was last scaled for
 * @pl_period:  in/out, the local period value to rescale
 * @new_shift:  shift to convert to
 *
 * Computes offset = *pl_shift - new_shift, shifts *pl_period by that
 * amount and records new_shift in *pl_shift.
 *
 * NOTE(review): this listing omits the return type and the conditions
 * guarding the two shifts; presumably the left shift by -offset is used for
 * a negative offset and the right shift by offset otherwise, with nothing
 * done when offset is 0 - confirm against the complete file.
 */
168 prop_adjust_shift(int *pl_shift
, unsigned long *pl_period
, int new_shift
)
170 int offset
= *pl_shift
- new_shift
;
176 *pl_period
<<= -offset
;
178 *pl_period
>>= offset
;
180 *pl_shift
= new_shift
;
/*
 * Batch size handed to __percpu_counter_add() for the per-cpu local event
 * counters; it grows with ilog2(nr_cpu_ids).  prop_norm_percpu() also uses
 * nr_cpu_ids * PROP_BATCH as the threshold below which it replaces the
 * value from percpu_counter_read() with percpu_counter_sum().
 */
187 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
/*
 * prop_local_init_percpu - initialise a per-cpu-counter based local state.
 * @pl: local state to initialise
 *
 * Initialises pl->lock and the pl->events counter (starting at 0).
 * Returns the result of percpu_counter_init_irq().
 *
 * NOTE(review): this listing omits some statements between the lock
 * initialisation and the return; other fields of @pl may be set there.
 */
189 int prop_local_init_percpu(struct prop_local_percpu
*pl
)
191 spin_lock_init(&pl
->lock
);
194 return percpu_counter_init_irq(&pl
->events
, 0);
/*
 * prop_local_destroy_percpu - tear down a per-cpu-counter based local state.
 * @pl: local state whose events counter is destroyed
 *
 * Counterpart of prop_local_init_percpu(): destroys pl->events.
 */
197 void prop_local_destroy_percpu(struct prop_local_percpu
*pl
)
199 percpu_counter_destroy(&pl
->events
);
203 * Catch up with missed period expirations.
/*
 * prop_norm_percpu - bring a per-cpu local counter up to the global period.
 * @pg: global state (shift and global events counter)
 * @pl: local state to normalise
 *
 * The global period is the global events count with the low (pg->shift - 1)
 * bits masked off.  If pl->period already equals it, nothing needs doing.
 * Otherwise, under pl->lock with interrupts disabled, the local shift and
 * period are rescaled to pg->shift, the number of missed periods is
 * computed, the local events counter is reduced from val to
 * (val >> missed periods), and pl->period is set to the global period.
 *
 * NOTE(review): this listing omits some statements - the early return of
 * the fast path, the declaration of flags, the last argument of
 * __percpu_counter_add() and the branch structure around
 * percpu_counter_set(&pl->events, 0) are not visible.  Presumably the
 * counter is set to 0 only when the missed period count is
 * >= BITS_PER_LONG - confirm against the complete file.
 */
210 void prop_norm_percpu(struct prop_global
*pg
, struct prop_local_percpu
*pl
)
212 unsigned long period
= 1UL << (pg
->shift
- 1);
213 unsigned long period_mask
= ~(period
- 1);
214 unsigned long global_period
;
217 global_period
= percpu_counter_read(&pg
->events
);
218 global_period
&= period_mask
;
221 * Fast path - check if the local and global period count still match
222 * outside of the lock.
224 if (pl
->period
== global_period
)
227 spin_lock_irqsave(&pl
->lock
, flags
);
228 prop_adjust_shift(&pl
->shift
, &pl
->period
, pg
->shift
);
231 * For each missed period, we half the local counter.
233 * pl->events >> (global_period - pl->period);
/* from here on, period holds the number of missed periods */
235 period
= (global_period
- pl
->period
) >> (pg
->shift
- 1);
236 if (period
< BITS_PER_LONG
) {
237 s64 val
= percpu_counter_read(&pl
->events
);
/* for small values, use the full sum instead of the cheap read */
239 if (val
< (nr_cpu_ids
* PROP_BATCH
))
240 val
= percpu_counter_sum(&pl
->events
);
/* adding (-val + (val >> period)) leaves val >> period in the counter */
242 __percpu_counter_add(&pl
->events
, -val
+ (val
>> period
),
245 percpu_counter_set(&pl
->events
, 0);
247 pl
->period
= global_period
;
248 spin_unlock_irqrestore(&pl
->lock
, flags
);
/*
 * __prop_inc_percpu - account one event to a per-cpu local counter.
 * @pd: descriptor the event belongs to
 * @pl: local state to credit
 *
 * Takes the current global state, normalises @pl against it, then adds 1 to
 * the local events counter (batched with PROP_BATCH) and 1 to the global
 * events counter, and finally releases the global state.
 */
254 void __prop_inc_percpu(struct prop_descriptor
*pd
, struct prop_local_percpu
*pl
)
256 struct prop_global
*pg
= prop_get_global(pd
);
258 prop_norm_percpu(pg
, pl
);
259 __percpu_counter_add(&pl
->events
, 1, PROP_BATCH
);
260 percpu_counter_add(&pg
->events
, 1);
261 prop_put_global(pd
, pg
);
265 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
266 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
/*
 * __prop_inc_percpu_max - account one event, subject to a fraction limit.
 * @pd:   descriptor the event belongs to
 * @pl:   local state to credit
 * @frac: limit on this local's share; PROP_FRAC_BASE skips the limit check
 *
 * After normalising @pl, and only when @frac differs from PROP_FRAC_BASE,
 * the local count (numerator) is compared with
 * (denominator * frac) >> PROP_FRAC_SHIFT, where denominator is
 * period/2 plus the global count modulo period/2.  Otherwise 1 is added to
 * both the local and the global events counters.
 *
 * NOTE(review): this listing omits the statement executed when the limit
 * check is true; presumably it skips both increments and goes straight to
 * prop_put_global() - confirm against the complete file.
 * NOTE(review): the local counter is bumped with percpu_counter_add() here,
 * whereas __prop_inc_percpu() uses __percpu_counter_add() with PROP_BATCH -
 * confirm the different batching is intended.
 */
268 void __prop_inc_percpu_max(struct prop_descriptor
*pd
,
269 struct prop_local_percpu
*pl
, long frac
)
271 struct prop_global
*pg
= prop_get_global(pd
);
273 prop_norm_percpu(pg
, pl
);
275 if (unlikely(frac
!= PROP_FRAC_BASE
)) {
276 unsigned long period_2
= 1UL << (pg
->shift
- 1);
277 unsigned long counter_mask
= period_2
- 1;
278 unsigned long global_count
;
279 long numerator
, denominator
;
281 numerator
= percpu_counter_read_positive(&pl
->events
);
282 global_count
= percpu_counter_read(&pg
->events
);
283 denominator
= period_2
+ (global_count
& counter_mask
);
285 if (numerator
> ((denominator
* frac
) >> PROP_FRAC_SHIFT
))
289 percpu_counter_add(&pl
->events
, 1);
290 percpu_counter_add(&pg
->events
, 1);
293 prop_put_global(pd
, pg
);
297 * Obtain a fraction of this proportion
299 * p_{j} = x_{j} / (period/2 + t % (period/2))
/*
 * prop_fraction_percpu - report this local's share as a fraction.
 * @pd:          descriptor to evaluate against
 * @pl:          per-cpu local state
 * @numerator:   out, non-negative read of the local events counter
 * @denominator: out, period/2 + (global events count & (period/2 - 1)),
 *               with period/2 = 1UL << (pg->shift - 1)
 *
 * @pl is normalised against the current global state before the local
 * counter is read.  Both counters are read with the percpu_counter_read*()
 * variants rather than percpu_counter_sum().
 */
301 void prop_fraction_percpu(struct prop_descriptor
*pd
,
302 struct prop_local_percpu
*pl
,
303 long *numerator
, long *denominator
)
305 struct prop_global
*pg
= prop_get_global(pd
);
306 unsigned long period_2
= 1UL << (pg
->shift
- 1);
307 unsigned long counter_mask
= period_2
- 1;
308 unsigned long global_count
;
310 prop_norm_percpu(pg
, pl
);
311 *numerator
= percpu_counter_read_positive(&pl
->events
);
313 global_count
= percpu_counter_read(&pg
->events
);
314 *denominator
= period_2
+ (global_count
& counter_mask
);
316 prop_put_global(pd
, pg
);
/*
 * prop_local_init_single - initialise a plain-counter based local state.
 * @pl: local state to initialise
 *
 * Initialises pl->lock.
 *
 * NOTE(review): this listing omits the rest of the body, including the
 * return statement; other fields of @pl are presumably reset there -
 * confirm against the complete file.
 */
323 int prop_local_init_single(struct prop_local_single
*pl
)
325 spin_lock_init(&pl
->lock
);
/*
 * prop_local_destroy_single - tear down a plain-counter based local state.
 * @pl: local state to destroy
 *
 * NOTE(review): only the signature is visible in this listing; no body
 * statements can be confirmed from here.
 */
332 void prop_local_destroy_single(struct prop_local_single
*pl
)
337 * Catch up with missed period expirations.
/*
 * prop_norm_single - bring a plain local counter up to the global period.
 * @pg: global state (shift and global events counter)
 * @pl: local state to normalise
 *
 * The global period is the global events count with the low (pg->shift - 1)
 * bits masked off.  If pl->period already equals it, nothing needs doing.
 * Otherwise, under pl->lock with interrupts disabled, the local shift and
 * period are rescaled to pg->shift, pl->events is shifted right by the
 * number of missed periods when that number is below BITS_PER_LONG, and
 * pl->period is set to the global period.
 *
 * NOTE(review): this listing omits some statements - the early return of
 * the fast path, the declaration of flags and whatever happens when the
 * missed period count is >= BITS_PER_LONG are not visible; confirm against
 * the complete file.
 */
340 void prop_norm_single(struct prop_global
*pg
, struct prop_local_single
*pl
)
342 unsigned long period
= 1UL << (pg
->shift
- 1);
343 unsigned long period_mask
= ~(period
- 1);
344 unsigned long global_period
;
347 global_period
= percpu_counter_read(&pg
->events
);
348 global_period
&= period_mask
;
351 * Fast path - check if the local and global period count still match
352 * outside of the lock.
354 if (pl
->period
== global_period
)
357 spin_lock_irqsave(&pl
->lock
, flags
);
358 prop_adjust_shift(&pl
->shift
, &pl
->period
, pg
->shift
);
360 * For each missed period, we half the local counter.
/* from here on, period holds the number of missed periods */
362 period
= (global_period
- pl
->period
) >> (pg
->shift
- 1);
363 if (likely(period
< BITS_PER_LONG
))
364 pl
->events
>>= period
;
367 pl
->period
= global_period
;
368 spin_unlock_irqrestore(&pl
->lock
, flags
);
/*
 * __prop_inc_single - account one event to a plain local counter.
 * @pd: descriptor the event belongs to
 * @pl: local state to credit
 *
 * Takes the current global state, normalises @pl against it, adds 1 to the
 * global events counter and releases the global state.
 *
 * NOTE(review): no increment of the local count is visible in this listing;
 * a statement appears to be missing between the normalisation and the
 * global increment - confirm against the complete file.
 */
374 void __prop_inc_single(struct prop_descriptor
*pd
, struct prop_local_single
*pl
)
376 struct prop_global
*pg
= prop_get_global(pd
);
378 prop_norm_single(pg
, pl
);
380 percpu_counter_add(&pg
->events
, 1);
381 prop_put_global(pd
, pg
);
385 * Obtain a fraction of this proportion
387 * p_{j} = x_{j} / (period/2 + t % (period/2))
/*
 * prop_fraction_single - report this local's share as a fraction.
 * @pd:          descriptor to evaluate against
 * @pl:          plain-counter local state
 * @numerator:   out, pl->events after normalisation
 * @denominator: out, period/2 + (global events count & (period/2 - 1)),
 *               with period/2 = 1UL << (pg->shift - 1)
 *
 * @pl is normalised against the current global state before pl->events is
 * read.  The global count comes from percpu_counter_read() rather than
 * percpu_counter_sum().
 */
389 void prop_fraction_single(struct prop_descriptor
*pd
,
390 struct prop_local_single
*pl
,
391 long *numerator
, long *denominator
)
393 struct prop_global
*pg
= prop_get_global(pd
);
394 unsigned long period_2
= 1UL << (pg
->shift
- 1);
395 unsigned long counter_mask
= period_2
- 1;
396 unsigned long global_count
;
398 prop_norm_single(pg
, pl
);
399 *numerator
= pl
->events
;
401 global_count
= percpu_counter_read(&pg
->events
);
402 *denominator
= period_2
+ (global_count
& counter_mask
);
404 prop_put_global(pd
, pg
);