/*
 * Floating proportions
 *
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum x_{j}
 *
 * we get that:
 *
 *   \Sum p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
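
/*
 * Worked example (illustrative numbers, not from the original text): suppose
 * a prop_local last caught up at period count c_{j} = 2 while the global
 * count has since advanced to c = 5.  Three periods were missed, so on the
 * next access its event count is divided by 2^(5 - 2) = 8 and c_{j} is set
 * to 5, which is exactly what halving it at each of the three period
 * expirations would have produced.
 */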

#include <linux/proportions.h>
#include <linux/rcupdate.h>

/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
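
/*
 * Illustrative note (not from the original text): with BITS_PER_LONG == 64
 * this caps shift at 48, so at least the top 16 bits of the global event
 * counter remain available as the elapsed-period count c before it wraps.
 */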

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	percpu_counter_init(&pd->pg[0].events, 0);
	percpu_counter_init(&pd->pg[1].events, 0);

	return 0;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_init(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

	mutex_unlock(&pd->mutex);
}
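
/*
 * Illustrative example (assumed values, not from the original text): lowering
 * the shift from 5 to 3 gives offset = 2, so the summed event count is moved
 * right by two bits before seeding the new pg; the period position and the
 * period count thus keep their meaning under the new, smaller period.
 */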

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from the pg flip in prop_change_shift()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
	rcu_read_unlock();
}
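
/*
 * Usage note (illustrative, not from the original text): every
 * prop_get_global() must be paired with prop_put_global() so the RCU read
 * section opened above is closed again, as __prop_inc_percpu() below does.
 */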

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}
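
/*
 * Illustrative example (assumed values, not from the original text): for a
 * local with *pl_shift == 3 being adjusted to new_shift == 5, offset is -2
 * and the remembered period is shifted left by two bits, rescaling it to the
 * units used by the new global shift.
 */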

/*
 * PERCPU
 */

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	percpu_counter_init(&pl->events, 0);

	return 0;
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we halve the local counter.
	 * basically:
	 *   pl->events >> (global_period - pl->period);
	 *
	 * but since the distributed nature of percpu counters makes division
	 * rather hard, use a regular subtraction loop. This is safe, because
	 * the events will only ever be incremented, hence the subtraction
	 * can never result in a negative number.
	 */
	while (pl->period != global_period) {
		unsigned long val = percpu_counter_read(&pl->events);
		unsigned long half = (val + 1) >> 1;

		/*
		 * Half of zero won't be much less, break out.
		 * This limits the loop to shift iterations, even
		 * if we missed a million.
		 */
		if (!val)
			break;

		percpu_counter_mod(&pl->events, -half);
		pl->period += period;
	}
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}
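
/*
 * Illustrative example (assumed values, not from the original text): with
 * pg->shift == 4 the half-period is 8 events.  If pl->period lags
 * global_period by 24 events, three half-periods were missed, so the loop
 * above subtracts roughly half of pl->events three times (the same net
 * effect as shifting it right by 3) before setting pl->period to
 * global_period.
 */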

/*
 * ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	percpu_counter_mod(&pl->events, 1);
	percpu_counter_mod(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
		struct prop_local_percpu *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
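
/*
 * Usage sketch (illustrative only, not part of the original file): how a
 * caller might combine the percpu API above.  The descriptor, the local
 * state, the function names and the shift value (6, i.e. a period of 64
 * events) are assumptions made for this example, which is therefore kept
 * under #if 0.
 */
#if 0
static struct prop_descriptor example_pd;
static struct prop_local_percpu example_pl;

static int example_setup(void)
{
	int err;

	/* one global descriptor, one local proportion tracked against it */
	err = prop_descriptor_init(&example_pd, 6);
	if (err)
		return err;
	return prop_local_init_percpu(&example_pl);
}

static void example_event(void)
{
	long num, den;

	/* account one event and read back this local's share */
	__prop_inc_percpu(&example_pd, &example_pl);
	prop_fraction_percpu(&example_pd, &example_pl, &num, &den);
	/* num/den approximates this local's recent fraction of all events */
}
#endif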

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;

	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we halve the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}
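
/*
 * Illustrative example (assumed values, not from the original text): with
 * pg->shift == 4 the half-period is 8 events; a lag of 24 events between
 * global_period and pl->period yields period == 3 above, so pl->events is
 * simply shifted right by 3.  The single (non-percpu) counter can be divided
 * directly instead of using the subtraction loop of prop_norm_percpu().
 */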

/*
 * ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_mod(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
		struct prop_local_single *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
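
/*
 * Worked computation (assumed values, not from the original text): with
 * pg->shift == 4, period_2 == 8 and counter_mask == 7.  If the global event
 * counter reads 21, then *denominator == 8 + (21 & 7) == 13, matching
 * period/2 + t % period/2 from the formula above.
 */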