util/throttle.c

   1 /*
   2  * QEMU throttling infrastructure
   3  *
   4  * Copyright (C) Nodalink, SARL. 2013
   5  *
   6  * Author:
   7  *   Benoît Canet <benoit.canet@irqsave.net>
   8  *
   9  * This program is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU General Public License as
  11  * published by the Free Software Foundation; either version 2 or
  12  * (at your option) version 3 of the License.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 #include "qemu/throttle.h"
  24 #include "qemu/timer.h"
  25 #include "block/aio.h"
  26
  27 /* This function make a bucket leak
  28  *
  29  * @bkt:   the bucket to make leak
  30  * @delta_ns: the time delta
  31  */
  32 void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
  33 {
  34     double leak;
  35
  36     /* compute how much to leak */
  37     leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
  38
  39     /* make the bucket leak */
  40     bkt->level = MAX(bkt->level - leak, 0);
  41 }
  42
  43 /* Calculate the time delta since last leak and make proportionals leaks
  44  *
  45  * @now:      the current timestamp in ns
  46  */
  47 static void throttle_do_leak(ThrottleState *ts, int64_t now)
  48 {
  49     /* compute the time elapsed since the last leak */
  50     int64_t delta_ns = now - ts->previous_leak;
  51     int i;
  52
  53     ts->previous_leak = now;
  54
  55     if (delta_ns <= 0) {
  56         return;
  57     }
  58
  59     /* make each bucket leak */
  60     for (i = 0; i < BUCKETS_COUNT; i++) {
  61         throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
  62     }
  63 }
  64
  65 /* do the real job of computing the time to wait
  66  *
  67  * @limit: the throttling limit
  68  * @extra: the number of operation to delay
  69  * @ret:   the time to wait in ns
  70  */
  71 static int64_t throttle_do_compute_wait(double limit, double extra)
  72 {
  73     double wait = extra * NANOSECONDS_PER_SECOND;
  74     wait /= limit;
  75     return wait;
  76 }
  77
  78 /* This function compute the wait time in ns that a leaky bucket should trigger
  79  *
  80  * @bkt: the leaky bucket we operate on
  81  * @ret: the resulting wait time in ns or 0 if the operation can go through
  82  */
  83 int64_t throttle_compute_wait(LeakyBucket *bkt)
  84 {
  85     double extra; /* the number of extra units blocking the io */
  86
  87     if (!bkt->avg) {
  88         return 0;
  89     }
  90
  91     extra = bkt->level - bkt->max;
  92
  93     if (extra <= 0) {
  94         return 0;
  95     }
  96
  97     return throttle_do_compute_wait(bkt->avg, extra);
  98 }
  99
 100 /* This function compute the time that must be waited while this IO
 101  *
 102  * @is_write:   true if the current IO is a write, false if it's a read
 103  * @ret:        time to wait
 104  */
 105 static int64_t throttle_compute_wait_for(ThrottleState *ts,
 106                                          bool is_write)
 107 {
 108     BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
 109                                    THROTTLE_OPS_TOTAL,
 110                                    THROTTLE_BPS_READ,
 111                                    THROTTLE_OPS_READ},
 112                                   {THROTTLE_BPS_TOTAL,
 113                                    THROTTLE_OPS_TOTAL,
 114                                    THROTTLE_BPS_WRITE,
 115                                    THROTTLE_OPS_WRITE}, };
 116     int64_t wait, max_wait = 0;
 117     int i;
 118
 119     for (i = 0; i < 4; i++) {
 120         BucketType index = to_check[is_write][i];
 121         wait = throttle_compute_wait(&ts->cfg.buckets[index]);
 122         if (wait > max_wait) {
 123             max_wait = wait;
 124         }
 125     }
 126
 127     return max_wait;
 128 }
 129
 130 /* compute the timer for this type of operation
 131  *
 132  * @is_write:   the type of operation
 133  * @now:        the current clock timestamp
 134  * @next_timestamp: the resulting timer
 135  * @ret:        true if a timer must be set
 136  */
 137 bool throttle_compute_timer(ThrottleState *ts,
 138                             bool is_write,
 139                             int64_t now,
 140                             int64_t *next_timestamp)
 141 {
 142     int64_t wait;
 143
 144     /* leak proportionally to the time elapsed */
 145     throttle_do_leak(ts, now);
 146
 147     /* compute the wait time if any */
 148     wait = throttle_compute_wait_for(ts, is_write);
 149
 150     /* if the code must wait compute when the next timer should fire */
 151     if (wait) {
 152         *next_timestamp = now + wait;
 153         return true;
 154     }
 155
 156     /* else no need to wait at all */
 157     *next_timestamp = now;
 158     return false;
 159 }
 160
 161 /* Add timers to event loop */
 162 void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context)
 163 {
 164     ts->timers[0] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
 165                                   ts->read_timer_cb, ts->timer_opaque);
 166     ts->timers[1] = aio_timer_new(new_context, ts->clock_type, SCALE_NS,
 167                                   ts->write_timer_cb, ts->timer_opaque);
 168 }
 169
 170 /* To be called first on the ThrottleState */
 171 void throttle_init(ThrottleState *ts,
 172                    AioContext *aio_context,
 173                    QEMUClockType clock_type,
 174                    QEMUTimerCB *read_timer_cb,
 175                    QEMUTimerCB *write_timer_cb,
 176                    void *timer_opaque)
 177 {
 178     memset(ts, 0, sizeof(ThrottleState));
 179
 180     ts->clock_type = clock_type;
 181     ts->read_timer_cb = read_timer_cb;
 182     ts->write_timer_cb = write_timer_cb;
 183     ts->timer_opaque = timer_opaque;
 184     throttle_attach_aio_context(ts, aio_context);
 185 }
 186
 187 /* destroy a timer */
 188 static void throttle_timer_destroy(QEMUTimer **timer)
 189 {
 190     assert(*timer != NULL);
 191
 192     timer_del(*timer);
 193     timer_free(*timer);
 194     *timer = NULL;
 195 }
 196
 197 /* Remove timers from event loop */
 198 void throttle_detach_aio_context(ThrottleState *ts)
 199 {
 200     int i;
 201
 202     for (i = 0; i < 2; i++) {
 203         throttle_timer_destroy(&ts->timers[i]);
 204     }
 205 }
 206
 207 /* To be called last on the ThrottleState */
 208 void throttle_destroy(ThrottleState *ts)
 209 {
 210     throttle_detach_aio_context(ts);
 211 }
 212
 213 /* is any throttling timer configured */
 214 bool throttle_have_timer(ThrottleState *ts)
 215 {
 216     if (ts->timers[0]) {
 217         return true;
 218     }
 219
 220     return false;
 221 }
 222
 223 /* Does any throttling must be done
 224  *
 225  * @cfg: the throttling configuration to inspect
 226  * @ret: true if throttling must be done else false
 227  */
 228 bool throttle_enabled(ThrottleConfig *cfg)
 229 {
 230     int i;
 231
 232     for (i = 0; i < BUCKETS_COUNT; i++) {
 233         if (cfg->buckets[i].avg > 0) {
 234             return true;
 235         }
 236     }
 237
 238     return false;
 239 }
 240
 241 /* return true if any two throttling parameters conflicts
 242  *
 243  * @cfg: the throttling configuration to inspect
 244  * @ret: true if any conflict detected else false
 245  */
 246 bool throttle_conflicting(ThrottleConfig *cfg)
 247 {
 248     bool bps_flag, ops_flag;
 249     bool bps_max_flag, ops_max_flag;
 250
 251     bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
 252                (cfg->buckets[THROTTLE_BPS_READ].avg ||
 253                 cfg->buckets[THROTTLE_BPS_WRITE].avg);
 254
 255     ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
 256                (cfg->buckets[THROTTLE_OPS_READ].avg ||
 257                 cfg->buckets[THROTTLE_OPS_WRITE].avg);
 258
 259     bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
 260                   (cfg->buckets[THROTTLE_BPS_READ].max  ||
 261                    cfg->buckets[THROTTLE_BPS_WRITE].max);
 262
 263     ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
 264                    (cfg->buckets[THROTTLE_OPS_READ].max ||
 265                    cfg->buckets[THROTTLE_OPS_WRITE].max);
 266
 267     return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
 268 }
 269
 270 /* check if a throttling configuration is valid
 271  * @cfg: the throttling configuration to inspect
 272  * @ret: true if valid else false
 273  */
 274 bool throttle_is_valid(ThrottleConfig *cfg)
 275 {
 276     bool invalid = false;
 277     int i;
 278
 279     for (i = 0; i < BUCKETS_COUNT; i++) {
 280         if (cfg->buckets[i].avg < 0) {
 281             invalid = true;
 282         }
 283     }
 284
 285     for (i = 0; i < BUCKETS_COUNT; i++) {
 286         if (cfg->buckets[i].max < 0) {
 287             invalid = true;
 288         }
 289     }
 290
 291     return !invalid;
 292 }
 293
 294 /* fix bucket parameters */
 295 static void throttle_fix_bucket(LeakyBucket *bkt)
 296 {
 297     double min;
 298
 299     /* zero bucket level */
 300     bkt->level = 0;
 301
 302     /* The following is done to cope with the Linux CFQ block scheduler
 303      * which regroup reads and writes by block of 100ms in the guest.
 304      * When they are two process one making reads and one making writes cfq
 305      * make a pattern looking like the following:
 306      * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
 307      * Having a max burst value of 100ms of the average will help smooth the
 308      * throttling
 309      */
 310     min = bkt->avg / 10;
 311     if (bkt->avg && !bkt->max) {
 312         bkt->max = min;
 313     }
 314 }
 315
 316 /* take care of canceling a timer */
 317 static void throttle_cancel_timer(QEMUTimer *timer)
 318 {
 319     assert(timer != NULL);
 320
 321     timer_del(timer);
 322 }
 323
 324 /* Used to configure the throttle
 325  *
 326  * @ts: the throttle state we are working on
 327  * @cfg: the config to set
 328  */
 329 void throttle_config(ThrottleState *ts, ThrottleConfig *cfg)
 330 {
 331     int i;
 332
 333     ts->cfg = *cfg;
 334
 335     for (i = 0; i < BUCKETS_COUNT; i++) {
 336         throttle_fix_bucket(&ts->cfg.buckets[i]);
 337     }
 338
 339     ts->previous_leak = qemu_clock_get_ns(ts->clock_type);
 340
 341     for (i = 0; i < 2; i++) {
 342         throttle_cancel_timer(ts->timers[i]);
 343     }
 344 }
 345
 346 /* used to get config
 347  *
 348  * @ts:  the throttle state we are working on
 349  * @cfg: the config to write
 350  */
 351 void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
 352 {
 353     *cfg = ts->cfg;
 354 }
 355
 356
 357 /* Schedule the read or write timer if needed
 358  *
 359  * NOTE: this function is not unit tested due to it's usage of timer_mod
 360  *
 361  * @is_write: the type of operation (read/write)
 362  * @ret:      true if the timer has been scheduled else false
 363  */
 364 bool throttle_schedule_timer(ThrottleState *ts, bool is_write)
 365 {
 366     int64_t now = qemu_clock_get_ns(ts->clock_type);
 367     int64_t next_timestamp;
 368     bool must_wait;
 369
 370     must_wait = throttle_compute_timer(ts,
 371                                        is_write,
 372                                        now,
 373                                        &next_timestamp);
 374
 375     /* request not throttled */
 376     if (!must_wait) {
 377         return false;
 378     }
 379
 380     /* request throttled and timer pending -> do nothing */
 381     if (timer_pending(ts->timers[is_write])) {
 382         return true;
 383     }
 384
 385     /* request throttled and timer not pending -> arm timer */
 386     timer_mod(ts->timers[is_write], next_timestamp);
 387     return true;
 388 }
 389
 390 /* do the accounting for this operation
 391  *
 392  * @is_write: the type of operation (read/write)
 393  * @size:     the size of the operation
 394  */
 395 void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
 396 {
 397     double units = 1.0;
 398
 399     /* if cfg.op_size is defined and smaller than size we compute unit count */
 400     if (ts->cfg.op_size && size > ts->cfg.op_size) {
 401         units = (double) size / ts->cfg.op_size;
 402     }
 403
 404     ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
 405     ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
 406
 407     if (is_write) {
 408         ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
 409         ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
 410     } else {
 411         ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
 412         ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
 413     }
 414 }
 415