Update Red Hat Copyright Notices
[nbdkit.git] / filters / stats / stats.cpp
blob63f41c98acbd3c777923911eb384e5be089e4d9f
1 /* nbdkit
2 * Copyright Red Hat
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <config.h>
35 #include <unordered_map>
36 #include <map>
37 #include <vector>
38 #include <algorithm>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <stdint.h>
43 #include <stdbool.h>
44 #include <inttypes.h>
45 #include <string.h>
46 #include <sys/time.h>
47 #include <fcntl.h>
48 #include <unistd.h>
50 #include <pthread.h>
52 #include <nbdkit-filter.h>
54 #include "cleanup.h"
56 #include "tvdiff.h"
57 #include "windows-compat.h"
/* Settings taken from the filter parameters (see stats_config). */
static char *filename;                  /* statsfile: absolute output path */
static bool append;                     /* statsappend: append, don't truncate */
static double print_threshold = 0.95;   /* statsthreshold / 100; 0 = no histogram */

/* Runtime state set up by stats_get_ready. */
static FILE *fp;                        /* stream the report is written to */
static struct timeval start_t;          /* time at which the filter became ready */
/* Accumulated statistics for one kind of request. */
struct nbdstat {
  const char *name;   /* label used when printing */
  uint64_t ops;       /* number of recorded operations */
  uint64_t bytes;     /* sum of the request sizes */
  uint64_t usecs;     /* sum of the time spent in the requests */

  /* Request size / alignment histogram.
   *
   * Requests are bucketed twice: first by floor(log2(request size)),
   * then by the number of trailing zero bits in the offset.
   *
   * count[size_bits][align_bits] is the number of requests that fell
   * into that bucket. */
  std::unordered_map<int,
                     std::unordered_map<int, uint64_t>> count;

  /* size[size_bits] is the aggregated size of all requests in the
   * given request size bucket. */
  std::unordered_map<int, uint64_t> size;
};
87 /* This lock protects all the stats. */
88 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
89 static nbdstat pread_st = { "read" };
90 static nbdstat pwrite_st = { "write" };
91 static nbdstat trim_st = { "trim" };
92 static nbdstat zero_st = { "zero" };
93 static nbdstat extents_st = { "extents" };
94 static nbdstat cache_st = { "cache" };
95 static nbdstat flush_st = { "flush" };
/* Binary size units, in bytes. */
#define KiB (1 << 10)
#define MiB (1 << 20)
#define GiB (1 << 30)
/* Return the number of trailing zero bits in OFFSET, i.e. the largest
 * power-of-two alignment (expressed in bits) that the offset satisfies.
 *
 * An offset of zero is aligned to everything, so no single answer
 * exists; the special value -1 is returned for it instead.
 */
static int
get_alignment (uint64_t offset)
{
  int bits = 0;

  /* Can't determine an alignment for 0, so use a special flag value. */
  if (offset == 0)
    return -1;

  /* OFFSET is non-zero here, so a set bit is reached after at most 63
   * single-bit shifts.  Shifting the value itself (rather than building
   * a mask with "1ul << i") keeps every shift well-defined for all 64
   * bits, including on platforms where unsigned long is 32 bits wide. */
  while ((offset & 1) == 0) {
    offset >>= 1;
    bits++;
  }

  return bits;
}
134 static char*
135 humansize (uint64_t bytes)
137 int r;
138 char *ret;
140 if (bytes < KiB)
141 r = asprintf (&ret, "%" PRIu64 " bytes", bytes);
142 else if (bytes < MiB)
143 r = asprintf (&ret, "%.2f KiB", bytes / (double)KiB);
144 else if (bytes < GiB)
145 r = asprintf (&ret, "%.2f MiB", bytes / (double)MiB);
146 else
147 r = asprintf (&ret, "%.2f GiB", bytes / (double)GiB);
148 if (r == -1)
149 ret = NULL;
150 return ret;
153 static char *
154 humanrate (uint64_t bytes, uint64_t usecs)
156 double secs = usecs / 1000000.0;
157 return secs != 0.0 ? humansize (bytes / secs) : NULL;
/* Return S unchanged, or the placeholder "(n/a)" when S is NULL, so
 * the result can always be handed to a %s conversion. */
static inline const char *
maybe (char *s)
{
  if (s == NULL)
    return "(n/a)";
  return s;
}
166 static void
167 print_align_hist(const std::unordered_map<int, uint64_t>& align_map)
169 /* Convert to ordered map (convenient, since we need to mutate counts),
170 * find requests for offset zero (any alignment), and calculate total. */
171 std::map<int, uint64_t> align_hist;
172 uint64_t any_align_count = 0;
173 uint64_t total = 0;
174 for (auto &el : align_map) {
175 int bits = el.first;
176 auto requests = el.second;
177 if (bits == -1) {
178 any_align_count = requests;
179 } else {
180 align_hist[bits] = requests;
182 total += requests;
185 /* "Fix-up" alignment counts (requests with 8-bit alignment also have
186 * 7-bit alignment, 6-bit alignment, etc) */
187 for (auto &el : align_hist) {
188 int bits = el.first;
189 auto requests = el.second;
190 while (--bits >= 0) {
191 auto it = align_hist.find(bits);
192 if (it != align_hist.end())
193 it->second += requests;
195 el.second += any_align_count;
198 /* The smallest alignment must have the largest number of requests, so we
199 * can iterate in map-order, skipping over bits for which the number of
200 * requests does not change */
201 auto it = align_hist.begin();
202 auto cutoff = static_cast<uint64_t> ((1-print_threshold) * total);
203 while(it != align_hist.end()) {
204 auto bits = it->first;
205 auto requests = it->second;
207 if (requests < cutoff) {
208 fprintf (fp, " %2d+ bit-aligned: %4.1f%% (%" PRIu64 ")\n",
209 bits, static_cast<double> (requests) / total * 100, requests);
210 break;
213 // Only print if number of requests differs from the next alignment
214 it++;
215 if (it == align_hist.end() || it->second != requests) {
216 fprintf (fp, " %2d bit aligned: %5.1f%% (%" PRIu64 ")\n",
217 bits, static_cast<double>(requests*100) / total, requests);
222 static void
223 print_histogram (const nbdstat *st)
225 // Aggregate over alignment and invert map (so counts are keys and
226 // request size bits are values)
227 double total = 0;
228 std::map<uint64_t, int> req_count_size_m;
229 for (auto &el1 : st->count) {
230 auto &align_map = el1.second;
231 uint64_t requests = 0;
232 for (auto &el2 : align_map) {
233 requests += el2.second;
235 req_count_size_m[requests] = el1.first;
236 total += static_cast<double> (requests);
238 if (st->ops != static_cast<uint64_t> (total)) {
239 fprintf(stderr, "INTERNAL ERROR: per-bucket count (%f) does "
240 "not match total (%" PRIu64 ")!\n", total, st->ops);
241 abort();
244 /* Print block sizes until we have covered the *print_threshold* percentile */
245 auto to_print = static_cast<uint64_t> (print_threshold * total);
246 uint64_t printed_reqs = 0, printed_sizes = 0;
247 for (auto it = req_count_size_m.rbegin(); it != req_count_size_m.rend(); it++) {
248 if (printed_reqs >= to_print) {
249 auto requests = st->ops - printed_reqs;
250 char *total_size = humansize(st->bytes - printed_sizes);
251 fprintf (fp, " other sizes: %4.1f%% (%" PRIu64 " reqs, %s total)\n",
252 static_cast<double> (requests) / total * 100,
253 requests, total_size);
254 free(total_size);
255 break;
258 auto bits = it->second;
259 auto requests = it->first;
260 char *total_size = humansize(st->size.at(bits));
261 fprintf (fp, " %2d bits: %4.1f%% (%" PRIu64 " reqs, %s total)\n", bits,
262 static_cast<double> (requests) / total * 100, requests,
263 total_size);
264 free(total_size);
265 printed_reqs += requests;
266 total_size += st->size.at(bits);
268 print_align_hist (st->count.at(bits));
272 static void
273 print_stat (const nbdstat *st, int64_t usecs)
275 if (st->ops > 0) {
276 char *size = humansize (st->bytes);
277 char *op_rate = humanrate (st->bytes, st->usecs);
278 char *total_rate = humanrate (st->bytes, usecs);
280 fprintf (fp, "%s: %" PRIu64 " ops, %.6f s, %s, %s/s op, %s/s total\n",
281 st->name, st->ops, st->usecs / 1000000.0, maybe (size),
282 maybe (op_rate), maybe (total_rate));
284 free (size);
285 free (op_rate);
286 free (total_rate);
288 if (print_threshold != 0 and st->count.size() != 0) {
289 fprintf (fp, " Request size and alignment breakdown:\n"),
290 print_histogram (st);
291 fprintf (fp, "\n");
296 static void
297 print_totals (uint64_t usecs)
299 uint64_t ops = pread_st.ops + pwrite_st.ops + trim_st.ops + zero_st.ops +
300 extents_st.ops + flush_st.ops;
301 uint64_t bytes = pread_st.bytes + pwrite_st.bytes + trim_st.bytes +
302 zero_st.bytes;
303 char *size = humansize (bytes);
304 char *rate = humanrate (bytes, usecs);
306 fprintf (fp, "total: %" PRIu64 " ops, %.6f s, %s, %s/s\n",
307 ops, usecs / 1000000.0, maybe (size), maybe (rate));
309 free (size);
310 free (rate);
313 static inline void
314 print_stats (int64_t usecs)
316 print_totals (usecs);
317 print_stat (&pread_st, usecs);
318 print_stat (&pwrite_st, usecs);
319 print_stat (&trim_st, usecs);
320 print_stat (&zero_st, usecs);
321 print_stat (&extents_st, usecs);
322 print_stat (&cache_st, usecs);
323 print_stat (&flush_st, usecs);
324 fflush (fp);
327 static void
328 stats_unload (void)
330 struct timeval now;
331 int64_t usecs;
333 gettimeofday (&now, NULL);
334 usecs = tvdiff_usec (&start_t, &now);
335 if (fp && usecs > 0) {
336 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
337 print_stats (usecs);
340 if (fp)
341 fclose (fp);
342 free (filename);
345 static int
346 stats_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
347 const char *key, const char *value)
349 int r;
351 if (strcmp (key, "statsfile") == 0) {
352 free (filename);
353 filename = nbdkit_absolute_path (value);
354 if (filename == NULL)
355 return -1;
356 return 0;
358 else if (strcmp (key, "statsappend") == 0) {
359 r = nbdkit_parse_bool (value);
360 if (r == -1)
361 return -1;
362 append = r;
363 return 0;
365 else if (strcmp (key, "statsthreshold") == 0) {
366 int ival;
367 r = nbdkit_parse_int ("printing threshold", value, &ival);
368 if (r == -1)
369 return -1;
370 if (ival > 100 or ival < 0) {
371 nbdkit_error ("statsthreshold must be between 0 and 100 (percent)");
372 return -1;
374 print_threshold = static_cast<double>(ival) / 100;
375 return 0;
378 return next (nxdata, key, value);
381 static int
382 stats_config_complete (nbdkit_next_config_complete *next,
383 nbdkit_backend *nxdata)
385 if (filename == NULL) {
386 nbdkit_error ("stats filter requires statsfile parameter");
387 return -1;
390 return next (nxdata);
393 static int
394 stats_get_ready (int thread_model)
396 int fd;
398 /* Using fopen("ae"/"we") would be more convenient, but as Haiku
399 * still lacks that, use this instead. Atomicity is not essential
400 * here since .config completes before threads that might fork, if
401 * we have to later add yet another fallback to fcntl(fileno()) for
402 * systems without O_CLOEXEC.
404 fd = open (filename,
405 O_CLOEXEC | O_WRONLY | O_CREAT | (append ? O_APPEND : O_TRUNC),
406 0666);
407 if (fd < 0) {
408 nbdkit_error ("open: %s: %m", filename);
409 return -1;
411 fp = fdopen (fd, append ? "a" : "w");
412 if (fp == NULL) {
413 nbdkit_error ("fdopen: %s: %m", filename);
414 return -1;
417 gettimeofday (&start_t, NULL);
419 return 0;
/* Help text for the parameters accepted by stats_config. */
#define stats_config_help \
  "statsfile=<FILE> (required) The file to place the log in.\n" \
  "statsappend=<BOOL> True to append to the log (default false).\n" \
  "statsthreshold=<PERCENT> Share of requests (0-100) covered by the size and alignment breakdown; 0 disables it (default 95).\n"
426 static inline void
427 record_stat (nbdstat *st, uint32_t size, uint64_t offset,
428 const struct timeval *start)
430 struct timeval end;
431 uint64_t usecs;
433 gettimeofday (&end, NULL);
434 usecs = tvdiff_usec (start, &end);
436 // fast path if not collecting histogram data
437 static bool out_of_memory = false;
438 if (out_of_memory || print_threshold == 0 || size == 0) {
439 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
440 st->ops++;
441 st->bytes += size;
442 st->usecs += usecs;
443 return;
446 // Calculate bits needed to represent request size
447 int size_bits = 0;
448 auto tmp = size;
449 while (tmp >>= 1) {
450 size_bits++;
453 // Calculate trailing zero bits
454 int align_bits = get_alignment (offset);
456 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
457 st->ops++;
458 st->bytes += size;
459 st->usecs += usecs;
461 try {
462 st->count[size_bits][align_bits]++;
463 st->size[size_bits] += size;
465 catch (std::bad_alloc const&) {
466 // Avoid reporting the same error over and over again
467 nbdkit_error ("out of memory for request size statistics");
468 out_of_memory = true;
472 /* Read. */
473 static int
474 stats_pread (nbdkit_next *next,
475 void *handle, void *buf, uint32_t count, uint64_t offset,
476 uint32_t flags, int *err)
478 struct timeval start;
479 int r;
481 gettimeofday (&start, NULL);
482 r = next->pread (next, buf, count, offset, flags, err);
483 if (r == 0) record_stat (&pread_st, count, offset, &start);
484 return r;
487 /* Write. */
488 static int
489 stats_pwrite (nbdkit_next *next,
490 void *handle,
491 const void *buf, uint32_t count, uint64_t offset,
492 uint32_t flags, int *err)
494 struct timeval start;
495 int r;
497 gettimeofday (&start, NULL);
498 r = next->pwrite (next, buf, count, offset, flags, err);
499 if (r == 0) record_stat (&pwrite_st, count, offset, &start);
500 return r;
503 /* Trim. */
504 static int
505 stats_trim (nbdkit_next *next,
506 void *handle,
507 uint32_t count, uint64_t offset, uint32_t flags,
508 int *err)
510 struct timeval start;
511 int r;
513 gettimeofday (&start, NULL);
514 r = next->trim (next, count, offset, flags, err);
515 if (r == 0) record_stat (&trim_st, count, offset, &start);
516 return r;
519 /* Flush. */
520 static int
521 stats_flush (nbdkit_next *next,
522 void *handle, uint32_t flags,
523 int *err)
525 struct timeval start;
526 int r;
528 gettimeofday (&start, NULL);
529 r = next->flush (next, flags, err);
530 if (r == 0) record_stat (&flush_st, 0, 0, &start);
531 return r;
534 /* Zero. */
535 static int
536 stats_zero (nbdkit_next *next,
537 void *handle,
538 uint32_t count, uint64_t offset, uint32_t flags,
539 int *err)
541 struct timeval start;
542 int r;
544 gettimeofday (&start, NULL);
545 r = next->zero (next, count, offset, flags, err);
546 if (r == 0) record_stat (&zero_st, count, offset, &start);
547 return r;
550 /* Extents. */
551 static int
552 stats_extents (nbdkit_next *next,
553 void *handle,
554 uint32_t count, uint64_t offset, uint32_t flags,
555 struct nbdkit_extents *extents, int *err)
557 struct timeval start;
558 int r;
560 gettimeofday (&start, NULL);
561 r = next->extents (next, count, offset, flags, extents, err);
562 /* XXX There's a case for trying to determine how long the extents
563 * will be that are returned to the client (instead of simply using
564 * count), given the flags and the complex rules in the protocol.
566 if (r == 0) record_stat (&extents_st, count, offset, &start);
567 return r;
570 /* Cache. */
571 static int
572 stats_cache (nbdkit_next *next,
573 void *handle,
574 uint32_t count, uint64_t offset, uint32_t flags,
575 int *err)
577 struct timeval start;
578 int r;
580 gettimeofday (&start, NULL);
581 r = next->cache (next, count, offset, flags, err);
582 if (r == 0) record_stat (&cache_st, count, offset, &start);
583 return r;
586 static struct nbdkit_filter filter = []() -> nbdkit_filter {
587 auto f = nbdkit_filter();
588 f.name = "stats";
589 f.longname = "nbdkit stats filter";
590 f.unload = stats_unload;
591 f.config = stats_config;
592 f.config_complete = stats_config_complete;
593 f.config_help = stats_config_help;
594 f.get_ready = stats_get_ready;
595 f.pread = stats_pread;
596 f.pwrite = stats_pwrite;
597 f.flush = stats_flush;
598 f.trim = stats_trim;
599 f.zero = stats_zero;
600 f.extents = stats_extents;
601 f.cache = stats_cache;
603 return f;
604 }();
606 NBDKIT_REGISTER_FILTER(filter)