2 * qdist.c - QEMU helpers for handling frequency distributions of data.
4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
9 #include "qemu/osdep.h"
10 #include "qemu/qdist.h"
14 #define NAN (0.0 / 0.0)
17 #define QDIST_EMPTY_STR "(empty)"
19 void qdist_init(struct qdist
*dist
)
21 dist
->entries
= g_new(struct qdist_entry
, 1);
26 void qdist_destroy(struct qdist
*dist
)
28 g_free(dist
->entries
);
31 static inline int qdist_cmp_double(double a
, double b
)
41 static int qdist_cmp(const void *ap
, const void *bp
)
43 const struct qdist_entry
*a
= ap
;
44 const struct qdist_entry
*b
= bp
;
46 return qdist_cmp_double(a
->x
, b
->x
);
49 void qdist_add(struct qdist
*dist
, double x
, long count
)
51 struct qdist_entry
*entry
= NULL
;
57 entry
= bsearch(&e
, dist
->entries
, dist
->n
, sizeof(e
), qdist_cmp
);
61 entry
->count
+= count
;
65 if (unlikely(dist
->n
== dist
->size
)) {
67 dist
->entries
= g_renew(struct qdist_entry
, dist
->entries
, dist
->size
);
70 entry
= &dist
->entries
[dist
->n
- 1];
73 qsort(dist
->entries
, dist
->n
, sizeof(*entry
), qdist_cmp
);
76 void qdist_inc(struct qdist
*dist
, double x
)
78 qdist_add(dist
, x
, 1);
82 * Unicode for block elements. See:
83 * https://en.wikipedia.org/wiki/Block_Elements
85 static const gunichar qdist_blocks
[] = {
96 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
99 * Print a distribution into a string.
101 * This function assumes that appropriate binning has been done on the input;
102 * see qdist_bin__internal() and qdist_pr_plain().
104 * Callers must free the returned string with g_free().
106 static char *qdist_pr_internal(const struct qdist
*dist
)
109 GString
*s
= g_string_new("");
112 /* if only one entry, its printout will be either full or empty */
114 if (dist
->entries
[0].count
) {
115 g_string_append_unichar(s
, qdist_blocks
[QDIST_NR_BLOCK_CODES
- 1]);
117 g_string_append_c(s
, ' ');
122 /* get min and max counts */
123 min
= dist
->entries
[0].count
;
125 for (i
= 0; i
< dist
->n
; i
++) {
126 struct qdist_entry
*e
= &dist
->entries
[i
];
128 if (e
->count
< min
) {
131 if (e
->count
> max
) {
136 for (i
= 0; i
< dist
->n
; i
++) {
137 struct qdist_entry
*e
= &dist
->entries
[i
];
140 /* make an exception with 0; instead of using block[0], print a space */
142 /* divide first to avoid loss of precision when e->count == max */
143 index
= (e
->count
- min
) / (max
- min
) * (QDIST_NR_BLOCK_CODES
- 1);
144 g_string_append_unichar(s
, qdist_blocks
[index
]);
146 g_string_append_c(s
, ' ');
150 return g_string_free(s
, FALSE
);
154 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
155 * intervals, copying the result to @to.
157 * This function is internal to qdist: only this file and test code should
160 * Note: calling this function on an already-binned qdist is a bug.
162 * If @n == 0 or @from->n == 1, use @from->n.
164 void qdist_bin__internal(struct qdist
*to
, const struct qdist
*from
, size_t n
)
175 if (n
== 0 || from
->n
== 1) {
179 /* set equally-sized bins between @from's left and right */
180 xmin
= qdist_xmin(from
);
181 xmax
= qdist_xmax(from
);
182 step
= (xmax
- xmin
) / n
;
185 /* if @from's entries are equally spaced, no need to re-bin */
186 for (i
= 0; i
< from
->n
; i
++) {
187 if (from
->entries
[i
].x
!= xmin
+ i
* step
) {
191 /* they're equally spaced, so copy the dist and bail out */
192 to
->entries
= g_renew(struct qdist_entry
, to
->entries
, n
);
194 memcpy(to
->entries
, from
->entries
, sizeof(*to
->entries
) * to
->n
);
200 for (i
= 0; i
< n
; i
++) {
204 left
= xmin
+ i
* step
;
205 right
= xmin
+ (i
+ 1) * step
;
207 /* Add x, even if it might not get any counts later */
212 * To avoid double-counting we capture [left, right) ranges, except for
213 * the rightmost bin, which captures a [left, right] range.
215 while (j
< from
->n
&& (from
->entries
[j
].x
< right
|| i
== n
- 1)) {
216 struct qdist_entry
*o
= &from
->entries
[j
];
218 qdist_add(to
, x
, o
->count
);
225 * Print @dist into a string, after re-binning it into @n bins of consecutive,
226 * non-overlapping intervals.
228 * If @n == 0, use @orig->n.
230 * Callers must free the returned string with g_free().
232 char *qdist_pr_plain(const struct qdist
*dist
, size_t n
)
238 return g_strdup(QDIST_EMPTY_STR
);
240 qdist_bin__internal(&binned
, dist
, n
);
241 ret
= qdist_pr_internal(&binned
);
242 qdist_destroy(&binned
);
246 static char *qdist_pr_label(const struct qdist
*dist
, size_t n_bins
,
247 uint32_t opt
, bool is_left
)
258 s
= g_string_new("");
259 if (!(opt
& QDIST_PR_LABELS
)) {
263 dec
= opt
& QDIST_PR_NODECIMAL
? 0 : 1;
264 percent
= opt
& QDIST_PR_PERCENT
? "%" : "";
266 n
= n_bins
? n_bins
: dist
->n
;
267 x
= is_left
? qdist_xmin(dist
) : qdist_xmax(dist
);
268 step
= (qdist_xmax(dist
) - qdist_xmin(dist
)) / n
;
270 if (opt
& QDIST_PR_100X
) {
274 if (opt
& QDIST_PR_NOBINRANGE
) {
275 lparen
= rparen
= "";
277 x2
= x
; /* unnecessary, but a dumb compiler might not figure it out */
280 rparen
= is_left
? ")" : "]";
289 g_string_append_printf(s
, "%s%.*f", lparen
, dec
, x1
);
290 if (!(opt
& QDIST_PR_NOBINRANGE
)) {
291 g_string_append_printf(s
, ",%.*f%s", dec
, x2
, rparen
);
293 g_string_append(s
, percent
);
295 return g_string_free(s
, FALSE
);
299 * Print the distribution's histogram into a string.
301 * See also: qdist_pr_plain().
303 * Callers must free the returned string with g_free().
305 char *qdist_pr(const struct qdist
*dist
, size_t n_bins
, uint32_t opt
)
307 const char *border
= opt
& QDIST_PR_BORDER
? "|" : "";
308 char *llabel
, *rlabel
;
313 return g_strdup(QDIST_EMPTY_STR
);
316 s
= g_string_new("");
318 llabel
= qdist_pr_label(dist
, n_bins
, opt
, true);
319 rlabel
= qdist_pr_label(dist
, n_bins
, opt
, false);
320 hgram
= qdist_pr_plain(dist
, n_bins
);
321 g_string_append_printf(s
, "%s%s%s%s%s",
322 llabel
, border
, hgram
, border
, rlabel
);
327 return g_string_free(s
, FALSE
);
330 static inline double qdist_x(const struct qdist
*dist
, int index
)
335 return dist
->entries
[index
].x
;
338 double qdist_xmin(const struct qdist
*dist
)
340 return qdist_x(dist
, 0);
343 double qdist_xmax(const struct qdist
*dist
)
345 return qdist_x(dist
, dist
->n
- 1);
348 size_t qdist_unique_entries(const struct qdist
*dist
)
353 unsigned long qdist_sample_count(const struct qdist
*dist
)
355 unsigned long count
= 0;
358 for (i
= 0; i
< dist
->n
; i
++) {
359 struct qdist_entry
*e
= &dist
->entries
[i
];
366 static double qdist_pairwise_avg(const struct qdist
*dist
, size_t index
,
367 size_t n
, unsigned long count
)
369 /* amortize the recursion by using a base case > 2 */
374 for (i
= 0; i
< n
; i
++) {
375 struct qdist_entry
*e
= &dist
->entries
[index
+ i
];
377 ret
+= e
->x
* e
->count
/ count
;
383 return qdist_pairwise_avg(dist
, index
, n2
, count
) +
384 qdist_pairwise_avg(dist
, index
+ n2
, n
- n2
, count
);
388 double qdist_avg(const struct qdist
*dist
)
392 count
= qdist_sample_count(dist
);
396 return qdist_pairwise_avg(dist
, 0, dist
->n
, count
);