2 * qdist.c - QEMU helpers for handling frequency distributions of data.
4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
9 #include "qemu/qdist.h"
13 #define NAN (0.0 / 0.0)
16 void qdist_init(struct qdist
*dist
)
18 dist
->entries
= g_malloc(sizeof(*dist
->entries
));
23 void qdist_destroy(struct qdist
*dist
)
25 g_free(dist
->entries
);
28 static inline int qdist_cmp_double(double a
, double b
)
38 static int qdist_cmp(const void *ap
, const void *bp
)
40 const struct qdist_entry
*a
= ap
;
41 const struct qdist_entry
*b
= bp
;
43 return qdist_cmp_double(a
->x
, b
->x
);
46 void qdist_add(struct qdist
*dist
, double x
, long count
)
48 struct qdist_entry
*entry
= NULL
;
54 entry
= bsearch(&e
, dist
->entries
, dist
->n
, sizeof(e
), qdist_cmp
);
58 entry
->count
+= count
;
62 if (unlikely(dist
->n
== dist
->size
)) {
64 dist
->entries
= g_realloc(dist
->entries
,
65 sizeof(*dist
->entries
) * (dist
->size
));
68 entry
= &dist
->entries
[dist
->n
- 1];
71 qsort(dist
->entries
, dist
->n
, sizeof(*entry
), qdist_cmp
);
74 void qdist_inc(struct qdist
*dist
, double x
)
76 qdist_add(dist
, x
, 1);
80 * Unicode for block elements. See:
81 * https://en.wikipedia.org/wiki/Block_Elements
83 static const gunichar qdist_blocks
[] = {
94 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
97 * Print a distribution into a string.
99 * This function assumes that appropriate binning has been done on the input;
100 * see qdist_bin__internal() and qdist_pr_plain().
102 * Callers must free the returned string with g_free().
104 static char *qdist_pr_internal(const struct qdist
*dist
)
107 GString
*s
= g_string_new("");
110 /* if only one entry, its printout will be either full or empty */
112 if (dist
->entries
[0].count
) {
113 g_string_append_unichar(s
, qdist_blocks
[QDIST_NR_BLOCK_CODES
- 1]);
115 g_string_append_c(s
, ' ');
120 /* get min and max counts */
121 min
= dist
->entries
[0].count
;
123 for (i
= 0; i
< dist
->n
; i
++) {
124 struct qdist_entry
*e
= &dist
->entries
[i
];
126 if (e
->count
< min
) {
129 if (e
->count
> max
) {
134 for (i
= 0; i
< dist
->n
; i
++) {
135 struct qdist_entry
*e
= &dist
->entries
[i
];
138 /* make an exception with 0; instead of using block[0], print a space */
140 /* divide first to avoid loss of precision when e->count == max */
141 index
= (e
->count
- min
) / (max
- min
) * (QDIST_NR_BLOCK_CODES
- 1);
142 g_string_append_unichar(s
, qdist_blocks
[index
]);
144 g_string_append_c(s
, ' ');
148 return g_string_free(s
, FALSE
);
152 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
153 * intervals, copying the result to @to.
155 * This function is internal to qdist: only this file and test code should
158 * Note: calling this function on an already-binned qdist is a bug.
160 * If @n == 0 or @from->n == 1, use @from->n.
162 void qdist_bin__internal(struct qdist
*to
, const struct qdist
*from
, size_t n
)
173 if (n
== 0 || from
->n
== 1) {
177 /* set equally-sized bins between @from's left and right */
178 xmin
= qdist_xmin(from
);
179 xmax
= qdist_xmax(from
);
180 step
= (xmax
- xmin
) / n
;
183 /* if @from's entries are equally spaced, no need to re-bin */
184 for (i
= 0; i
< from
->n
; i
++) {
185 if (from
->entries
[i
].x
!= xmin
+ i
* step
) {
189 /* they're equally spaced, so copy the dist and bail out */
190 to
->entries
= g_new(struct qdist_entry
, from
->n
);
192 memcpy(to
->entries
, from
->entries
, sizeof(*to
->entries
) * to
->n
);
198 for (i
= 0; i
< n
; i
++) {
202 left
= xmin
+ i
* step
;
203 right
= xmin
+ (i
+ 1) * step
;
205 /* Add x, even if it might not get any counts later */
210 * To avoid double-counting we capture [left, right) ranges, except for
211 * the righmost bin, which captures a [left, right] range.
213 while (j
< from
->n
&& (from
->entries
[j
].x
< right
|| i
== n
- 1)) {
214 struct qdist_entry
*o
= &from
->entries
[j
];
216 qdist_add(to
, x
, o
->count
);
223 * Print @dist into a string, after re-binning it into @n bins of consecutive,
224 * non-overlapping intervals.
226 * If @n == 0, use @orig->n.
228 * Callers must free the returned string with g_free().
230 char *qdist_pr_plain(const struct qdist
*dist
, size_t n
)
238 qdist_bin__internal(&binned
, dist
, n
);
239 ret
= qdist_pr_internal(&binned
);
240 qdist_destroy(&binned
);
244 static char *qdist_pr_label(const struct qdist
*dist
, size_t n_bins
,
245 uint32_t opt
, bool is_left
)
256 s
= g_string_new("");
257 if (!(opt
& QDIST_PR_LABELS
)) {
261 dec
= opt
& QDIST_PR_NODECIMAL
? 0 : 1;
262 percent
= opt
& QDIST_PR_PERCENT
? "%" : "";
264 n
= n_bins
? n_bins
: dist
->n
;
265 x
= is_left
? qdist_xmin(dist
) : qdist_xmax(dist
);
266 step
= (qdist_xmax(dist
) - qdist_xmin(dist
)) / n
;
268 if (opt
& QDIST_PR_100X
) {
272 if (opt
& QDIST_PR_NOBINRANGE
) {
273 lparen
= rparen
= "";
275 x2
= x
; /* unnecessary, but a dumb compiler might not figure it out */
278 rparen
= is_left
? ")" : "]";
287 g_string_append_printf(s
, "%s%.*f", lparen
, dec
, x1
);
288 if (!(opt
& QDIST_PR_NOBINRANGE
)) {
289 g_string_append_printf(s
, ",%.*f%s", dec
, x2
, rparen
);
291 g_string_append(s
, percent
);
293 return g_string_free(s
, FALSE
);
297 * Print the distribution's histogram into a string.
299 * See also: qdist_pr_plain().
301 * Callers must free the returned string with g_free().
303 char *qdist_pr(const struct qdist
*dist
, size_t n_bins
, uint32_t opt
)
305 const char *border
= opt
& QDIST_PR_BORDER
? "|" : "";
306 char *llabel
, *rlabel
;
314 s
= g_string_new("");
316 llabel
= qdist_pr_label(dist
, n_bins
, opt
, true);
317 rlabel
= qdist_pr_label(dist
, n_bins
, opt
, false);
318 hgram
= qdist_pr_plain(dist
, n_bins
);
319 g_string_append_printf(s
, "%s%s%s%s%s",
320 llabel
, border
, hgram
, border
, rlabel
);
325 return g_string_free(s
, FALSE
);
328 static inline double qdist_x(const struct qdist
*dist
, int index
)
333 return dist
->entries
[index
].x
;
336 double qdist_xmin(const struct qdist
*dist
)
338 return qdist_x(dist
, 0);
341 double qdist_xmax(const struct qdist
*dist
)
343 return qdist_x(dist
, dist
->n
- 1);
346 size_t qdist_unique_entries(const struct qdist
*dist
)
351 unsigned long qdist_sample_count(const struct qdist
*dist
)
353 unsigned long count
= 0;
356 for (i
= 0; i
< dist
->n
; i
++) {
357 struct qdist_entry
*e
= &dist
->entries
[i
];
364 static double qdist_pairwise_avg(const struct qdist
*dist
, size_t index
,
365 size_t n
, unsigned long count
)
367 /* amortize the recursion by using a base case > 2 */
372 for (i
= 0; i
< n
; i
++) {
373 struct qdist_entry
*e
= &dist
->entries
[index
+ i
];
375 ret
+= e
->x
* e
->count
/ count
;
381 return qdist_pairwise_avg(dist
, index
, n2
, count
) +
382 qdist_pairwise_avg(dist
, index
+ n2
, n
- n2
, count
);
386 double qdist_avg(const struct qdist
*dist
)
390 count
= qdist_sample_count(dist
);
394 return qdist_pairwise_avg(dist
, 0, dist
->n
, count
);