2 * qdist.c - QEMU helpers for handling frequency distributions of data.
4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
9 #include "qemu/osdep.h"
10 #include "qemu/qdist.h"
14 #define NAN (0.0 / 0.0)
17 void qdist_init(struct qdist
*dist
)
19 dist
->entries
= g_malloc(sizeof(*dist
->entries
));
24 void qdist_destroy(struct qdist
*dist
)
26 g_free(dist
->entries
);
29 static inline int qdist_cmp_double(double a
, double b
)
39 static int qdist_cmp(const void *ap
, const void *bp
)
41 const struct qdist_entry
*a
= ap
;
42 const struct qdist_entry
*b
= bp
;
44 return qdist_cmp_double(a
->x
, b
->x
);
47 void qdist_add(struct qdist
*dist
, double x
, long count
)
49 struct qdist_entry
*entry
= NULL
;
55 entry
= bsearch(&e
, dist
->entries
, dist
->n
, sizeof(e
), qdist_cmp
);
59 entry
->count
+= count
;
63 if (unlikely(dist
->n
== dist
->size
)) {
65 dist
->entries
= g_realloc(dist
->entries
,
66 sizeof(*dist
->entries
) * (dist
->size
));
69 entry
= &dist
->entries
[dist
->n
- 1];
72 qsort(dist
->entries
, dist
->n
, sizeof(*entry
), qdist_cmp
);
75 void qdist_inc(struct qdist
*dist
, double x
)
77 qdist_add(dist
, x
, 1);
81 * Unicode for block elements. See:
82 * https://en.wikipedia.org/wiki/Block_Elements
84 static const gunichar qdist_blocks
[] = {
95 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
98 * Print a distribution into a string.
100 * This function assumes that appropriate binning has been done on the input;
101 * see qdist_bin__internal() and qdist_pr_plain().
103 * Callers must free the returned string with g_free().
105 static char *qdist_pr_internal(const struct qdist
*dist
)
108 GString
*s
= g_string_new("");
111 /* if only one entry, its printout will be either full or empty */
113 if (dist
->entries
[0].count
) {
114 g_string_append_unichar(s
, qdist_blocks
[QDIST_NR_BLOCK_CODES
- 1]);
116 g_string_append_c(s
, ' ');
121 /* get min and max counts */
122 min
= dist
->entries
[0].count
;
124 for (i
= 0; i
< dist
->n
; i
++) {
125 struct qdist_entry
*e
= &dist
->entries
[i
];
127 if (e
->count
< min
) {
130 if (e
->count
> max
) {
135 for (i
= 0; i
< dist
->n
; i
++) {
136 struct qdist_entry
*e
= &dist
->entries
[i
];
139 /* make an exception with 0; instead of using block[0], print a space */
141 /* divide first to avoid loss of precision when e->count == max */
142 index
= (e
->count
- min
) / (max
- min
) * (QDIST_NR_BLOCK_CODES
- 1);
143 g_string_append_unichar(s
, qdist_blocks
[index
]);
145 g_string_append_c(s
, ' ');
149 return g_string_free(s
, FALSE
);
153 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
154 * intervals, copying the result to @to.
156 * This function is internal to qdist: only this file and test code should
159 * Note: calling this function on an already-binned qdist is a bug.
161 * If @n == 0 or @from->n == 1, use @from->n.
163 void qdist_bin__internal(struct qdist
*to
, const struct qdist
*from
, size_t n
)
174 if (n
== 0 || from
->n
== 1) {
178 /* set equally-sized bins between @from's left and right */
179 xmin
= qdist_xmin(from
);
180 xmax
= qdist_xmax(from
);
181 step
= (xmax
- xmin
) / n
;
184 /* if @from's entries are equally spaced, no need to re-bin */
185 for (i
= 0; i
< from
->n
; i
++) {
186 if (from
->entries
[i
].x
!= xmin
+ i
* step
) {
190 /* they're equally spaced, so copy the dist and bail out */
191 to
->entries
= g_new(struct qdist_entry
, from
->n
);
193 memcpy(to
->entries
, from
->entries
, sizeof(*to
->entries
) * to
->n
);
199 for (i
= 0; i
< n
; i
++) {
203 left
= xmin
+ i
* step
;
204 right
= xmin
+ (i
+ 1) * step
;
206 /* Add x, even if it might not get any counts later */
211 * To avoid double-counting we capture [left, right) ranges, except for
212 * the righmost bin, which captures a [left, right] range.
214 while (j
< from
->n
&& (from
->entries
[j
].x
< right
|| i
== n
- 1)) {
215 struct qdist_entry
*o
= &from
->entries
[j
];
217 qdist_add(to
, x
, o
->count
);
224 * Print @dist into a string, after re-binning it into @n bins of consecutive,
225 * non-overlapping intervals.
227 * If @n == 0, use @orig->n.
229 * Callers must free the returned string with g_free().
231 char *qdist_pr_plain(const struct qdist
*dist
, size_t n
)
239 qdist_bin__internal(&binned
, dist
, n
);
240 ret
= qdist_pr_internal(&binned
);
241 qdist_destroy(&binned
);
245 static char *qdist_pr_label(const struct qdist
*dist
, size_t n_bins
,
246 uint32_t opt
, bool is_left
)
257 s
= g_string_new("");
258 if (!(opt
& QDIST_PR_LABELS
)) {
262 dec
= opt
& QDIST_PR_NODECIMAL
? 0 : 1;
263 percent
= opt
& QDIST_PR_PERCENT
? "%" : "";
265 n
= n_bins
? n_bins
: dist
->n
;
266 x
= is_left
? qdist_xmin(dist
) : qdist_xmax(dist
);
267 step
= (qdist_xmax(dist
) - qdist_xmin(dist
)) / n
;
269 if (opt
& QDIST_PR_100X
) {
273 if (opt
& QDIST_PR_NOBINRANGE
) {
274 lparen
= rparen
= "";
276 x2
= x
; /* unnecessary, but a dumb compiler might not figure it out */
279 rparen
= is_left
? ")" : "]";
288 g_string_append_printf(s
, "%s%.*f", lparen
, dec
, x1
);
289 if (!(opt
& QDIST_PR_NOBINRANGE
)) {
290 g_string_append_printf(s
, ",%.*f%s", dec
, x2
, rparen
);
292 g_string_append(s
, percent
);
294 return g_string_free(s
, FALSE
);
298 * Print the distribution's histogram into a string.
300 * See also: qdist_pr_plain().
302 * Callers must free the returned string with g_free().
304 char *qdist_pr(const struct qdist
*dist
, size_t n_bins
, uint32_t opt
)
306 const char *border
= opt
& QDIST_PR_BORDER
? "|" : "";
307 char *llabel
, *rlabel
;
315 s
= g_string_new("");
317 llabel
= qdist_pr_label(dist
, n_bins
, opt
, true);
318 rlabel
= qdist_pr_label(dist
, n_bins
, opt
, false);
319 hgram
= qdist_pr_plain(dist
, n_bins
);
320 g_string_append_printf(s
, "%s%s%s%s%s",
321 llabel
, border
, hgram
, border
, rlabel
);
326 return g_string_free(s
, FALSE
);
329 static inline double qdist_x(const struct qdist
*dist
, int index
)
334 return dist
->entries
[index
].x
;
337 double qdist_xmin(const struct qdist
*dist
)
339 return qdist_x(dist
, 0);
342 double qdist_xmax(const struct qdist
*dist
)
344 return qdist_x(dist
, dist
->n
- 1);
347 size_t qdist_unique_entries(const struct qdist
*dist
)
352 unsigned long qdist_sample_count(const struct qdist
*dist
)
354 unsigned long count
= 0;
357 for (i
= 0; i
< dist
->n
; i
++) {
358 struct qdist_entry
*e
= &dist
->entries
[i
];
365 static double qdist_pairwise_avg(const struct qdist
*dist
, size_t index
,
366 size_t n
, unsigned long count
)
368 /* amortize the recursion by using a base case > 2 */
373 for (i
= 0; i
< n
; i
++) {
374 struct qdist_entry
*e
= &dist
->entries
[index
+ i
];
376 ret
+= e
->x
* e
->count
/ count
;
382 return qdist_pairwise_avg(dist
, index
, n2
, count
) +
383 qdist_pairwise_avg(dist
, index
+ n2
, n
- n2
, count
);
387 double qdist_avg(const struct qdist
*dist
)
391 count
= qdist_sample_count(dist
);
395 return qdist_pairwise_avg(dist
, 0, dist
->n
, count
);