1 /* 2 * qdist.c - QEMU helpers for handling frequency distributions of data. 3 * 4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> 5 * 6 * License: GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 */ 9 #include "qemu/osdep.h" 10 #include "qemu/qdist.h" 11 12 #include <math.h> 13 #ifndef NAN 14 #define NAN (0.0 / 0.0) 15 #endif 16 17 void qdist_init(struct qdist *dist) 18 { 19 dist->entries = g_malloc(sizeof(*dist->entries)); 20 dist->size = 1; 21 dist->n = 0; 22 } 23 24 void qdist_destroy(struct qdist *dist) 25 { 26 g_free(dist->entries); 27 } 28 29 static inline int qdist_cmp_double(double a, double b) 30 { 31 if (a > b) { 32 return 1; 33 } else if (a < b) { 34 return -1; 35 } 36 return 0; 37 } 38 39 static int qdist_cmp(const void *ap, const void *bp) 40 { 41 const struct qdist_entry *a = ap; 42 const struct qdist_entry *b = bp; 43 44 return qdist_cmp_double(a->x, b->x); 45 } 46 47 void qdist_add(struct qdist *dist, double x, long count) 48 { 49 struct qdist_entry *entry = NULL; 50 51 if (dist->n) { 52 struct qdist_entry e; 53 54 e.x = x; 55 entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); 56 } 57 58 if (entry) { 59 entry->count += count; 60 return; 61 } 62 63 if (unlikely(dist->n == dist->size)) { 64 dist->size *= 2; 65 dist->entries = g_realloc(dist->entries, 66 sizeof(*dist->entries) * (dist->size)); 67 } 68 dist->n++; 69 entry = &dist->entries[dist->n - 1]; 70 entry->x = x; 71 entry->count = count; 72 qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); 73 } 74 75 void qdist_inc(struct qdist *dist, double x) 76 { 77 qdist_add(dist, x, 1); 78 } 79 80 /* 81 * Unicode for block elements. See: 82 * https://en.wikipedia.org/wiki/Block_Elements 83 */ 84 static const gunichar qdist_blocks[] = { 85 0x2581, 86 0x2582, 87 0x2583, 88 0x2584, 89 0x2585, 90 0x2586, 91 0x2587, 92 0x2588 93 }; 94 95 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) 96 97 /* 98 * Print a distribution into a string. 99 * 100 * This function assumes that appropriate binning has been done on the input; 101 * see qdist_bin__internal() and qdist_pr_plain(). 102 * 103 * Callers must free the returned string with g_free(). 104 */ 105 static char *qdist_pr_internal(const struct qdist *dist) 106 { 107 double min, max; 108 GString *s = g_string_new(""); 109 size_t i; 110 111 /* if only one entry, its printout will be either full or empty */ 112 if (dist->n == 1) { 113 if (dist->entries[0].count) { 114 g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); 115 } else { 116 g_string_append_c(s, ' '); 117 } 118 goto out; 119 } 120 121 /* get min and max counts */ 122 min = dist->entries[0].count; 123 max = min; 124 for (i = 0; i < dist->n; i++) { 125 struct qdist_entry *e = &dist->entries[i]; 126 127 if (e->count < min) { 128 min = e->count; 129 } 130 if (e->count > max) { 131 max = e->count; 132 } 133 } 134 135 for (i = 0; i < dist->n; i++) { 136 struct qdist_entry *e = &dist->entries[i]; 137 int index; 138 139 /* make an exception with 0; instead of using block[0], print a space */ 140 if (e->count) { 141 /* divide first to avoid loss of precision when e->count == max */ 142 index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); 143 g_string_append_unichar(s, qdist_blocks[index]); 144 } else { 145 g_string_append_c(s, ' '); 146 } 147 } 148 out: 149 return g_string_free(s, FALSE); 150 } 151 152 /* 153 * Bin the distribution in @from into @n bins of consecutive, non-overlapping 154 * intervals, copying the result to @to. 155 * 156 * This function is internal to qdist: only this file and test code should 157 * ever call it. 158 * 159 * Note: calling this function on an already-binned qdist is a bug. 160 * 161 * If @n == 0 or @from->n == 1, use @from->n. 162 */ 163 void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) 164 { 165 double xmin, xmax; 166 double step; 167 size_t i, j; 168 169 qdist_init(to); 170 171 if (from->n == 0) { 172 return; 173 } 174 if (n == 0 || from->n == 1) { 175 n = from->n; 176 } 177 178 /* set equally-sized bins between @from's left and right */ 179 xmin = qdist_xmin(from); 180 xmax = qdist_xmax(from); 181 step = (xmax - xmin) / n; 182 183 if (n == from->n) { 184 /* if @from's entries are equally spaced, no need to re-bin */ 185 for (i = 0; i < from->n; i++) { 186 if (from->entries[i].x != xmin + i * step) { 187 goto rebin; 188 } 189 } 190 /* they're equally spaced, so copy the dist and bail out */ 191 to->entries = g_new(struct qdist_entry, from->n); 192 to->n = from->n; 193 memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); 194 return; 195 } 196 197 rebin: 198 j = 0; 199 for (i = 0; i < n; i++) { 200 double x; 201 double left, right; 202 203 left = xmin + i * step; 204 right = xmin + (i + 1) * step; 205 206 /* Add x, even if it might not get any counts later */ 207 x = left; 208 qdist_add(to, x, 0); 209 210 /* 211 * To avoid double-counting we capture [left, right) ranges, except for 212 * the righmost bin, which captures a [left, right] range. 213 */ 214 while (j < from->n && (from->entries[j].x < right || i == n - 1)) { 215 struct qdist_entry *o = &from->entries[j]; 216 217 qdist_add(to, x, o->count); 218 j++; 219 } 220 } 221 } 222 223 /* 224 * Print @dist into a string, after re-binning it into @n bins of consecutive, 225 * non-overlapping intervals. 226 * 227 * If @n == 0, use @orig->n. 228 * 229 * Callers must free the returned string with g_free(). 230 */ 231 char *qdist_pr_plain(const struct qdist *dist, size_t n) 232 { 233 struct qdist binned; 234 char *ret; 235 236 if (dist->n == 0) { 237 return NULL; 238 } 239 qdist_bin__internal(&binned, dist, n); 240 ret = qdist_pr_internal(&binned); 241 qdist_destroy(&binned); 242 return ret; 243 } 244 245 static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, 246 uint32_t opt, bool is_left) 247 { 248 const char *percent; 249 const char *lparen; 250 const char *rparen; 251 GString *s; 252 double x1, x2, step; 253 double x; 254 double n; 255 int dec; 256 257 s = g_string_new(""); 258 if (!(opt & QDIST_PR_LABELS)) { 259 goto out; 260 } 261 262 dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; 263 percent = opt & QDIST_PR_PERCENT ? "%" : ""; 264 265 n = n_bins ? n_bins : dist->n; 266 x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); 267 step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; 268 269 if (opt & QDIST_PR_100X) { 270 x *= 100.0; 271 step *= 100.0; 272 } 273 if (opt & QDIST_PR_NOBINRANGE) { 274 lparen = rparen = ""; 275 x1 = x; 276 x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ 277 } else { 278 lparen = "["; 279 rparen = is_left ? ")" : "]"; 280 if (is_left) { 281 x1 = x; 282 x2 = x + step; 283 } else { 284 x1 = x - step; 285 x2 = x; 286 } 287 } 288 g_string_append_printf(s, "%s%.*f", lparen, dec, x1); 289 if (!(opt & QDIST_PR_NOBINRANGE)) { 290 g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); 291 } 292 g_string_append(s, percent); 293 out: 294 return g_string_free(s, FALSE); 295 } 296 297 /* 298 * Print the distribution's histogram into a string. 299 * 300 * See also: qdist_pr_plain(). 301 * 302 * Callers must free the returned string with g_free(). 303 */ 304 char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) 305 { 306 const char *border = opt & QDIST_PR_BORDER ? "|" : ""; 307 char *llabel, *rlabel; 308 char *hgram; 309 GString *s; 310 311 if (dist->n == 0) { 312 return NULL; 313 } 314 315 s = g_string_new(""); 316 317 llabel = qdist_pr_label(dist, n_bins, opt, true); 318 rlabel = qdist_pr_label(dist, n_bins, opt, false); 319 hgram = qdist_pr_plain(dist, n_bins); 320 g_string_append_printf(s, "%s%s%s%s%s", 321 llabel, border, hgram, border, rlabel); 322 g_free(llabel); 323 g_free(rlabel); 324 g_free(hgram); 325 326 return g_string_free(s, FALSE); 327 } 328 329 static inline double qdist_x(const struct qdist *dist, int index) 330 { 331 if (dist->n == 0) { 332 return NAN; 333 } 334 return dist->entries[index].x; 335 } 336 337 double qdist_xmin(const struct qdist *dist) 338 { 339 return qdist_x(dist, 0); 340 } 341 342 double qdist_xmax(const struct qdist *dist) 343 { 344 return qdist_x(dist, dist->n - 1); 345 } 346 347 size_t qdist_unique_entries(const struct qdist *dist) 348 { 349 return dist->n; 350 } 351 352 unsigned long qdist_sample_count(const struct qdist *dist) 353 { 354 unsigned long count = 0; 355 size_t i; 356 357 for (i = 0; i < dist->n; i++) { 358 struct qdist_entry *e = &dist->entries[i]; 359 360 count += e->count; 361 } 362 return count; 363 } 364 365 static double qdist_pairwise_avg(const struct qdist *dist, size_t index, 366 size_t n, unsigned long count) 367 { 368 /* amortize the recursion by using a base case > 2 */ 369 if (n <= 8) { 370 size_t i; 371 double ret = 0; 372 373 for (i = 0; i < n; i++) { 374 struct qdist_entry *e = &dist->entries[index + i]; 375 376 ret += e->x * e->count / count; 377 } 378 return ret; 379 } else { 380 size_t n2 = n / 2; 381 382 return qdist_pairwise_avg(dist, index, n2, count) + 383 qdist_pairwise_avg(dist, index + n2, n - n2, count); 384 } 385 } 386 387 double qdist_avg(const struct qdist *dist) 388 { 389 unsigned long count; 390 391 count = qdist_sample_count(dist); 392 if (!count) { 393 return NAN; 394 } 395 return qdist_pairwise_avg(dist, 0, dist->n, count); 396 } 397