1 /* 2 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> 3 * 4 * License: GNU GPL, version 2 or later. 5 * See the COPYING file in the top-level directory. 6 */ 7 #include "qemu/osdep.h" 8 #include "qemu/processor.h" 9 #include "qemu/atomic.h" 10 #include "qemu/qht.h" 11 #include "qemu/rcu.h" 12 #include "qemu/xxhash.h" 13 #include "qemu/memalign.h" 14 15 struct thread_stats { 16 size_t rd; 17 size_t not_rd; 18 size_t in; 19 size_t not_in; 20 size_t rm; 21 size_t not_rm; 22 size_t rz; 23 size_t not_rz; 24 }; 25 26 struct thread_info { 27 void (*func)(struct thread_info *); 28 struct thread_stats stats; 29 /* 30 * Seed is in the range [1..UINT64_MAX], because the RNG requires 31 * a non-zero seed. To use, subtract 1 and compare against the 32 * threshold with </>=. This lets threshold = 0 never match (0% hit), 33 * and threshold = UINT64_MAX always match (100% hit). 34 */ 35 uint64_t seed; 36 bool write_op; /* writes alternate between insertions and removals */ 37 bool resize_down; 38 } QEMU_ALIGNED(64); /* avoid false sharing among threads */ 39 40 static struct qht ht; 41 static QemuThread *rw_threads; 42 43 #define DEFAULT_RANGE (4096) 44 #define DEFAULT_QHT_N_ELEMS DEFAULT_RANGE 45 46 static unsigned int duration = 1; 47 static unsigned int n_rw_threads = 1; 48 static unsigned long lookup_range = DEFAULT_RANGE; 49 static unsigned long update_range = DEFAULT_RANGE; 50 static size_t init_range = DEFAULT_RANGE; 51 static size_t init_size = DEFAULT_RANGE; 52 static size_t n_ready_threads; 53 static long populate_offset; 54 static long *keys; 55 56 static size_t resize_min; 57 static size_t resize_max; 58 static struct thread_info *rz_info; 59 static unsigned long resize_delay = 1000; 60 static double resize_rate; /* 0.0 to 1.0 */ 61 static unsigned int n_rz_threads = 1; 62 static QemuThread *rz_threads; 63 static bool precompute_hash; 64 65 static double update_rate; /* 0.0 to 1.0 */ 66 static uint64_t update_threshold; 67 static uint64_t resize_threshold; 68 69 static size_t qht_n_elems = DEFAULT_QHT_N_ELEMS; 70 static int qht_mode; 71 72 static bool test_start; 73 static bool test_stop; 74 75 static struct thread_info *rw_info; 76 77 static const char commands_string[] = 78 " -d = duration, in seconds\n" 79 " -n = number of threads\n" 80 "\n" 81 " -o = offset at which keys start\n" 82 " -p = precompute hashes\n" 83 "\n" 84 " -g = set -s,-k,-K,-l,-r to the same value\n" 85 " -s = initial size hint\n" 86 " -k = initial number of keys\n" 87 " -K = initial range of keys (will be rounded up to pow2)\n" 88 " -l = lookup range of keys (will be rounded up to pow2)\n" 89 " -r = update range of keys (will be rounded up to pow2)\n" 90 "\n" 91 " -u = update rate (0.0 to 100.0), 50/50 split of insertions/removals\n" 92 "\n" 93 " -R = enable auto-resize\n" 94 " -S = resize rate (0.0 to 100.0)\n" 95 " -D = delay (in us) between potential resizes\n" 96 " -N = number of resize threads"; 97 98 static void usage_complete(int argc, char *argv[]) 99 { 100 fprintf(stderr, "Usage: %s [options]\n", argv[0]); 101 fprintf(stderr, "options:\n%s\n", commands_string); 102 exit(-1); 103 } 104 105 static bool is_equal(const void *ap, const void *bp) 106 { 107 const long *a = ap; 108 const long *b = bp; 109 110 return *a == *b; 111 } 112 113 static uint32_t h(unsigned long v) 114 { 115 return qemu_xxhash2(v); 116 } 117 118 static uint32_t hval(unsigned long v) 119 { 120 return v; 121 } 122 123 static uint32_t (*hfunc)(unsigned long v) = h; 124 125 /* 126 * From: https://en.wikipedia.org/wiki/Xorshift 127 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only 128 * guaranteed to be >= INT_MAX). 129 */ 130 static uint64_t xorshift64star(uint64_t x) 131 { 132 x ^= x >> 12; /* a */ 133 x ^= x << 25; /* b */ 134 x ^= x >> 27; /* c */ 135 return x * UINT64_C(2685821657736338717); 136 } 137 138 static void do_rz(struct thread_info *info) 139 { 140 struct thread_stats *stats = &info->stats; 141 uint64_t r = info->seed - 1; 142 143 if (r < resize_threshold) { 144 size_t size = info->resize_down ? resize_min : resize_max; 145 bool resized; 146 147 resized = qht_resize(&ht, size); 148 info->resize_down = !info->resize_down; 149 150 if (resized) { 151 stats->rz++; 152 } else { 153 stats->not_rz++; 154 } 155 } 156 g_usleep(resize_delay); 157 } 158 159 static void do_rw(struct thread_info *info) 160 { 161 struct thread_stats *stats = &info->stats; 162 uint64_t r = info->seed - 1; 163 uint32_t hash; 164 long *p; 165 166 if (r >= update_threshold) { 167 bool read; 168 169 p = &keys[r & (lookup_range - 1)]; 170 hash = hfunc(*p); 171 read = qht_lookup(&ht, p, hash); 172 if (read) { 173 stats->rd++; 174 } else { 175 stats->not_rd++; 176 } 177 } else { 178 p = &keys[r & (update_range - 1)]; 179 hash = hfunc(*p); 180 if (info->write_op) { 181 bool written = false; 182 183 if (qht_lookup(&ht, p, hash) == NULL) { 184 written = qht_insert(&ht, p, hash, NULL); 185 } 186 if (written) { 187 stats->in++; 188 } else { 189 stats->not_in++; 190 } 191 } else { 192 bool removed = false; 193 194 if (qht_lookup(&ht, p, hash)) { 195 removed = qht_remove(&ht, p, hash); 196 } 197 if (removed) { 198 stats->rm++; 199 } else { 200 stats->not_rm++; 201 } 202 } 203 info->write_op = !info->write_op; 204 } 205 } 206 207 static void *thread_func(void *p) 208 { 209 struct thread_info *info = p; 210 211 rcu_register_thread(); 212 213 qatomic_inc(&n_ready_threads); 214 while (!qatomic_read(&test_start)) { 215 cpu_relax(); 216 } 217 218 rcu_read_lock(); 219 while (!qatomic_read(&test_stop)) { 220 info->seed = xorshift64star(info->seed); 221 info->func(info); 222 } 223 rcu_read_unlock(); 224 225 rcu_unregister_thread(); 226 return NULL; 227 } 228 229 /* sets everything except info->func */ 230 static void prepare_thread_info(struct thread_info *info, int i) 231 { 232 /* seed for the RNG; each thread should have a different one */ 233 info->seed = (i + 1) ^ time(NULL); 234 /* the first update will be a write */ 235 info->write_op = true; 236 /* the first resize will be down */ 237 info->resize_down = true; 238 239 memset(&info->stats, 0, sizeof(info->stats)); 240 } 241 242 static void 243 th_create_n(QemuThread **threads, struct thread_info **infos, const char *name, 244 void (*func)(struct thread_info *), int offset, int n) 245 { 246 struct thread_info *info; 247 QemuThread *th; 248 int i; 249 250 th = g_malloc(sizeof(*th) * n); 251 *threads = th; 252 253 info = qemu_memalign(64, sizeof(*info) * n); 254 *infos = info; 255 256 for (i = 0; i < n; i++) { 257 prepare_thread_info(&info[i], offset + i); 258 info[i].func = func; 259 qemu_thread_create(&th[i], name, thread_func, &info[i], 260 QEMU_THREAD_JOINABLE); 261 } 262 } 263 264 static void create_threads(void) 265 { 266 th_create_n(&rw_threads, &rw_info, "rw", do_rw, 0, n_rw_threads); 267 th_create_n(&rz_threads, &rz_info, "rz", do_rz, n_rw_threads, n_rz_threads); 268 } 269 270 static void pr_params(void) 271 { 272 printf("Parameters:\n"); 273 printf(" duration: %d s\n", duration); 274 printf(" # of threads: %u\n", n_rw_threads); 275 printf(" initial # of keys: %zu\n", init_size); 276 printf(" initial size hint: %zu\n", qht_n_elems); 277 printf(" auto-resize: %s\n", 278 qht_mode & QHT_MODE_AUTO_RESIZE ? "on" : "off"); 279 if (resize_rate) { 280 printf(" resize_rate: %f%%\n", resize_rate * 100.0); 281 printf(" resize range: %zu-%zu\n", resize_min, resize_max); 282 printf(" # resize threads %u\n", n_rz_threads); 283 } 284 printf(" update rate: %f%%\n", update_rate * 100.0); 285 printf(" offset: %ld\n", populate_offset); 286 printf(" initial key range: %zu\n", init_range); 287 printf(" lookup range: %lu\n", lookup_range); 288 printf(" update range: %lu\n", update_range); 289 } 290 291 static void do_threshold(double rate, uint64_t *threshold) 292 { 293 /* 294 * For 0 <= rate <= 1, scale to fit in a uint64_t. 295 * 296 * Scale by 2**64, with a special case for 1.0. 297 * The remainder of the possible values are scattered between 0 298 * and 0xfffffffffffff800 (nextafter(0x1p64, 0)). 299 * 300 * Note that we cannot simply scale by UINT64_MAX, because that 301 * value is not representable as an IEEE double value. 302 * 303 * If we scale by the next largest value, nextafter(0x1p64, 0), 304 * then the remainder of the possible values are scattered between 305 * 0 and 0xfffffffffffff000. Which leaves us with a gap between 306 * the final two inputs that is twice as large as any other. 307 */ 308 if (rate == 1.0) { 309 *threshold = UINT64_MAX; 310 } else { 311 *threshold = rate * 0x1p64; 312 } 313 } 314 315 static void htable_init(void) 316 { 317 unsigned long n = MAX(init_range, update_range); 318 uint64_t r = time(NULL); 319 size_t retries = 0; 320 size_t i; 321 322 /* avoid allocating memory later by allocating all the keys now */ 323 keys = g_malloc(sizeof(*keys) * n); 324 for (i = 0; i < n; i++) { 325 long val = populate_offset + i; 326 327 keys[i] = precompute_hash ? h(val) : hval(val); 328 } 329 330 /* some sanity checks */ 331 g_assert_cmpuint(lookup_range, <=, n); 332 333 /* compute thresholds */ 334 do_threshold(update_rate, &update_threshold); 335 do_threshold(resize_rate, &resize_threshold); 336 337 if (resize_rate) { 338 resize_min = n / 2; 339 resize_max = n; 340 assert(resize_min < resize_max); 341 } else { 342 n_rz_threads = 0; 343 } 344 345 /* initialize the hash table */ 346 qht_init(&ht, is_equal, qht_n_elems, qht_mode); 347 assert(init_size <= init_range); 348 349 pr_params(); 350 351 fprintf(stderr, "Initialization: populating %zu items...", init_size); 352 for (i = 0; i < init_size; i++) { 353 for (;;) { 354 uint32_t hash; 355 long *p; 356 357 r = xorshift64star(r); 358 p = &keys[r & (init_range - 1)]; 359 hash = hfunc(*p); 360 if (qht_insert(&ht, p, hash, NULL)) { 361 break; 362 } 363 retries++; 364 } 365 } 366 fprintf(stderr, " populated after %zu retries\n", retries); 367 } 368 369 static void add_stats(struct thread_stats *s, struct thread_info *info, int n) 370 { 371 int i; 372 373 for (i = 0; i < n; i++) { 374 struct thread_stats *stats = &info[i].stats; 375 376 s->rd += stats->rd; 377 s->not_rd += stats->not_rd; 378 379 s->in += stats->in; 380 s->not_in += stats->not_in; 381 382 s->rm += stats->rm; 383 s->not_rm += stats->not_rm; 384 385 s->rz += stats->rz; 386 s->not_rz += stats->not_rz; 387 } 388 } 389 390 static void pr_stats(void) 391 { 392 struct thread_stats s = {}; 393 double tx; 394 395 add_stats(&s, rw_info, n_rw_threads); 396 add_stats(&s, rz_info, n_rz_threads); 397 398 printf("Results:\n"); 399 400 if (resize_rate) { 401 printf(" Resizes: %zu (%.2f%% of %zu)\n", 402 s.rz, (double)s.rz / (s.rz + s.not_rz) * 100, s.rz + s.not_rz); 403 } 404 405 printf(" Read: %.2f M (%.2f%% of %.2fM)\n", 406 (double)s.rd / 1e6, 407 (double)s.rd / (s.rd + s.not_rd) * 100, 408 (double)(s.rd + s.not_rd) / 1e6); 409 printf(" Inserted: %.2f M (%.2f%% of %.2fM)\n", 410 (double)s.in / 1e6, 411 (double)s.in / (s.in + s.not_in) * 100, 412 (double)(s.in + s.not_in) / 1e6); 413 printf(" Removed: %.2f M (%.2f%% of %.2fM)\n", 414 (double)s.rm / 1e6, 415 (double)s.rm / (s.rm + s.not_rm) * 100, 416 (double)(s.rm + s.not_rm) / 1e6); 417 418 tx = (s.rd + s.not_rd + s.in + s.not_in + s.rm + s.not_rm) / 1e6 / duration; 419 printf(" Throughput: %.2f MT/s\n", tx); 420 printf(" Throughput/thread: %.2f MT/s/thread\n", tx / n_rw_threads); 421 } 422 423 static void run_test(void) 424 { 425 int i; 426 427 while (qatomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { 428 cpu_relax(); 429 } 430 431 qatomic_set(&test_start, true); 432 g_usleep(duration * G_USEC_PER_SEC); 433 qatomic_set(&test_stop, true); 434 435 for (i = 0; i < n_rw_threads; i++) { 436 qemu_thread_join(&rw_threads[i]); 437 } 438 for (i = 0; i < n_rz_threads; i++) { 439 qemu_thread_join(&rz_threads[i]); 440 } 441 } 442 443 static void parse_args(int argc, char *argv[]) 444 { 445 int c; 446 447 for (;;) { 448 c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:pr:Rs:S:u:"); 449 if (c < 0) { 450 break; 451 } 452 switch (c) { 453 case 'd': 454 duration = atoi(optarg); 455 break; 456 case 'D': 457 resize_delay = atol(optarg); 458 break; 459 case 'g': 460 init_range = pow2ceil(atol(optarg)); 461 lookup_range = pow2ceil(atol(optarg)); 462 update_range = pow2ceil(atol(optarg)); 463 qht_n_elems = atol(optarg); 464 init_size = atol(optarg); 465 break; 466 case 'h': 467 usage_complete(argc, argv); 468 exit(0); 469 case 'k': 470 init_size = atol(optarg); 471 break; 472 case 'K': 473 init_range = pow2ceil(atol(optarg)); 474 break; 475 case 'l': 476 lookup_range = pow2ceil(atol(optarg)); 477 break; 478 case 'n': 479 n_rw_threads = atoi(optarg); 480 break; 481 case 'N': 482 n_rz_threads = atoi(optarg); 483 break; 484 case 'o': 485 populate_offset = atol(optarg); 486 break; 487 case 'p': 488 precompute_hash = true; 489 hfunc = hval; 490 break; 491 case 'r': 492 update_range = pow2ceil(atol(optarg)); 493 break; 494 case 'R': 495 qht_mode |= QHT_MODE_AUTO_RESIZE; 496 break; 497 case 's': 498 qht_n_elems = atol(optarg); 499 break; 500 case 'S': 501 resize_rate = atof(optarg) / 100.0; 502 if (resize_rate > 1.0) { 503 resize_rate = 1.0; 504 } 505 break; 506 case 'u': 507 update_rate = atof(optarg) / 100.0; 508 if (update_rate > 1.0) { 509 update_rate = 1.0; 510 } 511 break; 512 } 513 } 514 } 515 516 int main(int argc, char *argv[]) 517 { 518 parse_args(argc, argv); 519 htable_init(); 520 create_threads(); 521 run_test(); 522 pr_stats(); 523 return 0; 524 } 525