17c43b0c1SIan Rogers // SPDX-License-Identifier: GPL-2.0 27c43b0c1SIan Rogers /* 37c43b0c1SIan Rogers * Benchmark find_next_bit and related bit operations. 47c43b0c1SIan Rogers * 57c43b0c1SIan Rogers * Copyright 2020 Google LLC. 67c43b0c1SIan Rogers */ 77c43b0c1SIan Rogers #include <stdlib.h> 87c43b0c1SIan Rogers #include "bench.h" 97c43b0c1SIan Rogers #include "../util/stat.h" 107c43b0c1SIan Rogers #include <linux/bitmap.h> 117c43b0c1SIan Rogers #include <linux/bitops.h> 127c43b0c1SIan Rogers #include <linux/time64.h> 137c43b0c1SIan Rogers #include <subcmd/parse-options.h> 147c43b0c1SIan Rogers 157c43b0c1SIan Rogers static unsigned int outer_iterations = 5; 167c43b0c1SIan Rogers static unsigned int inner_iterations = 100000; 177c43b0c1SIan Rogers 187c43b0c1SIan Rogers static const struct option options[] = { 197c43b0c1SIan Rogers OPT_UINTEGER('i', "outer-iterations", &outer_iterations, 20f9f95068SColin Ian King "Number of outer iterations used"), 217c43b0c1SIan Rogers OPT_UINTEGER('j', "inner-iterations", &inner_iterations, 22f9f95068SColin Ian King "Number of inner iterations used"), 237c43b0c1SIan Rogers OPT_END() 247c43b0c1SIan Rogers }; 257c43b0c1SIan Rogers 267c43b0c1SIan Rogers static const char *const bench_usage[] = { 277c43b0c1SIan Rogers "perf bench mem find_bit <options>", 287c43b0c1SIan Rogers NULL 297c43b0c1SIan Rogers }; 307c43b0c1SIan Rogers 317c43b0c1SIan Rogers static unsigned int accumulator; 327c43b0c1SIan Rogers static unsigned int use_of_val; 337c43b0c1SIan Rogers 347c43b0c1SIan Rogers static noinline void workload(int val) 357c43b0c1SIan Rogers { 367c43b0c1SIan Rogers use_of_val += val; 377c43b0c1SIan Rogers accumulator++; 387c43b0c1SIan Rogers } 397c43b0c1SIan Rogers 407c43b0c1SIan Rogers #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) 417c43b0c1SIan Rogers static bool asm_test_bit(long nr, const unsigned long *addr) 427c43b0c1SIan Rogers { 437c43b0c1SIan Rogers bool oldbit; 447c43b0c1SIan Rogers 457c43b0c1SIan Rogers asm volatile("bt %2,%1" 467c43b0c1SIan Rogers : "=@ccc" (oldbit) 477c43b0c1SIan Rogers : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); 487c43b0c1SIan Rogers 497c43b0c1SIan Rogers return oldbit; 507c43b0c1SIan Rogers } 517c43b0c1SIan Rogers #else 527c43b0c1SIan Rogers #define asm_test_bit test_bit 537c43b0c1SIan Rogers #endif 547c43b0c1SIan Rogers 557c43b0c1SIan Rogers static int do_for_each_set_bit(unsigned int num_bits) 567c43b0c1SIan Rogers { 577fc5b571SAndy Shevchenko unsigned long *to_test = bitmap_zalloc(num_bits); 587c43b0c1SIan Rogers struct timeval start, end, diff; 597c43b0c1SIan Rogers u64 runtime_us; 607c43b0c1SIan Rogers struct stats fb_time_stats, tb_time_stats; 617c43b0c1SIan Rogers double time_average, time_stddev; 627c43b0c1SIan Rogers unsigned int bit, i, j; 637c43b0c1SIan Rogers unsigned int set_bits, skip; 647c43b0c1SIan Rogers 657c43b0c1SIan Rogers init_stats(&fb_time_stats); 667c43b0c1SIan Rogers init_stats(&tb_time_stats); 677c43b0c1SIan Rogers 687c43b0c1SIan Rogers for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) { 697c43b0c1SIan Rogers bitmap_zero(to_test, num_bits); 707c43b0c1SIan Rogers skip = num_bits / set_bits; 717c43b0c1SIan Rogers for (i = 0; i < num_bits; i += skip) 7275d7ba32SSean Christopherson __set_bit(i, to_test); 737c43b0c1SIan Rogers 747c43b0c1SIan Rogers for (i = 0; i < outer_iterations; i++) { 75*d1babea9SIan Rogers #ifndef NDEBUG 76*d1babea9SIan Rogers unsigned int old = accumulator; 77*d1babea9SIan Rogers #endif 78*d1babea9SIan Rogers 797c43b0c1SIan Rogers gettimeofday(&start, NULL); 807c43b0c1SIan Rogers for (j = 0; j < inner_iterations; j++) { 817c43b0c1SIan Rogers for_each_set_bit(bit, to_test, num_bits) 827c43b0c1SIan Rogers workload(bit); 837c43b0c1SIan Rogers } 847c43b0c1SIan Rogers gettimeofday(&end, NULL); 857c43b0c1SIan Rogers assert(old + (inner_iterations * set_bits) == accumulator); 867c43b0c1SIan Rogers timersub(&end, &start, &diff); 877c43b0c1SIan Rogers runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 887c43b0c1SIan Rogers update_stats(&fb_time_stats, runtime_us); 897c43b0c1SIan Rogers 90*d1babea9SIan Rogers #ifndef NDEBUG 917c43b0c1SIan Rogers old = accumulator; 92*d1babea9SIan Rogers #endif 937c43b0c1SIan Rogers gettimeofday(&start, NULL); 947c43b0c1SIan Rogers for (j = 0; j < inner_iterations; j++) { 957c43b0c1SIan Rogers for (bit = 0; bit < num_bits; bit++) { 967c43b0c1SIan Rogers if (asm_test_bit(bit, to_test)) 977c43b0c1SIan Rogers workload(bit); 987c43b0c1SIan Rogers } 997c43b0c1SIan Rogers } 1007c43b0c1SIan Rogers gettimeofday(&end, NULL); 1017c43b0c1SIan Rogers assert(old + (inner_iterations * set_bits) == accumulator); 1027c43b0c1SIan Rogers timersub(&end, &start, &diff); 1037c43b0c1SIan Rogers runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 1047c43b0c1SIan Rogers update_stats(&tb_time_stats, runtime_us); 1057c43b0c1SIan Rogers } 1067c43b0c1SIan Rogers 1077c43b0c1SIan Rogers printf("%d operations %d bits set of %d bits\n", 1087c43b0c1SIan Rogers inner_iterations, set_bits, num_bits); 1097c43b0c1SIan Rogers time_average = avg_stats(&fb_time_stats); 1107c43b0c1SIan Rogers time_stddev = stddev_stats(&fb_time_stats); 1117c43b0c1SIan Rogers printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n", 1127c43b0c1SIan Rogers time_average, time_stddev); 1137c43b0c1SIan Rogers time_average = avg_stats(&tb_time_stats); 1147c43b0c1SIan Rogers time_stddev = stddev_stats(&tb_time_stats); 1157c43b0c1SIan Rogers printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n", 1167c43b0c1SIan Rogers time_average, time_stddev); 1177c43b0c1SIan Rogers 1187c43b0c1SIan Rogers if (use_of_val == accumulator) /* Try to avoid compiler tricks. */ 1197c43b0c1SIan Rogers printf("\n"); 1207c43b0c1SIan Rogers } 1217c43b0c1SIan Rogers bitmap_free(to_test); 1227c43b0c1SIan Rogers return 0; 1237c43b0c1SIan Rogers } 1247c43b0c1SIan Rogers 1257c43b0c1SIan Rogers int bench_mem_find_bit(int argc, const char **argv) 1267c43b0c1SIan Rogers { 1277c43b0c1SIan Rogers int err = 0, i; 1287c43b0c1SIan Rogers 1297c43b0c1SIan Rogers argc = parse_options(argc, argv, options, bench_usage, 0); 1307c43b0c1SIan Rogers if (argc) { 1317c43b0c1SIan Rogers usage_with_options(bench_usage, options); 1327c43b0c1SIan Rogers exit(EXIT_FAILURE); 1337c43b0c1SIan Rogers } 1347c43b0c1SIan Rogers 1357c43b0c1SIan Rogers for (i = 1; i <= 2048; i <<= 1) 1367c43b0c1SIan Rogers do_for_each_set_bit(i); 1377c43b0c1SIan Rogers 1387c43b0c1SIan Rogers return err; 1397c43b0c1SIan Rogers } 140