1*7c43b0c1SIan Rogers // SPDX-License-Identifier: GPL-2.0 2*7c43b0c1SIan Rogers /* 3*7c43b0c1SIan Rogers * Benchmark find_next_bit and related bit operations. 4*7c43b0c1SIan Rogers * 5*7c43b0c1SIan Rogers * Copyright 2020 Google LLC. 6*7c43b0c1SIan Rogers */ 7*7c43b0c1SIan Rogers #include <stdlib.h> 8*7c43b0c1SIan Rogers #include "bench.h" 9*7c43b0c1SIan Rogers #include "../util/stat.h" 10*7c43b0c1SIan Rogers #include <linux/bitmap.h> 11*7c43b0c1SIan Rogers #include <linux/bitops.h> 12*7c43b0c1SIan Rogers #include <linux/time64.h> 13*7c43b0c1SIan Rogers #include <subcmd/parse-options.h> 14*7c43b0c1SIan Rogers 15*7c43b0c1SIan Rogers static unsigned int outer_iterations = 5; 16*7c43b0c1SIan Rogers static unsigned int inner_iterations = 100000; 17*7c43b0c1SIan Rogers 18*7c43b0c1SIan Rogers static const struct option options[] = { 19*7c43b0c1SIan Rogers OPT_UINTEGER('i', "outer-iterations", &outer_iterations, 20*7c43b0c1SIan Rogers "Number of outerer iterations used"), 21*7c43b0c1SIan Rogers OPT_UINTEGER('j', "inner-iterations", &inner_iterations, 22*7c43b0c1SIan Rogers "Number of outerer iterations used"), 23*7c43b0c1SIan Rogers OPT_END() 24*7c43b0c1SIan Rogers }; 25*7c43b0c1SIan Rogers 26*7c43b0c1SIan Rogers static const char *const bench_usage[] = { 27*7c43b0c1SIan Rogers "perf bench mem find_bit <options>", 28*7c43b0c1SIan Rogers NULL 29*7c43b0c1SIan Rogers }; 30*7c43b0c1SIan Rogers 31*7c43b0c1SIan Rogers static unsigned int accumulator; 32*7c43b0c1SIan Rogers static unsigned int use_of_val; 33*7c43b0c1SIan Rogers 34*7c43b0c1SIan Rogers static noinline void workload(int val) 35*7c43b0c1SIan Rogers { 36*7c43b0c1SIan Rogers use_of_val += val; 37*7c43b0c1SIan Rogers accumulator++; 38*7c43b0c1SIan Rogers } 39*7c43b0c1SIan Rogers 40*7c43b0c1SIan Rogers #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) 41*7c43b0c1SIan Rogers static bool asm_test_bit(long nr, const unsigned long *addr) 42*7c43b0c1SIan Rogers { 43*7c43b0c1SIan Rogers bool oldbit; 44*7c43b0c1SIan Rogers 45*7c43b0c1SIan Rogers asm volatile("bt %2,%1" 46*7c43b0c1SIan Rogers : "=@ccc" (oldbit) 47*7c43b0c1SIan Rogers : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); 48*7c43b0c1SIan Rogers 49*7c43b0c1SIan Rogers return oldbit; 50*7c43b0c1SIan Rogers } 51*7c43b0c1SIan Rogers #else 52*7c43b0c1SIan Rogers #define asm_test_bit test_bit 53*7c43b0c1SIan Rogers #endif 54*7c43b0c1SIan Rogers 55*7c43b0c1SIan Rogers static int do_for_each_set_bit(unsigned int num_bits) 56*7c43b0c1SIan Rogers { 57*7c43b0c1SIan Rogers unsigned long *to_test = bitmap_alloc(num_bits); 58*7c43b0c1SIan Rogers struct timeval start, end, diff; 59*7c43b0c1SIan Rogers u64 runtime_us; 60*7c43b0c1SIan Rogers struct stats fb_time_stats, tb_time_stats; 61*7c43b0c1SIan Rogers double time_average, time_stddev; 62*7c43b0c1SIan Rogers unsigned int bit, i, j; 63*7c43b0c1SIan Rogers unsigned int set_bits, skip; 64*7c43b0c1SIan Rogers unsigned int old; 65*7c43b0c1SIan Rogers 66*7c43b0c1SIan Rogers init_stats(&fb_time_stats); 67*7c43b0c1SIan Rogers init_stats(&tb_time_stats); 68*7c43b0c1SIan Rogers 69*7c43b0c1SIan Rogers for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) { 70*7c43b0c1SIan Rogers bitmap_zero(to_test, num_bits); 71*7c43b0c1SIan Rogers skip = num_bits / set_bits; 72*7c43b0c1SIan Rogers for (i = 0; i < num_bits; i += skip) 73*7c43b0c1SIan Rogers set_bit(i, to_test); 74*7c43b0c1SIan Rogers 75*7c43b0c1SIan Rogers for (i = 0; i < outer_iterations; i++) { 76*7c43b0c1SIan Rogers old = accumulator; 77*7c43b0c1SIan Rogers gettimeofday(&start, NULL); 78*7c43b0c1SIan Rogers for (j = 0; j < inner_iterations; j++) { 79*7c43b0c1SIan Rogers for_each_set_bit(bit, to_test, num_bits) 80*7c43b0c1SIan Rogers workload(bit); 81*7c43b0c1SIan Rogers } 82*7c43b0c1SIan Rogers gettimeofday(&end, NULL); 83*7c43b0c1SIan Rogers assert(old + (inner_iterations * set_bits) == accumulator); 84*7c43b0c1SIan Rogers timersub(&end, &start, &diff); 85*7c43b0c1SIan Rogers runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 86*7c43b0c1SIan Rogers update_stats(&fb_time_stats, runtime_us); 87*7c43b0c1SIan Rogers 88*7c43b0c1SIan Rogers old = accumulator; 89*7c43b0c1SIan Rogers gettimeofday(&start, NULL); 90*7c43b0c1SIan Rogers for (j = 0; j < inner_iterations; j++) { 91*7c43b0c1SIan Rogers for (bit = 0; bit < num_bits; bit++) { 92*7c43b0c1SIan Rogers if (asm_test_bit(bit, to_test)) 93*7c43b0c1SIan Rogers workload(bit); 94*7c43b0c1SIan Rogers } 95*7c43b0c1SIan Rogers } 96*7c43b0c1SIan Rogers gettimeofday(&end, NULL); 97*7c43b0c1SIan Rogers assert(old + (inner_iterations * set_bits) == accumulator); 98*7c43b0c1SIan Rogers timersub(&end, &start, &diff); 99*7c43b0c1SIan Rogers runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 100*7c43b0c1SIan Rogers update_stats(&tb_time_stats, runtime_us); 101*7c43b0c1SIan Rogers } 102*7c43b0c1SIan Rogers 103*7c43b0c1SIan Rogers printf("%d operations %d bits set of %d bits\n", 104*7c43b0c1SIan Rogers inner_iterations, set_bits, num_bits); 105*7c43b0c1SIan Rogers time_average = avg_stats(&fb_time_stats); 106*7c43b0c1SIan Rogers time_stddev = stddev_stats(&fb_time_stats); 107*7c43b0c1SIan Rogers printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n", 108*7c43b0c1SIan Rogers time_average, time_stddev); 109*7c43b0c1SIan Rogers time_average = avg_stats(&tb_time_stats); 110*7c43b0c1SIan Rogers time_stddev = stddev_stats(&tb_time_stats); 111*7c43b0c1SIan Rogers printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n", 112*7c43b0c1SIan Rogers time_average, time_stddev); 113*7c43b0c1SIan Rogers 114*7c43b0c1SIan Rogers if (use_of_val == accumulator) /* Try to avoid compiler tricks. */ 115*7c43b0c1SIan Rogers printf("\n"); 116*7c43b0c1SIan Rogers } 117*7c43b0c1SIan Rogers bitmap_free(to_test); 118*7c43b0c1SIan Rogers return 0; 119*7c43b0c1SIan Rogers } 120*7c43b0c1SIan Rogers 121*7c43b0c1SIan Rogers int bench_mem_find_bit(int argc, const char **argv) 122*7c43b0c1SIan Rogers { 123*7c43b0c1SIan Rogers int err = 0, i; 124*7c43b0c1SIan Rogers 125*7c43b0c1SIan Rogers argc = parse_options(argc, argv, options, bench_usage, 0); 126*7c43b0c1SIan Rogers if (argc) { 127*7c43b0c1SIan Rogers usage_with_options(bench_usage, options); 128*7c43b0c1SIan Rogers exit(EXIT_FAILURE); 129*7c43b0c1SIan Rogers } 130*7c43b0c1SIan Rogers 131*7c43b0c1SIan Rogers for (i = 1; i <= 2048; i <<= 1) 132*7c43b0c1SIan Rogers do_for_each_set_bit(i); 133*7c43b0c1SIan Rogers 134*7c43b0c1SIan Rogers return err; 135*7c43b0c1SIan Rogers } 136