xref: /openbmc/linux/tools/perf/bench/find-bit-bench.c (revision 7c43b0c1d499c790cdb5623d27fdcef544ddb566)
1*7c43b0c1SIan Rogers // SPDX-License-Identifier: GPL-2.0
2*7c43b0c1SIan Rogers /*
3*7c43b0c1SIan Rogers  * Benchmark find_next_bit and related bit operations.
4*7c43b0c1SIan Rogers  *
5*7c43b0c1SIan Rogers  * Copyright 2020 Google LLC.
6*7c43b0c1SIan Rogers  */
7*7c43b0c1SIan Rogers #include <stdlib.h>
8*7c43b0c1SIan Rogers #include "bench.h"
9*7c43b0c1SIan Rogers #include "../util/stat.h"
10*7c43b0c1SIan Rogers #include <linux/bitmap.h>
11*7c43b0c1SIan Rogers #include <linux/bitops.h>
12*7c43b0c1SIan Rogers #include <linux/time64.h>
13*7c43b0c1SIan Rogers #include <subcmd/parse-options.h>
14*7c43b0c1SIan Rogers 
15*7c43b0c1SIan Rogers static unsigned int outer_iterations = 5;
16*7c43b0c1SIan Rogers static unsigned int inner_iterations = 100000;
17*7c43b0c1SIan Rogers 
18*7c43b0c1SIan Rogers static const struct option options[] = {
19*7c43b0c1SIan Rogers 	OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
20*7c43b0c1SIan Rogers 		"Number of outerer iterations used"),
21*7c43b0c1SIan Rogers 	OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
22*7c43b0c1SIan Rogers 		"Number of outerer iterations used"),
23*7c43b0c1SIan Rogers 	OPT_END()
24*7c43b0c1SIan Rogers };
25*7c43b0c1SIan Rogers 
26*7c43b0c1SIan Rogers static const char *const bench_usage[] = {
27*7c43b0c1SIan Rogers 	"perf bench mem find_bit <options>",
28*7c43b0c1SIan Rogers 	NULL
29*7c43b0c1SIan Rogers };
30*7c43b0c1SIan Rogers 
31*7c43b0c1SIan Rogers static unsigned int accumulator;
32*7c43b0c1SIan Rogers static unsigned int use_of_val;
33*7c43b0c1SIan Rogers 
34*7c43b0c1SIan Rogers static noinline void workload(int val)
35*7c43b0c1SIan Rogers {
36*7c43b0c1SIan Rogers 	use_of_val += val;
37*7c43b0c1SIan Rogers 	accumulator++;
38*7c43b0c1SIan Rogers }
39*7c43b0c1SIan Rogers 
40*7c43b0c1SIan Rogers #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
41*7c43b0c1SIan Rogers static bool asm_test_bit(long nr, const unsigned long *addr)
42*7c43b0c1SIan Rogers {
43*7c43b0c1SIan Rogers 	bool oldbit;
44*7c43b0c1SIan Rogers 
45*7c43b0c1SIan Rogers 	asm volatile("bt %2,%1"
46*7c43b0c1SIan Rogers 		     : "=@ccc" (oldbit)
47*7c43b0c1SIan Rogers 		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
48*7c43b0c1SIan Rogers 
49*7c43b0c1SIan Rogers 	return oldbit;
50*7c43b0c1SIan Rogers }
51*7c43b0c1SIan Rogers #else
52*7c43b0c1SIan Rogers #define asm_test_bit test_bit
53*7c43b0c1SIan Rogers #endif
54*7c43b0c1SIan Rogers 
55*7c43b0c1SIan Rogers static int do_for_each_set_bit(unsigned int num_bits)
56*7c43b0c1SIan Rogers {
57*7c43b0c1SIan Rogers 	unsigned long *to_test = bitmap_alloc(num_bits);
58*7c43b0c1SIan Rogers 	struct timeval start, end, diff;
59*7c43b0c1SIan Rogers 	u64 runtime_us;
60*7c43b0c1SIan Rogers 	struct stats fb_time_stats, tb_time_stats;
61*7c43b0c1SIan Rogers 	double time_average, time_stddev;
62*7c43b0c1SIan Rogers 	unsigned int bit, i, j;
63*7c43b0c1SIan Rogers 	unsigned int set_bits, skip;
64*7c43b0c1SIan Rogers 	unsigned int old;
65*7c43b0c1SIan Rogers 
66*7c43b0c1SIan Rogers 	init_stats(&fb_time_stats);
67*7c43b0c1SIan Rogers 	init_stats(&tb_time_stats);
68*7c43b0c1SIan Rogers 
69*7c43b0c1SIan Rogers 	for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
70*7c43b0c1SIan Rogers 		bitmap_zero(to_test, num_bits);
71*7c43b0c1SIan Rogers 		skip = num_bits / set_bits;
72*7c43b0c1SIan Rogers 		for (i = 0; i < num_bits; i += skip)
73*7c43b0c1SIan Rogers 			set_bit(i, to_test);
74*7c43b0c1SIan Rogers 
75*7c43b0c1SIan Rogers 		for (i = 0; i < outer_iterations; i++) {
76*7c43b0c1SIan Rogers 			old = accumulator;
77*7c43b0c1SIan Rogers 			gettimeofday(&start, NULL);
78*7c43b0c1SIan Rogers 			for (j = 0; j < inner_iterations; j++) {
79*7c43b0c1SIan Rogers 				for_each_set_bit(bit, to_test, num_bits)
80*7c43b0c1SIan Rogers 					workload(bit);
81*7c43b0c1SIan Rogers 			}
82*7c43b0c1SIan Rogers 			gettimeofday(&end, NULL);
83*7c43b0c1SIan Rogers 			assert(old + (inner_iterations * set_bits) == accumulator);
84*7c43b0c1SIan Rogers 			timersub(&end, &start, &diff);
85*7c43b0c1SIan Rogers 			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
86*7c43b0c1SIan Rogers 			update_stats(&fb_time_stats, runtime_us);
87*7c43b0c1SIan Rogers 
88*7c43b0c1SIan Rogers 			old = accumulator;
89*7c43b0c1SIan Rogers 			gettimeofday(&start, NULL);
90*7c43b0c1SIan Rogers 			for (j = 0; j < inner_iterations; j++) {
91*7c43b0c1SIan Rogers 				for (bit = 0; bit < num_bits; bit++) {
92*7c43b0c1SIan Rogers 					if (asm_test_bit(bit, to_test))
93*7c43b0c1SIan Rogers 						workload(bit);
94*7c43b0c1SIan Rogers 				}
95*7c43b0c1SIan Rogers 			}
96*7c43b0c1SIan Rogers 			gettimeofday(&end, NULL);
97*7c43b0c1SIan Rogers 			assert(old + (inner_iterations * set_bits) == accumulator);
98*7c43b0c1SIan Rogers 			timersub(&end, &start, &diff);
99*7c43b0c1SIan Rogers 			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
100*7c43b0c1SIan Rogers 			update_stats(&tb_time_stats, runtime_us);
101*7c43b0c1SIan Rogers 		}
102*7c43b0c1SIan Rogers 
103*7c43b0c1SIan Rogers 		printf("%d operations %d bits set of %d bits\n",
104*7c43b0c1SIan Rogers 			inner_iterations, set_bits, num_bits);
105*7c43b0c1SIan Rogers 		time_average = avg_stats(&fb_time_stats);
106*7c43b0c1SIan Rogers 		time_stddev = stddev_stats(&fb_time_stats);
107*7c43b0c1SIan Rogers 		printf("  Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
108*7c43b0c1SIan Rogers 			time_average, time_stddev);
109*7c43b0c1SIan Rogers 		time_average = avg_stats(&tb_time_stats);
110*7c43b0c1SIan Rogers 		time_stddev = stddev_stats(&tb_time_stats);
111*7c43b0c1SIan Rogers 		printf("  Average test_bit loop took:    %.3f usec (+- %.3f usec)\n",
112*7c43b0c1SIan Rogers 			time_average, time_stddev);
113*7c43b0c1SIan Rogers 
114*7c43b0c1SIan Rogers 		if (use_of_val == accumulator)  /* Try to avoid compiler tricks. */
115*7c43b0c1SIan Rogers 			printf("\n");
116*7c43b0c1SIan Rogers 	}
117*7c43b0c1SIan Rogers 	bitmap_free(to_test);
118*7c43b0c1SIan Rogers 	return 0;
119*7c43b0c1SIan Rogers }
120*7c43b0c1SIan Rogers 
121*7c43b0c1SIan Rogers int bench_mem_find_bit(int argc, const char **argv)
122*7c43b0c1SIan Rogers {
123*7c43b0c1SIan Rogers 	int err = 0, i;
124*7c43b0c1SIan Rogers 
125*7c43b0c1SIan Rogers 	argc = parse_options(argc, argv, options, bench_usage, 0);
126*7c43b0c1SIan Rogers 	if (argc) {
127*7c43b0c1SIan Rogers 		usage_with_options(bench_usage, options);
128*7c43b0c1SIan Rogers 		exit(EXIT_FAILURE);
129*7c43b0c1SIan Rogers 	}
130*7c43b0c1SIan Rogers 
131*7c43b0c1SIan Rogers 	for (i = 1; i <= 2048; i <<= 1)
132*7c43b0c1SIan Rogers 		do_for_each_set_bit(i);
133*7c43b0c1SIan Rogers 
134*7c43b0c1SIan Rogers 	return err;
135*7c43b0c1SIan Rogers }
136