xref: /openbmc/linux/tools/perf/bench/find-bit-bench.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
17c43b0c1SIan Rogers // SPDX-License-Identifier: GPL-2.0
27c43b0c1SIan Rogers /*
37c43b0c1SIan Rogers  * Benchmark find_next_bit and related bit operations.
47c43b0c1SIan Rogers  *
57c43b0c1SIan Rogers  * Copyright 2020 Google LLC.
67c43b0c1SIan Rogers  */
77c43b0c1SIan Rogers #include <stdlib.h>
87c43b0c1SIan Rogers #include "bench.h"
97c43b0c1SIan Rogers #include "../util/stat.h"
107c43b0c1SIan Rogers #include <linux/bitmap.h>
117c43b0c1SIan Rogers #include <linux/bitops.h>
127c43b0c1SIan Rogers #include <linux/time64.h>
137c43b0c1SIan Rogers #include <subcmd/parse-options.h>
147c43b0c1SIan Rogers 
157c43b0c1SIan Rogers static unsigned int outer_iterations = 5;
167c43b0c1SIan Rogers static unsigned int inner_iterations = 100000;
177c43b0c1SIan Rogers 
187c43b0c1SIan Rogers static const struct option options[] = {
197c43b0c1SIan Rogers 	OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
20f9f95068SColin Ian King 		"Number of outer iterations used"),
217c43b0c1SIan Rogers 	OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
22f9f95068SColin Ian King 		"Number of inner iterations used"),
237c43b0c1SIan Rogers 	OPT_END()
247c43b0c1SIan Rogers };
257c43b0c1SIan Rogers 
267c43b0c1SIan Rogers static const char *const bench_usage[] = {
277c43b0c1SIan Rogers 	"perf bench mem find_bit <options>",
287c43b0c1SIan Rogers 	NULL
297c43b0c1SIan Rogers };
307c43b0c1SIan Rogers 
317c43b0c1SIan Rogers static unsigned int accumulator;
327c43b0c1SIan Rogers static unsigned int use_of_val;
337c43b0c1SIan Rogers 
workload(int val)347c43b0c1SIan Rogers static noinline void workload(int val)
357c43b0c1SIan Rogers {
367c43b0c1SIan Rogers 	use_of_val += val;
377c43b0c1SIan Rogers 	accumulator++;
387c43b0c1SIan Rogers }
397c43b0c1SIan Rogers 
407c43b0c1SIan Rogers #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
asm_test_bit(long nr,const unsigned long * addr)417c43b0c1SIan Rogers static bool asm_test_bit(long nr, const unsigned long *addr)
427c43b0c1SIan Rogers {
437c43b0c1SIan Rogers 	bool oldbit;
447c43b0c1SIan Rogers 
457c43b0c1SIan Rogers 	asm volatile("bt %2,%1"
467c43b0c1SIan Rogers 		     : "=@ccc" (oldbit)
477c43b0c1SIan Rogers 		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
487c43b0c1SIan Rogers 
497c43b0c1SIan Rogers 	return oldbit;
507c43b0c1SIan Rogers }
517c43b0c1SIan Rogers #else
527c43b0c1SIan Rogers #define asm_test_bit test_bit
537c43b0c1SIan Rogers #endif
547c43b0c1SIan Rogers 
do_for_each_set_bit(unsigned int num_bits)557c43b0c1SIan Rogers static int do_for_each_set_bit(unsigned int num_bits)
567c43b0c1SIan Rogers {
577fc5b571SAndy Shevchenko 	unsigned long *to_test = bitmap_zalloc(num_bits);
587c43b0c1SIan Rogers 	struct timeval start, end, diff;
597c43b0c1SIan Rogers 	u64 runtime_us;
607c43b0c1SIan Rogers 	struct stats fb_time_stats, tb_time_stats;
617c43b0c1SIan Rogers 	double time_average, time_stddev;
627c43b0c1SIan Rogers 	unsigned int bit, i, j;
637c43b0c1SIan Rogers 	unsigned int set_bits, skip;
647c43b0c1SIan Rogers 
657c43b0c1SIan Rogers 	init_stats(&fb_time_stats);
667c43b0c1SIan Rogers 	init_stats(&tb_time_stats);
677c43b0c1SIan Rogers 
687c43b0c1SIan Rogers 	for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
697c43b0c1SIan Rogers 		bitmap_zero(to_test, num_bits);
707c43b0c1SIan Rogers 		skip = num_bits / set_bits;
717c43b0c1SIan Rogers 		for (i = 0; i < num_bits; i += skip)
7275d7ba32SSean Christopherson 			__set_bit(i, to_test);
737c43b0c1SIan Rogers 
747c43b0c1SIan Rogers 		for (i = 0; i < outer_iterations; i++) {
75*d1babea9SIan Rogers #ifndef NDEBUG
76*d1babea9SIan Rogers 			unsigned int old = accumulator;
77*d1babea9SIan Rogers #endif
78*d1babea9SIan Rogers 
797c43b0c1SIan Rogers 			gettimeofday(&start, NULL);
807c43b0c1SIan Rogers 			for (j = 0; j < inner_iterations; j++) {
817c43b0c1SIan Rogers 				for_each_set_bit(bit, to_test, num_bits)
827c43b0c1SIan Rogers 					workload(bit);
837c43b0c1SIan Rogers 			}
847c43b0c1SIan Rogers 			gettimeofday(&end, NULL);
857c43b0c1SIan Rogers 			assert(old + (inner_iterations * set_bits) == accumulator);
867c43b0c1SIan Rogers 			timersub(&end, &start, &diff);
877c43b0c1SIan Rogers 			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
887c43b0c1SIan Rogers 			update_stats(&fb_time_stats, runtime_us);
897c43b0c1SIan Rogers 
90*d1babea9SIan Rogers #ifndef NDEBUG
917c43b0c1SIan Rogers 			old = accumulator;
92*d1babea9SIan Rogers #endif
937c43b0c1SIan Rogers 			gettimeofday(&start, NULL);
947c43b0c1SIan Rogers 			for (j = 0; j < inner_iterations; j++) {
957c43b0c1SIan Rogers 				for (bit = 0; bit < num_bits; bit++) {
967c43b0c1SIan Rogers 					if (asm_test_bit(bit, to_test))
977c43b0c1SIan Rogers 						workload(bit);
987c43b0c1SIan Rogers 				}
997c43b0c1SIan Rogers 			}
1007c43b0c1SIan Rogers 			gettimeofday(&end, NULL);
1017c43b0c1SIan Rogers 			assert(old + (inner_iterations * set_bits) == accumulator);
1027c43b0c1SIan Rogers 			timersub(&end, &start, &diff);
1037c43b0c1SIan Rogers 			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
1047c43b0c1SIan Rogers 			update_stats(&tb_time_stats, runtime_us);
1057c43b0c1SIan Rogers 		}
1067c43b0c1SIan Rogers 
1077c43b0c1SIan Rogers 		printf("%d operations %d bits set of %d bits\n",
1087c43b0c1SIan Rogers 			inner_iterations, set_bits, num_bits);
1097c43b0c1SIan Rogers 		time_average = avg_stats(&fb_time_stats);
1107c43b0c1SIan Rogers 		time_stddev = stddev_stats(&fb_time_stats);
1117c43b0c1SIan Rogers 		printf("  Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
1127c43b0c1SIan Rogers 			time_average, time_stddev);
1137c43b0c1SIan Rogers 		time_average = avg_stats(&tb_time_stats);
1147c43b0c1SIan Rogers 		time_stddev = stddev_stats(&tb_time_stats);
1157c43b0c1SIan Rogers 		printf("  Average test_bit loop took:    %.3f usec (+- %.3f usec)\n",
1167c43b0c1SIan Rogers 			time_average, time_stddev);
1177c43b0c1SIan Rogers 
1187c43b0c1SIan Rogers 		if (use_of_val == accumulator)  /* Try to avoid compiler tricks. */
1197c43b0c1SIan Rogers 			printf("\n");
1207c43b0c1SIan Rogers 	}
1217c43b0c1SIan Rogers 	bitmap_free(to_test);
1227c43b0c1SIan Rogers 	return 0;
1237c43b0c1SIan Rogers }
1247c43b0c1SIan Rogers 
bench_mem_find_bit(int argc,const char ** argv)1257c43b0c1SIan Rogers int bench_mem_find_bit(int argc, const char **argv)
1267c43b0c1SIan Rogers {
1277c43b0c1SIan Rogers 	int err = 0, i;
1287c43b0c1SIan Rogers 
1297c43b0c1SIan Rogers 	argc = parse_options(argc, argv, options, bench_usage, 0);
1307c43b0c1SIan Rogers 	if (argc) {
1317c43b0c1SIan Rogers 		usage_with_options(bench_usage, options);
1327c43b0c1SIan Rogers 		exit(EXIT_FAILURE);
1337c43b0c1SIan Rogers 	}
1347c43b0c1SIan Rogers 
1357c43b0c1SIan Rogers 	for (i = 1; i <= 2048; i <<= 1)
1367c43b0c1SIan Rogers 		do_for_each_set_bit(i);
1377c43b0c1SIan Rogers 
1387c43b0c1SIan Rogers 	return err;
1397c43b0c1SIan Rogers }
140