xref: /openbmc/linux/tools/perf/bench/numa.c (revision 337fa2db)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21c13f3c9SIngo Molnar /*
31c13f3c9SIngo Molnar  * numa.c
41c13f3c9SIngo Molnar  *
51c13f3c9SIngo Molnar  * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
61c13f3c9SIngo Molnar  */
71c13f3c9SIngo Molnar 
8fd20e811SArnaldo Carvalho de Melo #include <inttypes.h>
98a158589SArnaldo Carvalho de Melo 
104b6ab94eSJosh Poimboeuf #include <subcmd/parse-options.h>
112d8e405aSArnaldo Carvalho de Melo #include "../util/cloexec.h"
121c13f3c9SIngo Molnar 
131c13f3c9SIngo Molnar #include "bench.h"
141c13f3c9SIngo Molnar 
151c13f3c9SIngo Molnar #include <errno.h>
161c13f3c9SIngo Molnar #include <sched.h>
171c13f3c9SIngo Molnar #include <stdio.h>
181c13f3c9SIngo Molnar #include <assert.h>
19a527c2c1SJames Clark #include <debug.h>
201c13f3c9SIngo Molnar #include <malloc.h>
211c13f3c9SIngo Molnar #include <signal.h>
221c13f3c9SIngo Molnar #include <stdlib.h>
231c13f3c9SIngo Molnar #include <string.h>
241c13f3c9SIngo Molnar #include <unistd.h>
251c13f3c9SIngo Molnar #include <sys/mman.h>
261c13f3c9SIngo Molnar #include <sys/time.h>
27b64aa553SPetr Holasek #include <sys/resource.h>
281c13f3c9SIngo Molnar #include <sys/wait.h>
291c13f3c9SIngo Molnar #include <sys/prctl.h>
301c13f3c9SIngo Molnar #include <sys/types.h>
31877a7a11SArnaldo Carvalho de Melo #include <linux/kernel.h>
32a8ad8329SArnaldo Carvalho de Melo #include <linux/time64.h>
337c9eefe8SStephen Rothwell #include <linux/numa.h>
347f7c536fSArnaldo Carvalho de Melo #include <linux/zalloc.h>
351c13f3c9SIngo Molnar 
368cb7a188SAthira Rajeev #include "../util/header.h"
37a64d3af5SIan Rogers #include "../util/mutex.h"
381c13f3c9SIngo Molnar #include <numa.h>
391c13f3c9SIngo Molnar #include <numaif.h>
401c13f3c9SIngo Molnar 
/* Provide RUSAGE_THREAD when the libc headers did not define it: */
#ifndef RUSAGE_THREAD
# define RUSAGE_THREAD 1
#endif

/*
 * Regular printout to the terminal. Nothing is printed while the global
 * context 'g' is still NULL or when its show_details level is negative:
 */
#define tprintf(x...)						\
do {								\
	if (g && g->p.show_details >= 0)			\
		printf(x);					\
} while (0)

/*
 * Debug printout, emitted only when show_details is at least 1.
 * Any earlier dprintf definition is dropped first:
 */
#undef dprintf
#define dprintf(x...)						\
do {								\
	if (g && g->p.show_details >= 1)			\
		printf(x);					\
} while (0)
551c13f3c9SIngo Molnar 
561c13f3c9SIngo Molnar struct thread_data {
571c13f3c9SIngo Molnar 	int			curr_cpu;
58f58faed7SAthira Rajeev 	cpu_set_t		*bind_cpumask;
591c13f3c9SIngo Molnar 	int			bind_node;
601c13f3c9SIngo Molnar 	u8			*process_data;
611c13f3c9SIngo Molnar 	int			process_nr;
621c13f3c9SIngo Molnar 	int			thread_nr;
631c13f3c9SIngo Molnar 	int			task_nr;
641c13f3c9SIngo Molnar 	unsigned int		loops_done;
651c13f3c9SIngo Molnar 	u64			val;
661c13f3c9SIngo Molnar 	u64			runtime_ns;
67b64aa553SPetr Holasek 	u64			system_time_ns;
68b64aa553SPetr Holasek 	u64			user_time_ns;
69b64aa553SPetr Holasek 	double			speed_gbs;
70a64d3af5SIan Rogers 	struct mutex		*process_lock;
711c13f3c9SIngo Molnar };
721c13f3c9SIngo Molnar 
731c13f3c9SIngo Molnar /* Parameters set by options: */
741c13f3c9SIngo Molnar 
751c13f3c9SIngo Molnar struct params {
761c13f3c9SIngo Molnar 	/* Startup synchronization: */
771c13f3c9SIngo Molnar 	bool			serialize_startup;
781c13f3c9SIngo Molnar 
791c13f3c9SIngo Molnar 	/* Task hierarchy: */
801c13f3c9SIngo Molnar 	int			nr_proc;
811c13f3c9SIngo Molnar 	int			nr_threads;
821c13f3c9SIngo Molnar 
831c13f3c9SIngo Molnar 	/* Working set sizes: */
841c13f3c9SIngo Molnar 	const char		*mb_global_str;
851c13f3c9SIngo Molnar 	const char		*mb_proc_str;
861c13f3c9SIngo Molnar 	const char		*mb_proc_locked_str;
871c13f3c9SIngo Molnar 	const char		*mb_thread_str;
881c13f3c9SIngo Molnar 
891c13f3c9SIngo Molnar 	double			mb_global;
901c13f3c9SIngo Molnar 	double			mb_proc;
911c13f3c9SIngo Molnar 	double			mb_proc_locked;
921c13f3c9SIngo Molnar 	double			mb_thread;
931c13f3c9SIngo Molnar 
941c13f3c9SIngo Molnar 	/* Access patterns to the working set: */
951c13f3c9SIngo Molnar 	bool			data_reads;
961c13f3c9SIngo Molnar 	bool			data_writes;
971c13f3c9SIngo Molnar 	bool			data_backwards;
981c13f3c9SIngo Molnar 	bool			data_zero_memset;
991c13f3c9SIngo Molnar 	bool			data_rand_walk;
1001c13f3c9SIngo Molnar 	u32			nr_loops;
1011c13f3c9SIngo Molnar 	u32			nr_secs;
1021c13f3c9SIngo Molnar 	u32			sleep_usecs;
1031c13f3c9SIngo Molnar 
1041c13f3c9SIngo Molnar 	/* Working set initialization: */
1051c13f3c9SIngo Molnar 	bool			init_zero;
1061c13f3c9SIngo Molnar 	bool			init_random;
1071c13f3c9SIngo Molnar 	bool			init_cpu0;
1081c13f3c9SIngo Molnar 
1091c13f3c9SIngo Molnar 	/* Misc options: */
1101c13f3c9SIngo Molnar 	int			show_details;
1111c13f3c9SIngo Molnar 	int			run_all;
1121c13f3c9SIngo Molnar 	int			thp;
1131c13f3c9SIngo Molnar 
1141c13f3c9SIngo Molnar 	long			bytes_global;
1151c13f3c9SIngo Molnar 	long			bytes_process;
1161c13f3c9SIngo Molnar 	long			bytes_process_locked;
1171c13f3c9SIngo Molnar 	long			bytes_thread;
1181c13f3c9SIngo Molnar 
1191c13f3c9SIngo Molnar 	int			nr_tasks;
1201c13f3c9SIngo Molnar 
1211c13f3c9SIngo Molnar 	bool			show_convergence;
1221c13f3c9SIngo Molnar 	bool			measure_convergence;
1231c13f3c9SIngo Molnar 
1241c13f3c9SIngo Molnar 	int			perturb_secs;
1251c13f3c9SIngo Molnar 	int			nr_cpus;
1261c13f3c9SIngo Molnar 	int			nr_nodes;
1271c13f3c9SIngo Molnar 
1281c13f3c9SIngo Molnar 	/* Affinity options -C and -N: */
1291c13f3c9SIngo Molnar 	char			*cpu_list_str;
1301c13f3c9SIngo Molnar 	char			*node_list_str;
1311c13f3c9SIngo Molnar };
1321c13f3c9SIngo Molnar 
1331c13f3c9SIngo Molnar 
1341c13f3c9SIngo Molnar /* Global, read-writable area, accessible to all processes and threads: */
1351c13f3c9SIngo Molnar 
1361c13f3c9SIngo Molnar struct global_info {
1371c13f3c9SIngo Molnar 	u8			*data;
1381c13f3c9SIngo Molnar 
139a64d3af5SIan Rogers 	struct mutex		startup_mutex;
140a64d3af5SIan Rogers 	struct cond		startup_cond;
1411c13f3c9SIngo Molnar 	int			nr_tasks_started;
1421c13f3c9SIngo Molnar 
143a64d3af5SIan Rogers 	struct mutex		start_work_mutex;
144a64d3af5SIan Rogers 	struct cond		start_work_cond;
1451c13f3c9SIngo Molnar 	int			nr_tasks_working;
146f9299385SIan Rogers 	bool			start_work;
1471c13f3c9SIngo Molnar 
148a64d3af5SIan Rogers 	struct mutex		stop_work_mutex;
1491c13f3c9SIngo Molnar 	u64			bytes_done;
1501c13f3c9SIngo Molnar 
1511c13f3c9SIngo Molnar 	struct thread_data	*threads;
1521c13f3c9SIngo Molnar 
1531c13f3c9SIngo Molnar 	/* Convergence latency measurement: */
1541c13f3c9SIngo Molnar 	bool			all_converged;
1551c13f3c9SIngo Molnar 	bool			stop_work;
1561c13f3c9SIngo Molnar 
1571c13f3c9SIngo Molnar 	int			print_once;
1581c13f3c9SIngo Molnar 
1591c13f3c9SIngo Molnar 	struct params		p;
1601c13f3c9SIngo Molnar };
1611c13f3c9SIngo Molnar 
1621c13f3c9SIngo Molnar static struct global_info	*g = NULL;
1631c13f3c9SIngo Molnar 
1641c13f3c9SIngo Molnar static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
1651c13f3c9SIngo Molnar static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
1661c13f3c9SIngo Molnar 
1671c13f3c9SIngo Molnar struct params p0;
1681c13f3c9SIngo Molnar 
1691c13f3c9SIngo Molnar static const struct option options[] = {
1701c13f3c9SIngo Molnar 	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
1711c13f3c9SIngo Molnar 	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),
1721c13f3c9SIngo Molnar 
1731c13f3c9SIngo Molnar 	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global  memory (MBs)"),
1741c13f3c9SIngo Molnar 	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
1751c13f3c9SIngo Molnar 	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
1761c13f3c9SIngo Molnar 	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),
1771c13f3c9SIngo Molnar 
178b0d22e52SIngo Molnar 	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run (default: unlimited)"),
179b0d22e52SIngo Molnar 	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run (default: 5 secs)"),
1801c13f3c9SIngo Molnar 	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),
1811c13f3c9SIngo Molnar 
1822abb80daSYisheng Xie 	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via reads (can be mixed with -W)"),
1831c13f3c9SIngo Molnar 	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
1841c13f3c9SIngo Molnar 	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
1851c13f3c9SIngo Molnar 	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
1861c13f3c9SIngo Molnar 	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),
1871c13f3c9SIngo Molnar 
1881c13f3c9SIngo Molnar 
1891c13f3c9SIngo Molnar 	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
1901c13f3c9SIngo Molnar 	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
1911c13f3c9SIngo Molnar 	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
1921c13f3c9SIngo Molnar 	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),
1931c13f3c9SIngo Molnar 
1941c13f3c9SIngo Molnar 	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
1951c13f3c9SIngo Molnar 	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
1961c13f3c9SIngo Molnar 	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
197001916b9SJiri Olsa 	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, "
198001916b9SJiri Olsa 		    "convergence is reached when each process (all its threads) is running on a single NUMA node."),
1991c13f3c9SIngo Molnar 	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
200a527c2c1SJames Clark 	OPT_BOOLEAN('q', "quiet"	, &quiet,
201a527c2c1SJames Clark 		    "quiet mode (do not show any warnings or messages)"),
2021c13f3c9SIngo Molnar 	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
2031c13f3c9SIngo Molnar 
2041c13f3c9SIngo Molnar 	/* Special option string parsing callbacks: */
2051c13f3c9SIngo Molnar         OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
2061c13f3c9SIngo Molnar 			"bind the first N tasks to these specific cpus (the rest is unbound)",
2071c13f3c9SIngo Molnar 			parse_cpus_opt),
2081c13f3c9SIngo Molnar         OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
2091c13f3c9SIngo Molnar 			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
2101c13f3c9SIngo Molnar 			parse_nodes_opt),
2111c13f3c9SIngo Molnar 	OPT_END()
2121c13f3c9SIngo Molnar };
2131c13f3c9SIngo Molnar 
/* NULL-terminated usage lines for 'perf bench numa': */
static const char * const bench_numa_usage[] = {
	"perf bench numa <options>",
	NULL,
};
2181c13f3c9SIngo Molnar 
/* NULL-terminated usage lines for 'perf bench numa mem': */
static const char * const numa_usage[] = {
	"perf bench numa mem [<options>]",
	NULL,
};
2231c13f3c9SIngo Molnar 
224321a7c35SSatheesh Rajendran /*
225321a7c35SSatheesh Rajendran  * To get number of numa nodes present.
226321a7c35SSatheesh Rajendran  */
nr_numa_nodes(void)227321a7c35SSatheesh Rajendran static int nr_numa_nodes(void)
228321a7c35SSatheesh Rajendran {
229321a7c35SSatheesh Rajendran 	int i, nr_nodes = 0;
230321a7c35SSatheesh Rajendran 
231321a7c35SSatheesh Rajendran 	for (i = 0; i < g->p.nr_nodes; i++) {
232321a7c35SSatheesh Rajendran 		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
233321a7c35SSatheesh Rajendran 			nr_nodes++;
234321a7c35SSatheesh Rajendran 	}
235321a7c35SSatheesh Rajendran 
236321a7c35SSatheesh Rajendran 	return nr_nodes;
237321a7c35SSatheesh Rajendran }
238321a7c35SSatheesh Rajendran 
239321a7c35SSatheesh Rajendran /*
240321a7c35SSatheesh Rajendran  * To check if given numa node is present.
241321a7c35SSatheesh Rajendran  */
is_node_present(int node)242321a7c35SSatheesh Rajendran static int is_node_present(int node)
243321a7c35SSatheesh Rajendran {
244321a7c35SSatheesh Rajendran 	return numa_bitmask_isbitset(numa_nodes_ptr, node);
245321a7c35SSatheesh Rajendran }
246321a7c35SSatheesh Rajendran 
247321a7c35SSatheesh Rajendran /*
248321a7c35SSatheesh Rajendran  * To check given numa node has cpus.
249321a7c35SSatheesh Rajendran  */
node_has_cpus(int node)250321a7c35SSatheesh Rajendran static bool node_has_cpus(int node)
251321a7c35SSatheesh Rajendran {
2522db13a9bSAlexander Gordeev 	struct bitmask *cpumask = numa_allocate_cpumask();
253509f68e3SAlexander Gordeev 	bool ret = false; /* fall back to nocpus */
2542db13a9bSAlexander Gordeev 	int cpu;
255321a7c35SSatheesh Rajendran 
2562db13a9bSAlexander Gordeev 	BUG_ON(!cpumask);
2572db13a9bSAlexander Gordeev 	if (!numa_node_to_cpus(node, cpumask)) {
2582db13a9bSAlexander Gordeev 		for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
2592db13a9bSAlexander Gordeev 			if (numa_bitmask_isbitset(cpumask, cpu)) {
260509f68e3SAlexander Gordeev 				ret = true;
261509f68e3SAlexander Gordeev 				break;
262321a7c35SSatheesh Rajendran 			}
263321a7c35SSatheesh Rajendran 		}
264509f68e3SAlexander Gordeev 	}
2652db13a9bSAlexander Gordeev 	numa_free_cpumask(cpumask);
266321a7c35SSatheesh Rajendran 
267509f68e3SAlexander Gordeev 	return ret;
268321a7c35SSatheesh Rajendran }
269321a7c35SSatheesh Rajendran 
bind_to_cpu(int target_cpu)270f58faed7SAthira Rajeev static cpu_set_t *bind_to_cpu(int target_cpu)
2711c13f3c9SIngo Molnar {
272f58faed7SAthira Rajeev 	int nrcpus = numa_num_possible_cpus();
273f58faed7SAthira Rajeev 	cpu_set_t *orig_mask, *mask;
274f58faed7SAthira Rajeev 	size_t size;
2751c13f3c9SIngo Molnar 
276f58faed7SAthira Rajeev 	orig_mask = CPU_ALLOC(nrcpus);
277f58faed7SAthira Rajeev 	BUG_ON(!orig_mask);
278f58faed7SAthira Rajeev 	size = CPU_ALLOC_SIZE(nrcpus);
279f58faed7SAthira Rajeev 	CPU_ZERO_S(size, orig_mask);
2801c13f3c9SIngo Molnar 
281f58faed7SAthira Rajeev 	if (sched_getaffinity(0, size, orig_mask))
282f58faed7SAthira Rajeev 		goto err_out;
283f58faed7SAthira Rajeev 
284f58faed7SAthira Rajeev 	mask = CPU_ALLOC(nrcpus);
285f58faed7SAthira Rajeev 	if (!mask)
286f58faed7SAthira Rajeev 		goto err_out;
287f58faed7SAthira Rajeev 
288f58faed7SAthira Rajeev 	CPU_ZERO_S(size, mask);
2891c13f3c9SIngo Molnar 
2901c13f3c9SIngo Molnar 	if (target_cpu == -1) {
2911c13f3c9SIngo Molnar 		int cpu;
2921c13f3c9SIngo Molnar 
2931c13f3c9SIngo Molnar 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
294f58faed7SAthira Rajeev 			CPU_SET_S(cpu, size, mask);
2951c13f3c9SIngo Molnar 	} else {
296f58faed7SAthira Rajeev 		if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
297f58faed7SAthira Rajeev 			goto err;
298f58faed7SAthira Rajeev 
299f58faed7SAthira Rajeev 		CPU_SET_S(target_cpu, size, mask);
3001c13f3c9SIngo Molnar 	}
3011c13f3c9SIngo Molnar 
302f58faed7SAthira Rajeev 	if (sched_setaffinity(0, size, mask))
303f58faed7SAthira Rajeev 		goto err;
3041c13f3c9SIngo Molnar 
3051c13f3c9SIngo Molnar 	return orig_mask;
306f58faed7SAthira Rajeev 
307f58faed7SAthira Rajeev err:
308f58faed7SAthira Rajeev 	CPU_FREE(mask);
309f58faed7SAthira Rajeev err_out:
310f58faed7SAthira Rajeev 	CPU_FREE(orig_mask);
311f58faed7SAthira Rajeev 
312f58faed7SAthira Rajeev 	/* BUG_ON due to failure in allocation of orig_mask/mask */
313f58faed7SAthira Rajeev 	BUG_ON(-1);
314183d4f2dSIan Rogers 	return NULL;
3151c13f3c9SIngo Molnar }
3161c13f3c9SIngo Molnar 
bind_to_node(int target_node)317f58faed7SAthira Rajeev static cpu_set_t *bind_to_node(int target_node)
3181c13f3c9SIngo Molnar {
319f58faed7SAthira Rajeev 	int nrcpus = numa_num_possible_cpus();
320f58faed7SAthira Rajeev 	size_t size;
321f58faed7SAthira Rajeev 	cpu_set_t *orig_mask, *mask;
3221c13f3c9SIngo Molnar 	int cpu;
3231c13f3c9SIngo Molnar 
324f58faed7SAthira Rajeev 	orig_mask = CPU_ALLOC(nrcpus);
325f58faed7SAthira Rajeev 	BUG_ON(!orig_mask);
326f58faed7SAthira Rajeev 	size = CPU_ALLOC_SIZE(nrcpus);
327f58faed7SAthira Rajeev 	CPU_ZERO_S(size, orig_mask);
3281c13f3c9SIngo Molnar 
329f58faed7SAthira Rajeev 	if (sched_getaffinity(0, size, orig_mask))
330f58faed7SAthira Rajeev 		goto err_out;
331f58faed7SAthira Rajeev 
332f58faed7SAthira Rajeev 	mask = CPU_ALLOC(nrcpus);
333f58faed7SAthira Rajeev 	if (!mask)
334f58faed7SAthira Rajeev 		goto err_out;
335f58faed7SAthira Rajeev 
336f58faed7SAthira Rajeev 	CPU_ZERO_S(size, mask);
3371c13f3c9SIngo Molnar 
3387c9eefe8SStephen Rothwell 	if (target_node == NUMA_NO_NODE) {
3391c13f3c9SIngo Molnar 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
340f58faed7SAthira Rajeev 			CPU_SET_S(cpu, size, mask);
3411c13f3c9SIngo Molnar 	} else {
3422db13a9bSAlexander Gordeev 		struct bitmask *cpumask = numa_allocate_cpumask();
3431c13f3c9SIngo Molnar 
344f58faed7SAthira Rajeev 		if (!cpumask)
345f58faed7SAthira Rajeev 			goto err;
346f58faed7SAthira Rajeev 
3472db13a9bSAlexander Gordeev 		if (!numa_node_to_cpus(target_node, cpumask)) {
3482db13a9bSAlexander Gordeev 			for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
3492db13a9bSAlexander Gordeev 				if (numa_bitmask_isbitset(cpumask, cpu))
350f58faed7SAthira Rajeev 					CPU_SET_S(cpu, size, mask);
3511c13f3c9SIngo Molnar 			}
3522db13a9bSAlexander Gordeev 		}
3532db13a9bSAlexander Gordeev 		numa_free_cpumask(cpumask);
3542db13a9bSAlexander Gordeev 	}
3551c13f3c9SIngo Molnar 
356f58faed7SAthira Rajeev 	if (sched_setaffinity(0, size, mask))
357f58faed7SAthira Rajeev 		goto err;
3581c13f3c9SIngo Molnar 
3591c13f3c9SIngo Molnar 	return orig_mask;
360f58faed7SAthira Rajeev 
361f58faed7SAthira Rajeev err:
362f58faed7SAthira Rajeev 	CPU_FREE(mask);
363f58faed7SAthira Rajeev err_out:
364f58faed7SAthira Rajeev 	CPU_FREE(orig_mask);
365f58faed7SAthira Rajeev 
366f58faed7SAthira Rajeev 	/* BUG_ON due to failure in allocation of orig_mask/mask */
367f58faed7SAthira Rajeev 	BUG_ON(-1);
368183d4f2dSIan Rogers 	return NULL;
3691c13f3c9SIngo Molnar }
3701c13f3c9SIngo Molnar 
/*
 * Set the calling task's CPU affinity to @mask.
 *
 * The size passed to the kernel is derived from
 * numa_num_possible_cpus(). On failure @mask is freed and the function
 * ends in BUG_ON().
 */
static void bind_to_cpumask(cpu_set_t *mask)
{
	size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());
	int ret = sched_setaffinity(0, size, mask);

	if (!ret)
		return;

	CPU_FREE(mask);
	BUG_ON(ret);
}
3821c13f3c9SIngo Molnar 
mempol_restore(void)3831c13f3c9SIngo Molnar static void mempol_restore(void)
3841c13f3c9SIngo Molnar {
3851c13f3c9SIngo Molnar 	int ret;
3861c13f3c9SIngo Molnar 
3871c13f3c9SIngo Molnar 	ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
3881c13f3c9SIngo Molnar 
3891c13f3c9SIngo Molnar 	BUG_ON(ret);
3901c13f3c9SIngo Molnar }
3911c13f3c9SIngo Molnar 
bind_to_memnode(int node)3921c13f3c9SIngo Molnar static void bind_to_memnode(int node)
3931c13f3c9SIngo Molnar {
394394e4306SAthira Rajeev 	struct bitmask *node_mask;
3951c13f3c9SIngo Molnar 	int ret;
3961c13f3c9SIngo Molnar 
3977c9eefe8SStephen Rothwell 	if (node == NUMA_NO_NODE)
3981c13f3c9SIngo Molnar 		return;
3991c13f3c9SIngo Molnar 
400394e4306SAthira Rajeev 	node_mask = numa_allocate_nodemask();
401394e4306SAthira Rajeev 	BUG_ON(!node_mask);
4021c13f3c9SIngo Molnar 
403394e4306SAthira Rajeev 	numa_bitmask_clearall(node_mask);
404394e4306SAthira Rajeev 	numa_bitmask_setbit(node_mask, node);
4051c13f3c9SIngo Molnar 
406394e4306SAthira Rajeev 	ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
407394e4306SAthira Rajeev 	dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret);
408394e4306SAthira Rajeev 
409394e4306SAthira Rajeev 	numa_bitmask_free(node_mask);
4101c13f3c9SIngo Molnar 	BUG_ON(ret);
4111c13f3c9SIngo Molnar }
4121c13f3c9SIngo Molnar 
/* Alignment and padding unit used by alloc_data() (2MB): */
#define HPSIZE (2*1024*1024)

/*
 * Format a name, truncated to fit a 20 byte buffer, and install it via
 * prctl(PR_SET_NAME):
 */
#define set_taskname(fmt...)				\
do {							\
	char name[20];					\
							\
	snprintf(name, sizeof(name), fmt);		\
	prctl(PR_SET_NAME, name);			\
} while (0)
4221c13f3c9SIngo Molnar 
alloc_data(ssize_t bytes0,int map_flags,int init_zero,int init_cpu0,int thp,int init_random)4231c13f3c9SIngo Molnar static u8 *alloc_data(ssize_t bytes0, int map_flags,
4241c13f3c9SIngo Molnar 		      int init_zero, int init_cpu0, int thp, int init_random)
4251c13f3c9SIngo Molnar {
426f58faed7SAthira Rajeev 	cpu_set_t *orig_mask = NULL;
4271c13f3c9SIngo Molnar 	ssize_t bytes;
4281c13f3c9SIngo Molnar 	u8 *buf;
4291c13f3c9SIngo Molnar 	int ret;
4301c13f3c9SIngo Molnar 
4311c13f3c9SIngo Molnar 	if (!bytes0)
4321c13f3c9SIngo Molnar 		return NULL;
4331c13f3c9SIngo Molnar 
4341c13f3c9SIngo Molnar 	/* Allocate and initialize all memory on CPU#0: */
4351c13f3c9SIngo Molnar 	if (init_cpu0) {
4366bbfe4e6SJiri Olsa 		int node = numa_node_of_cpu(0);
4376bbfe4e6SJiri Olsa 
4386bbfe4e6SJiri Olsa 		orig_mask = bind_to_node(node);
4396bbfe4e6SJiri Olsa 		bind_to_memnode(node);
4401c13f3c9SIngo Molnar 	}
4411c13f3c9SIngo Molnar 
4421c13f3c9SIngo Molnar 	bytes = bytes0 + HPSIZE;
4431c13f3c9SIngo Molnar 
4441c13f3c9SIngo Molnar 	buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
4451c13f3c9SIngo Molnar 	BUG_ON(buf == (void *)-1);
4461c13f3c9SIngo Molnar 
4471c13f3c9SIngo Molnar 	if (map_flags == MAP_PRIVATE) {
4481c13f3c9SIngo Molnar 		if (thp > 0) {
4491c13f3c9SIngo Molnar 			ret = madvise(buf, bytes, MADV_HUGEPAGE);
4501c13f3c9SIngo Molnar 			if (ret && !g->print_once) {
4511c13f3c9SIngo Molnar 				g->print_once = 1;
4521c13f3c9SIngo Molnar 				printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
4531c13f3c9SIngo Molnar 			}
4541c13f3c9SIngo Molnar 		}
4551c13f3c9SIngo Molnar 		if (thp < 0) {
4561c13f3c9SIngo Molnar 			ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
4571c13f3c9SIngo Molnar 			if (ret && !g->print_once) {
4581c13f3c9SIngo Molnar 				g->print_once = 1;
4591c13f3c9SIngo Molnar 				printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
4601c13f3c9SIngo Molnar 			}
4611c13f3c9SIngo Molnar 		}
4621c13f3c9SIngo Molnar 	}
4631c13f3c9SIngo Molnar 
4641c13f3c9SIngo Molnar 	if (init_zero) {
4651c13f3c9SIngo Molnar 		bzero(buf, bytes);
4661c13f3c9SIngo Molnar 	} else {
4671c13f3c9SIngo Molnar 		/* Initialize random contents, different in each word: */
4681c13f3c9SIngo Molnar 		if (init_random) {
4691c13f3c9SIngo Molnar 			u64 *wbuf = (void *)buf;
4701c13f3c9SIngo Molnar 			long off = rand();
4711c13f3c9SIngo Molnar 			long i;
4721c13f3c9SIngo Molnar 
4731c13f3c9SIngo Molnar 			for (i = 0; i < bytes/8; i++)
4741c13f3c9SIngo Molnar 				wbuf[i] = i + off;
4751c13f3c9SIngo Molnar 		}
4761c13f3c9SIngo Molnar 	}
4771c13f3c9SIngo Molnar 
4781c13f3c9SIngo Molnar 	/* Align to 2MB boundary: */
4791c13f3c9SIngo Molnar 	buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
4801c13f3c9SIngo Molnar 
4811c13f3c9SIngo Molnar 	/* Restore affinity: */
4821c13f3c9SIngo Molnar 	if (init_cpu0) {
4831c13f3c9SIngo Molnar 		bind_to_cpumask(orig_mask);
484f58faed7SAthira Rajeev 		CPU_FREE(orig_mask);
4851c13f3c9SIngo Molnar 		mempol_restore();
4861c13f3c9SIngo Molnar 	}
4871c13f3c9SIngo Molnar 
4881c13f3c9SIngo Molnar 	return buf;
4891c13f3c9SIngo Molnar }
4901c13f3c9SIngo Molnar 
/*
 * Unmap @bytes bytes starting at @data. A NULL @data is ignored.
 * Ends in BUG_ON() if munmap() fails.
 */
static void free_data(void *data, ssize_t bytes)
{
	if (data) {
		int ret = munmap(data, bytes);

		BUG_ON(ret);
	}
}
5011c13f3c9SIngo Molnar 
5021c13f3c9SIngo Molnar /*
5031c13f3c9SIngo Molnar  * Create a shared memory buffer that can be shared between processes, zeroed:
5041c13f3c9SIngo Molnar  */
zalloc_shared_data(ssize_t bytes)5051c13f3c9SIngo Molnar static void * zalloc_shared_data(ssize_t bytes)
5061c13f3c9SIngo Molnar {
5071c13f3c9SIngo Molnar 	return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0,  g->p.thp, g->p.init_random);
5081c13f3c9SIngo Molnar }
5091c13f3c9SIngo Molnar 
5101c13f3c9SIngo Molnar /*
5111c13f3c9SIngo Molnar  * Create a shared memory buffer that can be shared between processes:
5121c13f3c9SIngo Molnar  */
setup_shared_data(ssize_t bytes)5131c13f3c9SIngo Molnar static void * setup_shared_data(ssize_t bytes)
5141c13f3c9SIngo Molnar {
5151c13f3c9SIngo Molnar 	return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
5161c13f3c9SIngo Molnar }
5171c13f3c9SIngo Molnar 
5181c13f3c9SIngo Molnar /*
5191c13f3c9SIngo Molnar  * Allocate process-local memory - this will either be shared between
5201c13f3c9SIngo Molnar  * threads of this process, or only be accessed by this thread:
5211c13f3c9SIngo Molnar  */
setup_private_data(ssize_t bytes)5221c13f3c9SIngo Molnar static void * setup_private_data(ssize_t bytes)
5231c13f3c9SIngo Molnar {
5241c13f3c9SIngo Molnar 	return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
5251c13f3c9SIngo Molnar }
5261c13f3c9SIngo Molnar 
parse_cpu_list(const char * arg)5271c13f3c9SIngo Molnar static int parse_cpu_list(const char *arg)
5281c13f3c9SIngo Molnar {
5291c13f3c9SIngo Molnar 	p0.cpu_list_str = strdup(arg);
5301c13f3c9SIngo Molnar 
5311c13f3c9SIngo Molnar 	dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
5321c13f3c9SIngo Molnar 
5331c13f3c9SIngo Molnar 	return 0;
5341c13f3c9SIngo Molnar }
5351c13f3c9SIngo Molnar 
parse_setup_cpu_list(void)536b81a48eaSPetr Holasek static int parse_setup_cpu_list(void)
5371c13f3c9SIngo Molnar {
5381c13f3c9SIngo Molnar 	struct thread_data *td;
5391c13f3c9SIngo Molnar 	char *str0, *str;
5401c13f3c9SIngo Molnar 	int t;
5411c13f3c9SIngo Molnar 
5421c13f3c9SIngo Molnar 	if (!g->p.cpu_list_str)
543b81a48eaSPetr Holasek 		return 0;
5441c13f3c9SIngo Molnar 
5451c13f3c9SIngo Molnar 	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
5461c13f3c9SIngo Molnar 
5471c13f3c9SIngo Molnar 	str0 = str = strdup(g->p.cpu_list_str);
5481c13f3c9SIngo Molnar 	t = 0;
5491c13f3c9SIngo Molnar 
5501c13f3c9SIngo Molnar 	BUG_ON(!str);
5511c13f3c9SIngo Molnar 
5521c13f3c9SIngo Molnar 	tprintf("# binding tasks to CPUs:\n");
5531c13f3c9SIngo Molnar 	tprintf("#  ");
5541c13f3c9SIngo Molnar 
5551c13f3c9SIngo Molnar 	while (true) {
5561c13f3c9SIngo Molnar 		int bind_cpu, bind_cpu_0, bind_cpu_1;
5571c13f3c9SIngo Molnar 		char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
5581c13f3c9SIngo Molnar 		int bind_len;
5591c13f3c9SIngo Molnar 		int step;
5601c13f3c9SIngo Molnar 		int mul;
5611c13f3c9SIngo Molnar 
5621c13f3c9SIngo Molnar 		tok = strsep(&str, ",");
5631c13f3c9SIngo Molnar 		if (!tok)
5641c13f3c9SIngo Molnar 			break;
5651c13f3c9SIngo Molnar 
5661c13f3c9SIngo Molnar 		tok_end = strstr(tok, "-");
5671c13f3c9SIngo Molnar 
5681c13f3c9SIngo Molnar 		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
5691c13f3c9SIngo Molnar 		if (!tok_end) {
5701c13f3c9SIngo Molnar 			/* Single CPU specified: */
5711c13f3c9SIngo Molnar 			bind_cpu_0 = bind_cpu_1 = atol(tok);
5721c13f3c9SIngo Molnar 		} else {
5731c13f3c9SIngo Molnar 			/* CPU range specified (for example: "5-11"): */
5741c13f3c9SIngo Molnar 			bind_cpu_0 = atol(tok);
5751c13f3c9SIngo Molnar 			bind_cpu_1 = atol(tok_end + 1);
5761c13f3c9SIngo Molnar 		}
5771c13f3c9SIngo Molnar 
5781c13f3c9SIngo Molnar 		step = 1;
5791c13f3c9SIngo Molnar 		tok_step = strstr(tok, "#");
5801c13f3c9SIngo Molnar 		if (tok_step) {
5811c13f3c9SIngo Molnar 			step = atol(tok_step + 1);
5821c13f3c9SIngo Molnar 			BUG_ON(step <= 0 || step >= g->p.nr_cpus);
5831c13f3c9SIngo Molnar 		}
5841c13f3c9SIngo Molnar 
5851c13f3c9SIngo Molnar 		/*
5861c13f3c9SIngo Molnar 		 * Mask length.
5871c13f3c9SIngo Molnar 		 * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
5881c13f3c9SIngo Molnar 		 * where the _4 means the next 4 CPUs are allowed.
5891c13f3c9SIngo Molnar 		 */
5901c13f3c9SIngo Molnar 		bind_len = 1;
5911c13f3c9SIngo Molnar 		tok_len = strstr(tok, "_");
5921c13f3c9SIngo Molnar 		if (tok_len) {
5931c13f3c9SIngo Molnar 			bind_len = atol(tok_len + 1);
5941c13f3c9SIngo Molnar 			BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
5951c13f3c9SIngo Molnar 		}
5961c13f3c9SIngo Molnar 
5971c13f3c9SIngo Molnar 		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
5981c13f3c9SIngo Molnar 		mul = 1;
5991c13f3c9SIngo Molnar 		tok_mul = strstr(tok, "x");
6001c13f3c9SIngo Molnar 		if (tok_mul) {
6011c13f3c9SIngo Molnar 			mul = atol(tok_mul + 1);
6021c13f3c9SIngo Molnar 			BUG_ON(mul <= 0);
6031c13f3c9SIngo Molnar 		}
6041c13f3c9SIngo Molnar 
6051c13f3c9SIngo Molnar 		dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
6061c13f3c9SIngo Molnar 
607b81a48eaSPetr Holasek 		if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
608b81a48eaSPetr Holasek 			printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
609b81a48eaSPetr Holasek 			return -1;
610b81a48eaSPetr Holasek 		}
611b81a48eaSPetr Holasek 
6128cb7a188SAthira Rajeev 		if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
6138cb7a188SAthira Rajeev 			printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
6148cb7a188SAthira Rajeev 			return -1;
6158cb7a188SAthira Rajeev 		}
6168cb7a188SAthira Rajeev 
617b81a48eaSPetr Holasek 		BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
6181c13f3c9SIngo Molnar 		BUG_ON(bind_cpu_0 > bind_cpu_1);
6191c13f3c9SIngo Molnar 
6201c13f3c9SIngo Molnar 		for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
621f58faed7SAthira Rajeev 			size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
6221c13f3c9SIngo Molnar 			int i;
6231c13f3c9SIngo Molnar 
6241c13f3c9SIngo Molnar 			for (i = 0; i < mul; i++) {
6251c13f3c9SIngo Molnar 				int cpu;
6261c13f3c9SIngo Molnar 
6271c13f3c9SIngo Molnar 				if (t >= g->p.nr_tasks) {
6281c13f3c9SIngo Molnar 					printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
6291c13f3c9SIngo Molnar 					goto out;
6301c13f3c9SIngo Molnar 				}
6311c13f3c9SIngo Molnar 				td = g->threads + t;
6321c13f3c9SIngo Molnar 
6331c13f3c9SIngo Molnar 				if (t)
6341c13f3c9SIngo Molnar 					tprintf(",");
6351c13f3c9SIngo Molnar 				if (bind_len > 1) {
6361c13f3c9SIngo Molnar 					tprintf("%2d/%d", bind_cpu, bind_len);
6371c13f3c9SIngo Molnar 				} else {
6381c13f3c9SIngo Molnar 					tprintf("%2d", bind_cpu);
6391c13f3c9SIngo Molnar 				}
6401c13f3c9SIngo Molnar 
641f58faed7SAthira Rajeev 				td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
642f58faed7SAthira Rajeev 				BUG_ON(!td->bind_cpumask);
643f58faed7SAthira Rajeev 				CPU_ZERO_S(size, td->bind_cpumask);
6441c13f3c9SIngo Molnar 				for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
645f58faed7SAthira Rajeev 					if (cpu < 0 || cpu >= g->p.nr_cpus) {
646f58faed7SAthira Rajeev 						CPU_FREE(td->bind_cpumask);
647f58faed7SAthira Rajeev 						BUG_ON(-1);
648f58faed7SAthira Rajeev 					}
649f58faed7SAthira Rajeev 					CPU_SET_S(cpu, size, td->bind_cpumask);
6501c13f3c9SIngo Molnar 				}
6511c13f3c9SIngo Molnar 				t++;
6521c13f3c9SIngo Molnar 			}
6531c13f3c9SIngo Molnar 		}
6541c13f3c9SIngo Molnar 	}
6551c13f3c9SIngo Molnar out:
6561c13f3c9SIngo Molnar 
6571c13f3c9SIngo Molnar 	tprintf("\n");
6581c13f3c9SIngo Molnar 
6591c13f3c9SIngo Molnar 	if (t < g->p.nr_tasks)
6601c13f3c9SIngo Molnar 		printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
6611c13f3c9SIngo Molnar 
6621c13f3c9SIngo Molnar 	free(str0);
663b81a48eaSPetr Holasek 	return 0;
6641c13f3c9SIngo Molnar }
6651c13f3c9SIngo Molnar 
/*
 * Callback with the (opt, arg, unset) signature for the CPU list option.
 *
 * Only @arg is used: a missing argument is rejected with -1, otherwise
 * the result of parse_cpu_list() on it is returned unchanged.
 */
static int parse_cpus_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	return arg ? parse_cpu_list(arg) : -1;
}
6741c13f3c9SIngo Molnar 
parse_node_list(const char * arg)6751c13f3c9SIngo Molnar static int parse_node_list(const char *arg)
6761c13f3c9SIngo Molnar {
6771c13f3c9SIngo Molnar 	p0.node_list_str = strdup(arg);
6781c13f3c9SIngo Molnar 
6791c13f3c9SIngo Molnar 	dprintf("got NODE list: {%s}\n", p0.node_list_str);
6801c13f3c9SIngo Molnar 
6811c13f3c9SIngo Molnar 	return 0;
6821c13f3c9SIngo Molnar }
6831c13f3c9SIngo Molnar 
parse_setup_node_list(void)684b81a48eaSPetr Holasek static int parse_setup_node_list(void)
6851c13f3c9SIngo Molnar {
6861c13f3c9SIngo Molnar 	struct thread_data *td;
6871c13f3c9SIngo Molnar 	char *str0, *str;
6881c13f3c9SIngo Molnar 	int t;
6891c13f3c9SIngo Molnar 
6901c13f3c9SIngo Molnar 	if (!g->p.node_list_str)
691b81a48eaSPetr Holasek 		return 0;
6921c13f3c9SIngo Molnar 
6931c13f3c9SIngo Molnar 	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
6941c13f3c9SIngo Molnar 
6951c13f3c9SIngo Molnar 	str0 = str = strdup(g->p.node_list_str);
6961c13f3c9SIngo Molnar 	t = 0;
6971c13f3c9SIngo Molnar 
6981c13f3c9SIngo Molnar 	BUG_ON(!str);
6991c13f3c9SIngo Molnar 
7001c13f3c9SIngo Molnar 	tprintf("# binding tasks to NODEs:\n");
7011c13f3c9SIngo Molnar 	tprintf("# ");
7021c13f3c9SIngo Molnar 
7031c13f3c9SIngo Molnar 	while (true) {
7041c13f3c9SIngo Molnar 		int bind_node, bind_node_0, bind_node_1;
7051c13f3c9SIngo Molnar 		char *tok, *tok_end, *tok_step, *tok_mul;
7061c13f3c9SIngo Molnar 		int step;
7071c13f3c9SIngo Molnar 		int mul;
7081c13f3c9SIngo Molnar 
7091c13f3c9SIngo Molnar 		tok = strsep(&str, ",");
7101c13f3c9SIngo Molnar 		if (!tok)
7111c13f3c9SIngo Molnar 			break;
7121c13f3c9SIngo Molnar 
7131c13f3c9SIngo Molnar 		tok_end = strstr(tok, "-");
7141c13f3c9SIngo Molnar 
7151c13f3c9SIngo Molnar 		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
7161c13f3c9SIngo Molnar 		if (!tok_end) {
7171c13f3c9SIngo Molnar 			/* Single NODE specified: */
7181c13f3c9SIngo Molnar 			bind_node_0 = bind_node_1 = atol(tok);
7191c13f3c9SIngo Molnar 		} else {
7201c13f3c9SIngo Molnar 			/* NODE range specified (for example: "5-11"): */
7211c13f3c9SIngo Molnar 			bind_node_0 = atol(tok);
7221c13f3c9SIngo Molnar 			bind_node_1 = atol(tok_end + 1);
7231c13f3c9SIngo Molnar 		}
7241c13f3c9SIngo Molnar 
7251c13f3c9SIngo Molnar 		step = 1;
7261c13f3c9SIngo Molnar 		tok_step = strstr(tok, "#");
7271c13f3c9SIngo Molnar 		if (tok_step) {
7281c13f3c9SIngo Molnar 			step = atol(tok_step + 1);
7291c13f3c9SIngo Molnar 			BUG_ON(step <= 0 || step >= g->p.nr_nodes);
7301c13f3c9SIngo Molnar 		}
7311c13f3c9SIngo Molnar 
7321c13f3c9SIngo Molnar 		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
7331c13f3c9SIngo Molnar 		mul = 1;
7341c13f3c9SIngo Molnar 		tok_mul = strstr(tok, "x");
7351c13f3c9SIngo Molnar 		if (tok_mul) {
7361c13f3c9SIngo Molnar 			mul = atol(tok_mul + 1);
7371c13f3c9SIngo Molnar 			BUG_ON(mul <= 0);
7381c13f3c9SIngo Molnar 		}
7391c13f3c9SIngo Molnar 
7401c13f3c9SIngo Molnar 		dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
7411c13f3c9SIngo Molnar 
742b81a48eaSPetr Holasek 		if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
743b81a48eaSPetr Holasek 			printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
744b81a48eaSPetr Holasek 			return -1;
745b81a48eaSPetr Holasek 		}
746b81a48eaSPetr Holasek 
747b81a48eaSPetr Holasek 		BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
7481c13f3c9SIngo Molnar 		BUG_ON(bind_node_0 > bind_node_1);
7491c13f3c9SIngo Molnar 
7501c13f3c9SIngo Molnar 		for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
7511c13f3c9SIngo Molnar 			int i;
7521c13f3c9SIngo Molnar 
7531c13f3c9SIngo Molnar 			for (i = 0; i < mul; i++) {
754321a7c35SSatheesh Rajendran 				if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
7551c13f3c9SIngo Molnar 					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
7561c13f3c9SIngo Molnar 					goto out;
7571c13f3c9SIngo Molnar 				}
7581c13f3c9SIngo Molnar 				td = g->threads + t;
7591c13f3c9SIngo Molnar 
7601c13f3c9SIngo Molnar 				if (!t)
7611c13f3c9SIngo Molnar 					tprintf(" %2d", bind_node);
7621c13f3c9SIngo Molnar 				else
7631c13f3c9SIngo Molnar 					tprintf(",%2d", bind_node);
7641c13f3c9SIngo Molnar 
7651c13f3c9SIngo Molnar 				td->bind_node = bind_node;
7661c13f3c9SIngo Molnar 				t++;
7671c13f3c9SIngo Molnar 			}
7681c13f3c9SIngo Molnar 		}
7691c13f3c9SIngo Molnar 	}
7701c13f3c9SIngo Molnar out:
7711c13f3c9SIngo Molnar 
7721c13f3c9SIngo Molnar 	tprintf("\n");
7731c13f3c9SIngo Molnar 
7741c13f3c9SIngo Molnar 	if (t < g->p.nr_tasks)
7751c13f3c9SIngo Molnar 		printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
7761c13f3c9SIngo Molnar 
7771c13f3c9SIngo Molnar 	free(str0);
778b81a48eaSPetr Holasek 	return 0;
7791c13f3c9SIngo Molnar }
7801c13f3c9SIngo Molnar 
/*
 * Callback with the (opt, arg, unset) signature for the NODE list option.
 *
 * Only @arg is used: a missing argument is rejected with -1, otherwise
 * the result of parse_node_list() on it is returned unchanged.
 */
static int parse_nodes_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	return arg ? parse_node_list(arg) : -1;
}
7891c13f3c9SIngo Molnar 
/*
 * Advance a 32-bit linear feedback shift register by one step.
 *
 * The state is shifted right by one bit; if the bit shifted out was set,
 * the tap mask (bits 1, 5, 6 and 31) is XORed into the result.
 * A state of 0 maps to 0.
 */
static inline uint32_t lfsr_32(uint32_t lfsr)
{
	const uint32_t taps = (UINT32_C(1) << 1) | (UINT32_C(1) << 5) |
			      (UINT32_C(1) << 6) | (UINT32_C(1) << 31);
	uint32_t next = lfsr >> 1;

	if (lfsr & 0x1u)
		next ^= taps;

	return next;
}
7951c13f3c9SIngo Molnar 
7961c13f3c9SIngo Molnar /*
7971c13f3c9SIngo Molnar  * Make sure there's real data dependency to RAM (when read
7981c13f3c9SIngo Molnar  * accesses are enabled), so the compiler, the CPU and the
7991c13f3c9SIngo Molnar  * kernel (KSM, zero page, etc.) cannot optimize away RAM
8001c13f3c9SIngo Molnar  * accesses:
8011c13f3c9SIngo Molnar  */
access_data(u64 * data,u64 val)8020353631aSArnaldo Carvalho de Melo static inline u64 access_data(u64 *data, u64 val)
8031c13f3c9SIngo Molnar {
8041c13f3c9SIngo Molnar 	if (g->p.data_reads)
8051c13f3c9SIngo Molnar 		val += *data;
8061c13f3c9SIngo Molnar 	if (g->p.data_writes)
8071c13f3c9SIngo Molnar 		*data = val + 1;
8081c13f3c9SIngo Molnar 	return val;
8091c13f3c9SIngo Molnar }
8101c13f3c9SIngo Molnar 
8111c13f3c9SIngo Molnar /*
8121c13f3c9SIngo Molnar  * The worker process does two types of work, a forwards going
8131c13f3c9SIngo Molnar  * loop and a backwards going loop.
8141c13f3c9SIngo Molnar  *
8151c13f3c9SIngo Molnar  * We do this so that on multiprocessor systems we do not create
8161c13f3c9SIngo Molnar  * a 'train' of processing, with highly synchronized processes,
8171c13f3c9SIngo Molnar  * skewing the whole benchmark.
8181c13f3c9SIngo Molnar  */
do_work(u8 * __data,long bytes,int nr,int nr_max,int loop,u64 val)8191c13f3c9SIngo Molnar static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
8201c13f3c9SIngo Molnar {
8211c13f3c9SIngo Molnar 	long words = bytes/sizeof(u64);
8221c13f3c9SIngo Molnar 	u64 *data = (void *)__data;
8231c13f3c9SIngo Molnar 	long chunk_0, chunk_1;
8241c13f3c9SIngo Molnar 	u64 *d0, *d, *d1;
8251c13f3c9SIngo Molnar 	long off;
8261c13f3c9SIngo Molnar 	long i;
8271c13f3c9SIngo Molnar 
8281c13f3c9SIngo Molnar 	BUG_ON(!data && words);
8291c13f3c9SIngo Molnar 	BUG_ON(data && !words);
8301c13f3c9SIngo Molnar 
8311c13f3c9SIngo Molnar 	if (!data)
8321c13f3c9SIngo Molnar 		return val;
8331c13f3c9SIngo Molnar 
8341c13f3c9SIngo Molnar 	/* Very simple memset() work variant: */
8351c13f3c9SIngo Molnar 	if (g->p.data_zero_memset && !g->p.data_rand_walk) {
8361c13f3c9SIngo Molnar 		bzero(data, bytes);
8371c13f3c9SIngo Molnar 		return val;
8381c13f3c9SIngo Molnar 	}
8391c13f3c9SIngo Molnar 
8401c13f3c9SIngo Molnar 	/* Spread out by PID/TID nr and by loop nr: */
8411c13f3c9SIngo Molnar 	chunk_0 = words/nr_max;
8421c13f3c9SIngo Molnar 	chunk_1 = words/g->p.nr_loops;
8431c13f3c9SIngo Molnar 	off = nr*chunk_0 + loop*chunk_1;
8441c13f3c9SIngo Molnar 
8451c13f3c9SIngo Molnar 	while (off >= words)
8461c13f3c9SIngo Molnar 		off -= words;
8471c13f3c9SIngo Molnar 
8481c13f3c9SIngo Molnar 	if (g->p.data_rand_walk) {
8491c13f3c9SIngo Molnar 		u32 lfsr = nr + loop + val;
850*337fa2dbSAndreas Herrmann 		long j;
8511c13f3c9SIngo Molnar 
8521c13f3c9SIngo Molnar 		for (i = 0; i < words/1024; i++) {
8531c13f3c9SIngo Molnar 			long start, end;
8541c13f3c9SIngo Molnar 
8551c13f3c9SIngo Molnar 			lfsr = lfsr_32(lfsr);
8561c13f3c9SIngo Molnar 
8571c13f3c9SIngo Molnar 			start = lfsr % words;
8581c13f3c9SIngo Molnar 			end = min(start + 1024, words-1);
8591c13f3c9SIngo Molnar 
8601c13f3c9SIngo Molnar 			if (g->p.data_zero_memset) {
8611c13f3c9SIngo Molnar 				bzero(data + start, (end-start) * sizeof(u64));
8621c13f3c9SIngo Molnar 			} else {
8631c13f3c9SIngo Molnar 				for (j = start; j < end; j++)
8641c13f3c9SIngo Molnar 					val = access_data(data + j, val);
8651c13f3c9SIngo Molnar 			}
8661c13f3c9SIngo Molnar 		}
8671c13f3c9SIngo Molnar 	} else if (!g->p.data_backwards || (nr + loop) & 1) {
86885372c69SAlexander Gordeev 		/* Process data forwards: */
8691c13f3c9SIngo Molnar 
8701c13f3c9SIngo Molnar 		d0 = data + off;
8711c13f3c9SIngo Molnar 		d  = data + off + 1;
8721c13f3c9SIngo Molnar 		d1 = data + words;
8731c13f3c9SIngo Molnar 
8741c13f3c9SIngo Molnar 		for (;;) {
8751c13f3c9SIngo Molnar 			if (unlikely(d >= d1))
8761c13f3c9SIngo Molnar 				d = data;
8771c13f3c9SIngo Molnar 			if (unlikely(d == d0))
8781c13f3c9SIngo Molnar 				break;
8791c13f3c9SIngo Molnar 
8801c13f3c9SIngo Molnar 			val = access_data(d, val);
8811c13f3c9SIngo Molnar 
8821c13f3c9SIngo Molnar 			d++;
8831c13f3c9SIngo Molnar 		}
8841c13f3c9SIngo Molnar 	} else {
8851c13f3c9SIngo Molnar 		/* Process data backwards: */
8861c13f3c9SIngo Molnar 
8871c13f3c9SIngo Molnar 		d0 = data + off;
8881c13f3c9SIngo Molnar 		d  = data + off - 1;
8891c13f3c9SIngo Molnar 		d1 = data + words;
8901c13f3c9SIngo Molnar 
8911c13f3c9SIngo Molnar 		for (;;) {
8921c13f3c9SIngo Molnar 			if (unlikely(d < data))
8931c13f3c9SIngo Molnar 				d = data + words-1;
8941c13f3c9SIngo Molnar 			if (unlikely(d == d0))
8951c13f3c9SIngo Molnar 				break;
8961c13f3c9SIngo Molnar 
8971c13f3c9SIngo Molnar 			val = access_data(d, val);
8981c13f3c9SIngo Molnar 
8991c13f3c9SIngo Molnar 			d--;
9001c13f3c9SIngo Molnar 		}
9011c13f3c9SIngo Molnar 	}
9021c13f3c9SIngo Molnar 
9031c13f3c9SIngo Molnar 	return val;
9041c13f3c9SIngo Molnar }
9051c13f3c9SIngo Molnar 
update_curr_cpu(int task_nr,unsigned long bytes_worked)9061c13f3c9SIngo Molnar static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
9071c13f3c9SIngo Molnar {
9081c13f3c9SIngo Molnar 	unsigned int cpu;
9091c13f3c9SIngo Molnar 
9101c13f3c9SIngo Molnar 	cpu = sched_getcpu();
9111c13f3c9SIngo Molnar 
9121c13f3c9SIngo Molnar 	g->threads[task_nr].curr_cpu = cpu;
9131c13f3c9SIngo Molnar 	prctl(0, bytes_worked);
9141c13f3c9SIngo Molnar }
9151c13f3c9SIngo Molnar 
9161c13f3c9SIngo Molnar /*
9171c13f3c9SIngo Molnar  * Count the number of nodes a process's threads
9181c13f3c9SIngo Molnar  * are spread out on.
9191c13f3c9SIngo Molnar  *
9201c13f3c9SIngo Molnar  * A count of 1 means that the process is compressed
9211c13f3c9SIngo Molnar  * to a single node. A count of g->p.nr_nodes means it's
9221c13f3c9SIngo Molnar  * spread out on the whole system.
9231c13f3c9SIngo Molnar  */
count_process_nodes(int process_nr)9241c13f3c9SIngo Molnar static int count_process_nodes(int process_nr)
9251c13f3c9SIngo Molnar {
926394e4306SAthira Rajeev 	char *node_present;
9271c13f3c9SIngo Molnar 	int nodes;
9281c13f3c9SIngo Molnar 	int n, t;
9291c13f3c9SIngo Molnar 
930394e4306SAthira Rajeev 	node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
931394e4306SAthira Rajeev 	BUG_ON(!node_present);
932394e4306SAthira Rajeev 	for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
933394e4306SAthira Rajeev 		node_present[nodes] = 0;
934394e4306SAthira Rajeev 
9351c13f3c9SIngo Molnar 	for (t = 0; t < g->p.nr_threads; t++) {
9361c13f3c9SIngo Molnar 		struct thread_data *td;
9371c13f3c9SIngo Molnar 		int task_nr;
9381c13f3c9SIngo Molnar 		int node;
9391c13f3c9SIngo Molnar 
9401c13f3c9SIngo Molnar 		task_nr = process_nr*g->p.nr_threads + t;
9411c13f3c9SIngo Molnar 		td = g->threads + task_nr;
9421c13f3c9SIngo Molnar 
9431c13f3c9SIngo Molnar 		node = numa_node_of_cpu(td->curr_cpu);
944394e4306SAthira Rajeev 		if (node < 0) /* curr_cpu was likely still -1 */ {
945394e4306SAthira Rajeev 			free(node_present);
9461d90a685SPetr Holasek 			return 0;
947394e4306SAthira Rajeev 		}
9481d90a685SPetr Holasek 
9491c13f3c9SIngo Molnar 		node_present[node] = 1;
9501c13f3c9SIngo Molnar 	}
9511c13f3c9SIngo Molnar 
9521c13f3c9SIngo Molnar 	nodes = 0;
9531c13f3c9SIngo Molnar 
954394e4306SAthira Rajeev 	for (n = 0; n < g->p.nr_nodes; n++)
9551c13f3c9SIngo Molnar 		nodes += node_present[n];
9561c13f3c9SIngo Molnar 
957394e4306SAthira Rajeev 	free(node_present);
9581c13f3c9SIngo Molnar 	return nodes;
9591c13f3c9SIngo Molnar }
9601c13f3c9SIngo Molnar 
9611c13f3c9SIngo Molnar /*
9621c13f3c9SIngo Molnar  * Count the number of distinct process-threads a node contains.
9631c13f3c9SIngo Molnar  *
9641c13f3c9SIngo Molnar  * A count of 1 means that the node contains only a single
9651c13f3c9SIngo Molnar  * process. If all nodes on the system contain at most one
9661c13f3c9SIngo Molnar  * process then we are well-converged.
9671c13f3c9SIngo Molnar  */
count_node_processes(int node)9681c13f3c9SIngo Molnar static int count_node_processes(int node)
9691c13f3c9SIngo Molnar {
9701c13f3c9SIngo Molnar 	int processes = 0;
9711c13f3c9SIngo Molnar 	int t, p;
9721c13f3c9SIngo Molnar 
9731c13f3c9SIngo Molnar 	for (p = 0; p < g->p.nr_proc; p++) {
9741c13f3c9SIngo Molnar 		for (t = 0; t < g->p.nr_threads; t++) {
9751c13f3c9SIngo Molnar 			struct thread_data *td;
9761c13f3c9SIngo Molnar 			int task_nr;
9771c13f3c9SIngo Molnar 			int n;
9781c13f3c9SIngo Molnar 
9791c13f3c9SIngo Molnar 			task_nr = p*g->p.nr_threads + t;
9801c13f3c9SIngo Molnar 			td = g->threads + task_nr;
9811c13f3c9SIngo Molnar 
9821c13f3c9SIngo Molnar 			n = numa_node_of_cpu(td->curr_cpu);
9831c13f3c9SIngo Molnar 			if (n == node) {
9841c13f3c9SIngo Molnar 				processes++;
9851c13f3c9SIngo Molnar 				break;
9861c13f3c9SIngo Molnar 			}
9871c13f3c9SIngo Molnar 		}
9881c13f3c9SIngo Molnar 	}
9891c13f3c9SIngo Molnar 
9901c13f3c9SIngo Molnar 	return processes;
9911c13f3c9SIngo Molnar }
9921c13f3c9SIngo Molnar 
calc_convergence_compression(int * strong)9931c13f3c9SIngo Molnar static void calc_convergence_compression(int *strong)
9941c13f3c9SIngo Molnar {
9951c13f3c9SIngo Molnar 	unsigned int nodes_min, nodes_max;
9961c13f3c9SIngo Molnar 	int p;
9971c13f3c9SIngo Molnar 
9981c13f3c9SIngo Molnar 	nodes_min = -1;
9991c13f3c9SIngo Molnar 	nodes_max =  0;
10001c13f3c9SIngo Molnar 
10011c13f3c9SIngo Molnar 	for (p = 0; p < g->p.nr_proc; p++) {
10021c13f3c9SIngo Molnar 		unsigned int nodes = count_process_nodes(p);
10031c13f3c9SIngo Molnar 
10041d90a685SPetr Holasek 		if (!nodes) {
10051d90a685SPetr Holasek 			*strong = 0;
10061d90a685SPetr Holasek 			return;
10071d90a685SPetr Holasek 		}
10081d90a685SPetr Holasek 
10091c13f3c9SIngo Molnar 		nodes_min = min(nodes, nodes_min);
10101c13f3c9SIngo Molnar 		nodes_max = max(nodes, nodes_max);
10111c13f3c9SIngo Molnar 	}
10121c13f3c9SIngo Molnar 
10131c13f3c9SIngo Molnar 	/* Strong convergence: all threads compress on a single node: */
10141c13f3c9SIngo Molnar 	if (nodes_min == 1 && nodes_max == 1) {
10151c13f3c9SIngo Molnar 		*strong = 1;
10161c13f3c9SIngo Molnar 	} else {
10171c13f3c9SIngo Molnar 		*strong = 0;
10181c13f3c9SIngo Molnar 		tprintf(" {%d-%d}", nodes_min, nodes_max);
10191c13f3c9SIngo Molnar 	}
10201c13f3c9SIngo Molnar }
10211c13f3c9SIngo Molnar 
calc_convergence(double runtime_ns_max,double * convergence)10221c13f3c9SIngo Molnar static void calc_convergence(double runtime_ns_max, double *convergence)
10231c13f3c9SIngo Molnar {
10241c13f3c9SIngo Molnar 	unsigned int loops_done_min, loops_done_max;
10251c13f3c9SIngo Molnar 	int process_groups;
1026394e4306SAthira Rajeev 	int *nodes;
10271c13f3c9SIngo Molnar 	int distance;
10281c13f3c9SIngo Molnar 	int nr_min;
10291c13f3c9SIngo Molnar 	int nr_max;
10301c13f3c9SIngo Molnar 	int strong;
10311c13f3c9SIngo Molnar 	int sum;
10321c13f3c9SIngo Molnar 	int nr;
10331c13f3c9SIngo Molnar 	int node;
10341c13f3c9SIngo Molnar 	int cpu;
10351c13f3c9SIngo Molnar 	int t;
10361c13f3c9SIngo Molnar 
10371c13f3c9SIngo Molnar 	if (!g->p.show_convergence && !g->p.measure_convergence)
10381c13f3c9SIngo Molnar 		return;
10391c13f3c9SIngo Molnar 
1040394e4306SAthira Rajeev 	nodes = (int *)malloc(g->p.nr_nodes * sizeof(int));
1041394e4306SAthira Rajeev 	BUG_ON(!nodes);
10421c13f3c9SIngo Molnar 	for (node = 0; node < g->p.nr_nodes; node++)
10431c13f3c9SIngo Molnar 		nodes[node] = 0;
10441c13f3c9SIngo Molnar 
10451c13f3c9SIngo Molnar 	loops_done_min = -1;
10461c13f3c9SIngo Molnar 	loops_done_max = 0;
10471c13f3c9SIngo Molnar 
10481c13f3c9SIngo Molnar 	for (t = 0; t < g->p.nr_tasks; t++) {
10491c13f3c9SIngo Molnar 		struct thread_data *td = g->threads + t;
10501c13f3c9SIngo Molnar 		unsigned int loops_done;
10511c13f3c9SIngo Molnar 
10521c13f3c9SIngo Molnar 		cpu = td->curr_cpu;
10531c13f3c9SIngo Molnar 
10541c13f3c9SIngo Molnar 		/* Not all threads have written it yet: */
10551c13f3c9SIngo Molnar 		if (cpu < 0)
10561c13f3c9SIngo Molnar 			continue;
10571c13f3c9SIngo Molnar 
10581c13f3c9SIngo Molnar 		node = numa_node_of_cpu(cpu);
10591c13f3c9SIngo Molnar 
10601c13f3c9SIngo Molnar 		nodes[node]++;
10611c13f3c9SIngo Molnar 
10621c13f3c9SIngo Molnar 		loops_done = td->loops_done;
10631c13f3c9SIngo Molnar 		loops_done_min = min(loops_done, loops_done_min);
10641c13f3c9SIngo Molnar 		loops_done_max = max(loops_done, loops_done_max);
10651c13f3c9SIngo Molnar 	}
10661c13f3c9SIngo Molnar 
10671c13f3c9SIngo Molnar 	nr_max = 0;
10681c13f3c9SIngo Molnar 	nr_min = g->p.nr_tasks;
10691c13f3c9SIngo Molnar 	sum = 0;
10701c13f3c9SIngo Molnar 
10711c13f3c9SIngo Molnar 	for (node = 0; node < g->p.nr_nodes; node++) {
1072321a7c35SSatheesh Rajendran 		if (!is_node_present(node))
1073321a7c35SSatheesh Rajendran 			continue;
10741c13f3c9SIngo Molnar 		nr = nodes[node];
10751c13f3c9SIngo Molnar 		nr_min = min(nr, nr_min);
10761c13f3c9SIngo Molnar 		nr_max = max(nr, nr_max);
10771c13f3c9SIngo Molnar 		sum += nr;
10781c13f3c9SIngo Molnar 	}
10791c13f3c9SIngo Molnar 	BUG_ON(nr_min > nr_max);
10801c13f3c9SIngo Molnar 
10811c13f3c9SIngo Molnar 	BUG_ON(sum > g->p.nr_tasks);
10821c13f3c9SIngo Molnar 
1083394e4306SAthira Rajeev 	if (0 && (sum < g->p.nr_tasks)) {
1084394e4306SAthira Rajeev 		free(nodes);
10851c13f3c9SIngo Molnar 		return;
1086394e4306SAthira Rajeev 	}
10871c13f3c9SIngo Molnar 
10881c13f3c9SIngo Molnar 	/*
10891c13f3c9SIngo Molnar 	 * Count the number of distinct process groups present
10901c13f3c9SIngo Molnar 	 * on nodes - when we are converged this will decrease
10911c13f3c9SIngo Molnar 	 * to g->p.nr_proc:
10921c13f3c9SIngo Molnar 	 */
10931c13f3c9SIngo Molnar 	process_groups = 0;
10941c13f3c9SIngo Molnar 
10951c13f3c9SIngo Molnar 	for (node = 0; node < g->p.nr_nodes; node++) {
1096321a7c35SSatheesh Rajendran 		int processes;
10971c13f3c9SIngo Molnar 
1098321a7c35SSatheesh Rajendran 		if (!is_node_present(node))
1099321a7c35SSatheesh Rajendran 			continue;
1100321a7c35SSatheesh Rajendran 		processes = count_node_processes(node);
11011c13f3c9SIngo Molnar 		nr = nodes[node];
11021c13f3c9SIngo Molnar 		tprintf(" %2d/%-2d", nr, processes);
11031c13f3c9SIngo Molnar 
11041c13f3c9SIngo Molnar 		process_groups += processes;
11051c13f3c9SIngo Molnar 	}
11061c13f3c9SIngo Molnar 
11071c13f3c9SIngo Molnar 	distance = nr_max - nr_min;
11081c13f3c9SIngo Molnar 
11091c13f3c9SIngo Molnar 	tprintf(" [%2d/%-2d]", distance, process_groups);
11101c13f3c9SIngo Molnar 
11111c13f3c9SIngo Molnar 	tprintf(" l:%3d-%-3d (%3d)",
11121c13f3c9SIngo Molnar 		loops_done_min, loops_done_max, loops_done_max-loops_done_min);
11131c13f3c9SIngo Molnar 
11141c13f3c9SIngo Molnar 	if (loops_done_min && loops_done_max) {
11151c13f3c9SIngo Molnar 		double skew = 1.0 - (double)loops_done_min/loops_done_max;
11161c13f3c9SIngo Molnar 
11171c13f3c9SIngo Molnar 		tprintf(" [%4.1f%%]", skew * 100.0);
11181c13f3c9SIngo Molnar 	}
11191c13f3c9SIngo Molnar 
11201c13f3c9SIngo Molnar 	calc_convergence_compression(&strong);
11211c13f3c9SIngo Molnar 
11221c13f3c9SIngo Molnar 	if (strong && process_groups == g->p.nr_proc) {
11231c13f3c9SIngo Molnar 		if (!*convergence) {
11241c13f3c9SIngo Molnar 			*convergence = runtime_ns_max;
1125a8ad8329SArnaldo Carvalho de Melo 			tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
11261c13f3c9SIngo Molnar 			if (g->p.measure_convergence) {
11271c13f3c9SIngo Molnar 				g->all_converged = true;
11281c13f3c9SIngo Molnar 				g->stop_work = true;
11291c13f3c9SIngo Molnar 			}
11301c13f3c9SIngo Molnar 		}
11311c13f3c9SIngo Molnar 	} else {
11321c13f3c9SIngo Molnar 		if (*convergence) {
1133a8ad8329SArnaldo Carvalho de Melo 			tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
11341c13f3c9SIngo Molnar 			*convergence = 0;
11351c13f3c9SIngo Molnar 		}
11361c13f3c9SIngo Molnar 		tprintf("\n");
11371c13f3c9SIngo Molnar 	}
1138394e4306SAthira Rajeev 
1139394e4306SAthira Rajeev 	free(nodes);
11401c13f3c9SIngo Molnar }
11411c13f3c9SIngo Molnar 
show_summary(double runtime_ns_max,int l,double * convergence)11421c13f3c9SIngo Molnar static void show_summary(double runtime_ns_max, int l, double *convergence)
11431c13f3c9SIngo Molnar {
11441c13f3c9SIngo Molnar 	tprintf("\r #  %5.1f%%  [%.1f mins]",
1145a8ad8329SArnaldo Carvalho de Melo 		(double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0);
11461c13f3c9SIngo Molnar 
11471c13f3c9SIngo Molnar 	calc_convergence(runtime_ns_max, convergence);
11481c13f3c9SIngo Molnar 
11491c13f3c9SIngo Molnar 	if (g->p.show_details >= 0)
11501c13f3c9SIngo Molnar 		fflush(stdout);
11511c13f3c9SIngo Molnar }
11521c13f3c9SIngo Molnar 
worker_thread(void * __tdata)11531c13f3c9SIngo Molnar static void *worker_thread(void *__tdata)
11541c13f3c9SIngo Molnar {
11551c13f3c9SIngo Molnar 	struct thread_data *td = __tdata;
11561c13f3c9SIngo Molnar 	struct timeval start0, start, stop, diff;
11571c13f3c9SIngo Molnar 	int process_nr = td->process_nr;
11581c13f3c9SIngo Molnar 	int thread_nr = td->thread_nr;
11591c13f3c9SIngo Molnar 	unsigned long last_perturbance;
11601c13f3c9SIngo Molnar 	int task_nr = td->task_nr;
11611c13f3c9SIngo Molnar 	int details = g->p.show_details;
11621c13f3c9SIngo Molnar 	int first_task, last_task;
11631c13f3c9SIngo Molnar 	double convergence = 0;
11641c13f3c9SIngo Molnar 	u64 val = td->val;
11651c13f3c9SIngo Molnar 	double runtime_ns_max;
11661c13f3c9SIngo Molnar 	u8 *global_data;
11671c13f3c9SIngo Molnar 	u8 *process_data;
11681c13f3c9SIngo Molnar 	u8 *thread_data;
116998310707SJiri Olsa 	u64 bytes_done, secs;
11701c13f3c9SIngo Molnar 	long work_done;
11711c13f3c9SIngo Molnar 	u32 l;
1172b64aa553SPetr Holasek 	struct rusage rusage;
11731c13f3c9SIngo Molnar 
11741c13f3c9SIngo Molnar 	bind_to_cpumask(td->bind_cpumask);
11751c13f3c9SIngo Molnar 	bind_to_memnode(td->bind_node);
11761c13f3c9SIngo Molnar 
11771c13f3c9SIngo Molnar 	set_taskname("thread %d/%d", process_nr, thread_nr);
11781c13f3c9SIngo Molnar 
11791c13f3c9SIngo Molnar 	global_data = g->data;
11801c13f3c9SIngo Molnar 	process_data = td->process_data;
11811c13f3c9SIngo Molnar 	thread_data = setup_private_data(g->p.bytes_thread);
11821c13f3c9SIngo Molnar 
11831c13f3c9SIngo Molnar 	bytes_done = 0;
11841c13f3c9SIngo Molnar 
11851c13f3c9SIngo Molnar 	last_task = 0;
11861c13f3c9SIngo Molnar 	if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
11871c13f3c9SIngo Molnar 		last_task = 1;
11881c13f3c9SIngo Molnar 
11891c13f3c9SIngo Molnar 	first_task = 0;
11901c13f3c9SIngo Molnar 	if (process_nr == 0 && thread_nr == 0)
11911c13f3c9SIngo Molnar 		first_task = 1;
11921c13f3c9SIngo Molnar 
11931c13f3c9SIngo Molnar 	if (details >= 2) {
11941c13f3c9SIngo Molnar 		printf("#  thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
11951c13f3c9SIngo Molnar 			process_nr, thread_nr, global_data, process_data, thread_data);
11961c13f3c9SIngo Molnar 	}
11971c13f3c9SIngo Molnar 
11981c13f3c9SIngo Molnar 	if (g->p.serialize_startup) {
1199a64d3af5SIan Rogers 		mutex_lock(&g->startup_mutex);
12001c13f3c9SIngo Molnar 		g->nr_tasks_started++;
1201f9299385SIan Rogers 		/* The last thread wakes the main process. */
1202f9299385SIan Rogers 		if (g->nr_tasks_started == g->p.nr_tasks)
1203a64d3af5SIan Rogers 			cond_signal(&g->startup_cond);
1204f9299385SIan Rogers 
1205a64d3af5SIan Rogers 		mutex_unlock(&g->startup_mutex);
12061c13f3c9SIngo Molnar 
12071c13f3c9SIngo Molnar 		/* Here we will wait for the main process to start us all at once: */
1208a64d3af5SIan Rogers 		mutex_lock(&g->start_work_mutex);
1209f9299385SIan Rogers 		g->start_work = false;
12101c13f3c9SIngo Molnar 		g->nr_tasks_working++;
1211f9299385SIan Rogers 		while (!g->start_work)
1212a64d3af5SIan Rogers 			cond_wait(&g->start_work_cond, &g->start_work_mutex);
12131c13f3c9SIngo Molnar 
1214a64d3af5SIan Rogers 		mutex_unlock(&g->start_work_mutex);
12151c13f3c9SIngo Molnar 	}
12161c13f3c9SIngo Molnar 
12171c13f3c9SIngo Molnar 	gettimeofday(&start0, NULL);
12181c13f3c9SIngo Molnar 
12191c13f3c9SIngo Molnar 	start = stop = start0;
12201c13f3c9SIngo Molnar 	last_perturbance = start.tv_sec;
12211c13f3c9SIngo Molnar 
12221c13f3c9SIngo Molnar 	for (l = 0; l < g->p.nr_loops; l++) {
12231c13f3c9SIngo Molnar 		start = stop;
12241c13f3c9SIngo Molnar 
12251c13f3c9SIngo Molnar 		if (g->stop_work)
12261c13f3c9SIngo Molnar 			break;
12271c13f3c9SIngo Molnar 
12281c13f3c9SIngo Molnar 		val += do_work(global_data,  g->p.bytes_global,  process_nr, g->p.nr_proc,	l, val);
12291c13f3c9SIngo Molnar 		val += do_work(process_data, g->p.bytes_process, thread_nr,  g->p.nr_threads,	l, val);
12301c13f3c9SIngo Molnar 		val += do_work(thread_data,  g->p.bytes_thread,  0,          1,		l, val);
12311c13f3c9SIngo Molnar 
12321c13f3c9SIngo Molnar 		if (g->p.sleep_usecs) {
1233a64d3af5SIan Rogers 			mutex_lock(td->process_lock);
12341c13f3c9SIngo Molnar 			usleep(g->p.sleep_usecs);
1235a64d3af5SIan Rogers 			mutex_unlock(td->process_lock);
12361c13f3c9SIngo Molnar 		}
12371c13f3c9SIngo Molnar 		/*
12381c13f3c9SIngo Molnar 		 * Amount of work to be done under a process-global lock:
12391c13f3c9SIngo Molnar 		 */
12401c13f3c9SIngo Molnar 		if (g->p.bytes_process_locked) {
1241a64d3af5SIan Rogers 			mutex_lock(td->process_lock);
12421c13f3c9SIngo Molnar 			val += do_work(process_data, g->p.bytes_process_locked, thread_nr,  g->p.nr_threads,	l, val);
1243a64d3af5SIan Rogers 			mutex_unlock(td->process_lock);
12441c13f3c9SIngo Molnar 		}
12451c13f3c9SIngo Molnar 
12461c13f3c9SIngo Molnar 		work_done = g->p.bytes_global + g->p.bytes_process +
12471c13f3c9SIngo Molnar 			    g->p.bytes_process_locked + g->p.bytes_thread;
12481c13f3c9SIngo Molnar 
12491c13f3c9SIngo Molnar 		update_curr_cpu(task_nr, work_done);
12501c13f3c9SIngo Molnar 		bytes_done += work_done;
12511c13f3c9SIngo Molnar 
12521c13f3c9SIngo Molnar 		if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
12531c13f3c9SIngo Molnar 			continue;
12541c13f3c9SIngo Molnar 
12551c13f3c9SIngo Molnar 		td->loops_done = l;
12561c13f3c9SIngo Molnar 
12571c13f3c9SIngo Molnar 		gettimeofday(&stop, NULL);
12581c13f3c9SIngo Molnar 
12591c13f3c9SIngo Molnar 		/* Check whether our max runtime timed out: */
12601c13f3c9SIngo Molnar 		if (g->p.nr_secs) {
12611c13f3c9SIngo Molnar 			timersub(&stop, &start0, &diff);
12622100f778SAdrian Hunter 			if ((u32)diff.tv_sec >= g->p.nr_secs) {
12631c13f3c9SIngo Molnar 				g->stop_work = true;
12641c13f3c9SIngo Molnar 				break;
12651c13f3c9SIngo Molnar 			}
12661c13f3c9SIngo Molnar 		}
12671c13f3c9SIngo Molnar 
12681c13f3c9SIngo Molnar 		/* Update the summary at most once per second: */
12691c13f3c9SIngo Molnar 		if (start.tv_sec == stop.tv_sec)
12701c13f3c9SIngo Molnar 			continue;
12711c13f3c9SIngo Molnar 
12721c13f3c9SIngo Molnar 		/*
12731c13f3c9SIngo Molnar 		 * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
12741c13f3c9SIngo Molnar 		 * by migrating to CPU#0:
12751c13f3c9SIngo Molnar 		 */
12761c13f3c9SIngo Molnar 		if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
1277f58faed7SAthira Rajeev 			cpu_set_t *orig_mask;
12781c13f3c9SIngo Molnar 			int target_cpu;
12791c13f3c9SIngo Molnar 			int this_cpu;
12801c13f3c9SIngo Molnar 
12811c13f3c9SIngo Molnar 			last_perturbance = stop.tv_sec;
12821c13f3c9SIngo Molnar 
12831c13f3c9SIngo Molnar 			/*
12841c13f3c9SIngo Molnar 			 * Depending on where we are running, move into
12851c13f3c9SIngo Molnar 			 * the other half of the system, to create some
12861c13f3c9SIngo Molnar 			 * real disturbance:
12871c13f3c9SIngo Molnar 			 */
12881c13f3c9SIngo Molnar 			this_cpu = g->threads[task_nr].curr_cpu;
12891c13f3c9SIngo Molnar 			if (this_cpu < g->p.nr_cpus/2)
12901c13f3c9SIngo Molnar 				target_cpu = g->p.nr_cpus-1;
12911c13f3c9SIngo Molnar 			else
12921c13f3c9SIngo Molnar 				target_cpu = 0;
12931c13f3c9SIngo Molnar 
12941c13f3c9SIngo Molnar 			orig_mask = bind_to_cpu(target_cpu);
12951c13f3c9SIngo Molnar 
12961c13f3c9SIngo Molnar 			/* Here we are running on the target CPU already */
12971c13f3c9SIngo Molnar 			if (details >= 1)
12981c13f3c9SIngo Molnar 				printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
12991c13f3c9SIngo Molnar 
13001c13f3c9SIngo Molnar 			bind_to_cpumask(orig_mask);
1301f58faed7SAthira Rajeev 			CPU_FREE(orig_mask);
13021c13f3c9SIngo Molnar 		}
13031c13f3c9SIngo Molnar 
13041c13f3c9SIngo Molnar 		if (details >= 3) {
13051c13f3c9SIngo Molnar 			timersub(&stop, &start, &diff);
1306a8ad8329SArnaldo Carvalho de Melo 			runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
1307a8ad8329SArnaldo Carvalho de Melo 			runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
13081c13f3c9SIngo Molnar 
13091c13f3c9SIngo Molnar 			if (details >= 0) {
13102100f778SAdrian Hunter 				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
13111c13f3c9SIngo Molnar 					process_nr, thread_nr, runtime_ns_max / bytes_done, val);
13121c13f3c9SIngo Molnar 			}
13131c13f3c9SIngo Molnar 			fflush(stdout);
13141c13f3c9SIngo Molnar 		}
13151c13f3c9SIngo Molnar 		if (!last_task)
13161c13f3c9SIngo Molnar 			continue;
13171c13f3c9SIngo Molnar 
13181c13f3c9SIngo Molnar 		timersub(&stop, &start0, &diff);
1319a8ad8329SArnaldo Carvalho de Melo 		runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
1320a8ad8329SArnaldo Carvalho de Melo 		runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
13211c13f3c9SIngo Molnar 
13221c13f3c9SIngo Molnar 		show_summary(runtime_ns_max, l, &convergence);
13231c13f3c9SIngo Molnar 	}
13241c13f3c9SIngo Molnar 
13251c13f3c9SIngo Molnar 	gettimeofday(&stop, NULL);
13261c13f3c9SIngo Molnar 	timersub(&stop, &start0, &diff);
1327a8ad8329SArnaldo Carvalho de Melo 	td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
1328a8ad8329SArnaldo Carvalho de Melo 	td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
132998310707SJiri Olsa 	secs = td->runtime_ns / NSEC_PER_SEC;
133098310707SJiri Olsa 	td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0;
1331b64aa553SPetr Holasek 
1332b64aa553SPetr Holasek 	getrusage(RUSAGE_THREAD, &rusage);
1333a8ad8329SArnaldo Carvalho de Melo 	td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
1334a8ad8329SArnaldo Carvalho de Melo 	td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
1335a8ad8329SArnaldo Carvalho de Melo 	td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
1336a8ad8329SArnaldo Carvalho de Melo 	td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
13371c13f3c9SIngo Molnar 
13381c13f3c9SIngo Molnar 	free_data(thread_data, g->p.bytes_thread);
13391c13f3c9SIngo Molnar 
1340a64d3af5SIan Rogers 	mutex_lock(&g->stop_work_mutex);
13411c13f3c9SIngo Molnar 	g->bytes_done += bytes_done;
1342a64d3af5SIan Rogers 	mutex_unlock(&g->stop_work_mutex);
13431c13f3c9SIngo Molnar 
13441c13f3c9SIngo Molnar 	return NULL;
13451c13f3c9SIngo Molnar }
13461c13f3c9SIngo Molnar 
13471c13f3c9SIngo Molnar /*
13481c13f3c9SIngo Molnar  * A worker process starts a couple of threads:
13491c13f3c9SIngo Molnar  */
worker_process(int process_nr)13501c13f3c9SIngo Molnar static void worker_process(int process_nr)
13511c13f3c9SIngo Molnar {
1352a64d3af5SIan Rogers 	struct mutex process_lock;
13531c13f3c9SIngo Molnar 	struct thread_data *td;
13541c13f3c9SIngo Molnar 	pthread_t *pthreads;
13551c13f3c9SIngo Molnar 	u8 *process_data;
13561c13f3c9SIngo Molnar 	int task_nr;
13571c13f3c9SIngo Molnar 	int ret;
13581c13f3c9SIngo Molnar 	int t;
13591c13f3c9SIngo Molnar 
1360a64d3af5SIan Rogers 	mutex_init(&process_lock);
13611c13f3c9SIngo Molnar 	set_taskname("process %d", process_nr);
13621c13f3c9SIngo Molnar 
13631c13f3c9SIngo Molnar 	/*
13641c13f3c9SIngo Molnar 	 * Pick up the memory policy and the CPU binding of our first thread,
13651c13f3c9SIngo Molnar 	 * so that we initialize memory accordingly:
13661c13f3c9SIngo Molnar 	 */
13671c13f3c9SIngo Molnar 	task_nr = process_nr*g->p.nr_threads;
13681c13f3c9SIngo Molnar 	td = g->threads + task_nr;
13691c13f3c9SIngo Molnar 
13701c13f3c9SIngo Molnar 	bind_to_memnode(td->bind_node);
13711c13f3c9SIngo Molnar 	bind_to_cpumask(td->bind_cpumask);
13721c13f3c9SIngo Molnar 
13731c13f3c9SIngo Molnar 	pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
13741c13f3c9SIngo Molnar 	process_data = setup_private_data(g->p.bytes_process);
13751c13f3c9SIngo Molnar 
13761c13f3c9SIngo Molnar 	if (g->p.show_details >= 3) {
13771c13f3c9SIngo Molnar 		printf(" # process %2d global mem: %p, process mem: %p\n",
13781c13f3c9SIngo Molnar 			process_nr, g->data, process_data);
13791c13f3c9SIngo Molnar 	}
13801c13f3c9SIngo Molnar 
13811c13f3c9SIngo Molnar 	for (t = 0; t < g->p.nr_threads; t++) {
13821c13f3c9SIngo Molnar 		task_nr = process_nr*g->p.nr_threads + t;
13831c13f3c9SIngo Molnar 		td = g->threads + task_nr;
13841c13f3c9SIngo Molnar 
13851c13f3c9SIngo Molnar 		td->process_data = process_data;
13861c13f3c9SIngo Molnar 		td->process_nr   = process_nr;
13871c13f3c9SIngo Molnar 		td->thread_nr    = t;
13881c13f3c9SIngo Molnar 		td->task_nr	 = task_nr;
13891c13f3c9SIngo Molnar 		td->val          = rand();
13901c13f3c9SIngo Molnar 		td->curr_cpu	 = -1;
13911c13f3c9SIngo Molnar 		td->process_lock = &process_lock;
13921c13f3c9SIngo Molnar 
13931c13f3c9SIngo Molnar 		ret = pthread_create(pthreads + t, NULL, worker_thread, td);
13941c13f3c9SIngo Molnar 		BUG_ON(ret);
13951c13f3c9SIngo Molnar 	}
13961c13f3c9SIngo Molnar 
13971c13f3c9SIngo Molnar 	for (t = 0; t < g->p.nr_threads; t++) {
13981c13f3c9SIngo Molnar                 ret = pthread_join(pthreads[t], NULL);
13991c13f3c9SIngo Molnar 		BUG_ON(ret);
14001c13f3c9SIngo Molnar 	}
14011c13f3c9SIngo Molnar 
14021c13f3c9SIngo Molnar 	free_data(process_data, g->p.bytes_process);
14031c13f3c9SIngo Molnar 	free(pthreads);
14041c13f3c9SIngo Molnar }
14051c13f3c9SIngo Molnar 
print_summary(void)14061c13f3c9SIngo Molnar static void print_summary(void)
14071c13f3c9SIngo Molnar {
14081c13f3c9SIngo Molnar 	if (g->p.show_details < 0)
14091c13f3c9SIngo Molnar 		return;
14101c13f3c9SIngo Molnar 
14111c13f3c9SIngo Molnar 	printf("\n ###\n");
14121c13f3c9SIngo Molnar 	printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
1413321a7c35SSatheesh Rajendran 		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
14141c13f3c9SIngo Molnar 	printf(" #      %5dx %5ldMB global  shared mem operations\n",
14151c13f3c9SIngo Molnar 			g->p.nr_loops, g->p.bytes_global/1024/1024);
14161c13f3c9SIngo Molnar 	printf(" #      %5dx %5ldMB process shared mem operations\n",
14171c13f3c9SIngo Molnar 			g->p.nr_loops, g->p.bytes_process/1024/1024);
14181c13f3c9SIngo Molnar 	printf(" #      %5dx %5ldMB thread  local  mem operations\n",
14191c13f3c9SIngo Molnar 			g->p.nr_loops, g->p.bytes_thread/1024/1024);
14201c13f3c9SIngo Molnar 
14211c13f3c9SIngo Molnar 	printf(" ###\n");
14221c13f3c9SIngo Molnar 
14231c13f3c9SIngo Molnar 	printf("\n ###\n"); fflush(stdout);
14241c13f3c9SIngo Molnar }
14251c13f3c9SIngo Molnar 
init_thread_data(void)14261c13f3c9SIngo Molnar static void init_thread_data(void)
14271c13f3c9SIngo Molnar {
14281c13f3c9SIngo Molnar 	ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
14291c13f3c9SIngo Molnar 	int t;
14301c13f3c9SIngo Molnar 
14311c13f3c9SIngo Molnar 	g->threads = zalloc_shared_data(size);
14321c13f3c9SIngo Molnar 
14331c13f3c9SIngo Molnar 	for (t = 0; t < g->p.nr_tasks; t++) {
14341c13f3c9SIngo Molnar 		struct thread_data *td = g->threads + t;
1435f58faed7SAthira Rajeev 		size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
14361c13f3c9SIngo Molnar 		int cpu;
14371c13f3c9SIngo Molnar 
14381c13f3c9SIngo Molnar 		/* Allow all nodes by default: */
14397c9eefe8SStephen Rothwell 		td->bind_node = NUMA_NO_NODE;
14401c13f3c9SIngo Molnar 
14411c13f3c9SIngo Molnar 		/* Allow all CPUs by default: */
1442f58faed7SAthira Rajeev 		td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
1443f58faed7SAthira Rajeev 		BUG_ON(!td->bind_cpumask);
1444f58faed7SAthira Rajeev 		CPU_ZERO_S(cpuset_size, td->bind_cpumask);
14451c13f3c9SIngo Molnar 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
1446f58faed7SAthira Rajeev 			CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
14471c13f3c9SIngo Molnar 	}
14481c13f3c9SIngo Molnar }
14491c13f3c9SIngo Molnar 
deinit_thread_data(void)14501c13f3c9SIngo Molnar static void deinit_thread_data(void)
14511c13f3c9SIngo Molnar {
14521c13f3c9SIngo Molnar 	ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
1453f58faed7SAthira Rajeev 	int t;
1454f58faed7SAthira Rajeev 
1455f58faed7SAthira Rajeev 	/* Free the bind_cpumask allocated for thread_data */
1456f58faed7SAthira Rajeev 	for (t = 0; t < g->p.nr_tasks; t++) {
1457f58faed7SAthira Rajeev 		struct thread_data *td = g->threads + t;
1458f58faed7SAthira Rajeev 		CPU_FREE(td->bind_cpumask);
1459f58faed7SAthira Rajeev 	}
14601c13f3c9SIngo Molnar 
14611c13f3c9SIngo Molnar 	free_data(g->threads, size);
14621c13f3c9SIngo Molnar }
14631c13f3c9SIngo Molnar 
init(void)14641c13f3c9SIngo Molnar static int init(void)
14651c13f3c9SIngo Molnar {
14661c13f3c9SIngo Molnar 	g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
14671c13f3c9SIngo Molnar 
14681c13f3c9SIngo Molnar 	/* Copy over options: */
14691c13f3c9SIngo Molnar 	g->p = p0;
14701c13f3c9SIngo Molnar 
14711c13f3c9SIngo Molnar 	g->p.nr_cpus = numa_num_configured_cpus();
14721c13f3c9SIngo Molnar 
14731c13f3c9SIngo Molnar 	g->p.nr_nodes = numa_max_node() + 1;
14741c13f3c9SIngo Molnar 
14751c13f3c9SIngo Molnar 	/* char array in count_process_nodes(): */
1476394e4306SAthira Rajeev 	BUG_ON(g->p.nr_nodes < 0);
14771c13f3c9SIngo Molnar 
1478a527c2c1SJames Clark 	if (quiet && !g->p.show_details)
14791c13f3c9SIngo Molnar 		g->p.show_details = -1;
14801c13f3c9SIngo Molnar 
14811c13f3c9SIngo Molnar 	/* Some memory should be specified: */
14821c13f3c9SIngo Molnar 	if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
14831c13f3c9SIngo Molnar 		return -1;
14841c13f3c9SIngo Molnar 
14851c13f3c9SIngo Molnar 	if (g->p.mb_global_str) {
14861c13f3c9SIngo Molnar 		g->p.mb_global = atof(g->p.mb_global_str);
14871c13f3c9SIngo Molnar 		BUG_ON(g->p.mb_global < 0);
14881c13f3c9SIngo Molnar 	}
14891c13f3c9SIngo Molnar 
14901c13f3c9SIngo Molnar 	if (g->p.mb_proc_str) {
14911c13f3c9SIngo Molnar 		g->p.mb_proc = atof(g->p.mb_proc_str);
14921c13f3c9SIngo Molnar 		BUG_ON(g->p.mb_proc < 0);
14931c13f3c9SIngo Molnar 	}
14941c13f3c9SIngo Molnar 
14951c13f3c9SIngo Molnar 	if (g->p.mb_proc_locked_str) {
14961c13f3c9SIngo Molnar 		g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
14971c13f3c9SIngo Molnar 		BUG_ON(g->p.mb_proc_locked < 0);
14981c13f3c9SIngo Molnar 		BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
14991c13f3c9SIngo Molnar 	}
15001c13f3c9SIngo Molnar 
15011c13f3c9SIngo Molnar 	if (g->p.mb_thread_str) {
15021c13f3c9SIngo Molnar 		g->p.mb_thread = atof(g->p.mb_thread_str);
15031c13f3c9SIngo Molnar 		BUG_ON(g->p.mb_thread < 0);
15041c13f3c9SIngo Molnar 	}
15051c13f3c9SIngo Molnar 
15061c13f3c9SIngo Molnar 	BUG_ON(g->p.nr_threads <= 0);
15071c13f3c9SIngo Molnar 	BUG_ON(g->p.nr_proc <= 0);
15081c13f3c9SIngo Molnar 
15091c13f3c9SIngo Molnar 	g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
15101c13f3c9SIngo Molnar 
15111c13f3c9SIngo Molnar 	g->p.bytes_global		= g->p.mb_global	*1024L*1024L;
15121c13f3c9SIngo Molnar 	g->p.bytes_process		= g->p.mb_proc		*1024L*1024L;
15131c13f3c9SIngo Molnar 	g->p.bytes_process_locked	= g->p.mb_proc_locked	*1024L*1024L;
15141c13f3c9SIngo Molnar 	g->p.bytes_thread		= g->p.mb_thread	*1024L*1024L;
15151c13f3c9SIngo Molnar 
15161c13f3c9SIngo Molnar 	g->data = setup_shared_data(g->p.bytes_global);
15171c13f3c9SIngo Molnar 
15181c13f3c9SIngo Molnar 	/* Startup serialization: */
1519a64d3af5SIan Rogers 	mutex_init_pshared(&g->start_work_mutex);
1520a64d3af5SIan Rogers 	cond_init_pshared(&g->start_work_cond);
1521a64d3af5SIan Rogers 	mutex_init_pshared(&g->startup_mutex);
1522a64d3af5SIan Rogers 	cond_init_pshared(&g->startup_cond);
1523a64d3af5SIan Rogers 	mutex_init_pshared(&g->stop_work_mutex);
15241c13f3c9SIngo Molnar 
15251c13f3c9SIngo Molnar 	init_thread_data();
15261c13f3c9SIngo Molnar 
15271c13f3c9SIngo Molnar 	tprintf("#\n");
1528b81a48eaSPetr Holasek 	if (parse_setup_cpu_list() || parse_setup_node_list())
1529b81a48eaSPetr Holasek 		return -1;
15301c13f3c9SIngo Molnar 	tprintf("#\n");
15311c13f3c9SIngo Molnar 
15321c13f3c9SIngo Molnar 	print_summary();
15331c13f3c9SIngo Molnar 
15341c13f3c9SIngo Molnar 	return 0;
15351c13f3c9SIngo Molnar }
15361c13f3c9SIngo Molnar 
deinit(void)15371c13f3c9SIngo Molnar static void deinit(void)
15381c13f3c9SIngo Molnar {
15391c13f3c9SIngo Molnar 	free_data(g->data, g->p.bytes_global);
15401c13f3c9SIngo Molnar 	g->data = NULL;
15411c13f3c9SIngo Molnar 
15421c13f3c9SIngo Molnar 	deinit_thread_data();
15431c13f3c9SIngo Molnar 
15441c13f3c9SIngo Molnar 	free_data(g, sizeof(*g));
15451c13f3c9SIngo Molnar 	g = NULL;
15461c13f3c9SIngo Molnar }
15471c13f3c9SIngo Molnar 
15481c13f3c9SIngo Molnar /*
15491c13f3c9SIngo Molnar  * Print a short or long result, depending on the verbosity setting:
15501c13f3c9SIngo Molnar  */
print_res(const char * name,double val,const char * txt_unit,const char * txt_short,const char * txt_long)15511c13f3c9SIngo Molnar static void print_res(const char *name, double val,
15521c13f3c9SIngo Molnar 		      const char *txt_unit, const char *txt_short, const char *txt_long)
15531c13f3c9SIngo Molnar {
15541c13f3c9SIngo Molnar 	if (!name)
15551c13f3c9SIngo Molnar 		name = "main,";
15561c13f3c9SIngo Molnar 
1557a527c2c1SJames Clark 	if (!quiet)
15581c13f3c9SIngo Molnar 		printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
15591c13f3c9SIngo Molnar 	else
15601c13f3c9SIngo Molnar 		printf(" %14.3f %s\n", val, txt_long);
15611c13f3c9SIngo Molnar }
15621c13f3c9SIngo Molnar 
__bench_numa(const char * name)15631c13f3c9SIngo Molnar static int __bench_numa(const char *name)
15641c13f3c9SIngo Molnar {
15651c13f3c9SIngo Molnar 	struct timeval start, stop, diff;
15661c13f3c9SIngo Molnar 	u64 runtime_ns_min, runtime_ns_sum;
15671c13f3c9SIngo Molnar 	pid_t *pids, pid, wpid;
15681c13f3c9SIngo Molnar 	double delta_runtime;
15691c13f3c9SIngo Molnar 	double runtime_avg;
15701c13f3c9SIngo Molnar 	double runtime_sec_max;
15711c13f3c9SIngo Molnar 	double runtime_sec_min;
15721c13f3c9SIngo Molnar 	int wait_stat;
15731c13f3c9SIngo Molnar 	double bytes;
1574b64aa553SPetr Holasek 	int i, t, p;
15751c13f3c9SIngo Molnar 
15761c13f3c9SIngo Molnar 	if (init())
15771c13f3c9SIngo Molnar 		return -1;
15781c13f3c9SIngo Molnar 
15791c13f3c9SIngo Molnar 	pids = zalloc(g->p.nr_proc * sizeof(*pids));
15801c13f3c9SIngo Molnar 	pid = -1;
15811c13f3c9SIngo Molnar 
15821c13f3c9SIngo Molnar 	if (g->p.serialize_startup) {
15831c13f3c9SIngo Molnar 		tprintf(" #\n");
15841c13f3c9SIngo Molnar 		tprintf(" # Startup synchronization: ..."); fflush(stdout);
15851c13f3c9SIngo Molnar 	}
15861c13f3c9SIngo Molnar 
15871c13f3c9SIngo Molnar 	gettimeofday(&start, NULL);
15881c13f3c9SIngo Molnar 
15891c13f3c9SIngo Molnar 	for (i = 0; i < g->p.nr_proc; i++) {
15901c13f3c9SIngo Molnar 		pid = fork();
15911c13f3c9SIngo Molnar 		dprintf(" # process %2d: PID %d\n", i, pid);
15921c13f3c9SIngo Molnar 
15931c13f3c9SIngo Molnar 		BUG_ON(pid < 0);
15941c13f3c9SIngo Molnar 		if (!pid) {
15951c13f3c9SIngo Molnar 			/* Child process: */
15961c13f3c9SIngo Molnar 			worker_process(i);
15971c13f3c9SIngo Molnar 
15981c13f3c9SIngo Molnar 			exit(0);
15991c13f3c9SIngo Molnar 		}
16001c13f3c9SIngo Molnar 		pids[i] = pid;
16011c13f3c9SIngo Molnar 
16021c13f3c9SIngo Molnar 	}
16031c13f3c9SIngo Molnar 
16041c13f3c9SIngo Molnar 	if (g->p.serialize_startup) {
1605f9299385SIan Rogers 		bool threads_ready = false;
16061c13f3c9SIngo Molnar 		double startup_sec;
16071c13f3c9SIngo Molnar 
1608f9299385SIan Rogers 		/*
1609f9299385SIan Rogers 		 * Wait for all the threads to start up. The last thread will
1610f9299385SIan Rogers 		 * signal this process.
1611f9299385SIan Rogers 		 */
1612a64d3af5SIan Rogers 		mutex_lock(&g->startup_mutex);
1613f9299385SIan Rogers 		while (g->nr_tasks_started != g->p.nr_tasks)
1614a64d3af5SIan Rogers 			cond_wait(&g->startup_cond, &g->startup_mutex);
16151c13f3c9SIngo Molnar 
1616a64d3af5SIan Rogers 		mutex_unlock(&g->startup_mutex);
1617f9299385SIan Rogers 
1618f9299385SIan Rogers 		/* Wait for all threads to be at the start_work_cond. */
1619f9299385SIan Rogers 		while (!threads_ready) {
1620a64d3af5SIan Rogers 			mutex_lock(&g->start_work_mutex);
1621f9299385SIan Rogers 			threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
1622a64d3af5SIan Rogers 			mutex_unlock(&g->start_work_mutex);
1623f9299385SIan Rogers 			if (!threads_ready)
1624f9299385SIan Rogers 				usleep(1);
1625f9299385SIan Rogers 		}
16261c13f3c9SIngo Molnar 
16271c13f3c9SIngo Molnar 		gettimeofday(&stop, NULL);
16281c13f3c9SIngo Molnar 
16291c13f3c9SIngo Molnar 		timersub(&stop, &start, &diff);
16301c13f3c9SIngo Molnar 
1631a8ad8329SArnaldo Carvalho de Melo 		startup_sec = diff.tv_sec * NSEC_PER_SEC;
1632a8ad8329SArnaldo Carvalho de Melo 		startup_sec += diff.tv_usec * NSEC_PER_USEC;
1633a8ad8329SArnaldo Carvalho de Melo 		startup_sec /= NSEC_PER_SEC;
16341c13f3c9SIngo Molnar 
16351c13f3c9SIngo Molnar 		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
16361c13f3c9SIngo Molnar 		tprintf(" #\n");
16371c13f3c9SIngo Molnar 
16381c13f3c9SIngo Molnar 		start = stop;
1639f9299385SIan Rogers 		/* Start all threads running. */
1640a64d3af5SIan Rogers 		mutex_lock(&g->start_work_mutex);
1641f9299385SIan Rogers 		g->start_work = true;
1642a64d3af5SIan Rogers 		mutex_unlock(&g->start_work_mutex);
1643a64d3af5SIan Rogers 		cond_broadcast(&g->start_work_cond);
16441c13f3c9SIngo Molnar 	} else {
16451c13f3c9SIngo Molnar 		gettimeofday(&start, NULL);
16461c13f3c9SIngo Molnar 	}
16471c13f3c9SIngo Molnar 
16481c13f3c9SIngo Molnar 	/* Parent process: */
16491c13f3c9SIngo Molnar 
16501c13f3c9SIngo Molnar 
16511c13f3c9SIngo Molnar 	for (i = 0; i < g->p.nr_proc; i++) {
16521c13f3c9SIngo Molnar 		wpid = waitpid(pids[i], &wait_stat, 0);
16531c13f3c9SIngo Molnar 		BUG_ON(wpid < 0);
16541c13f3c9SIngo Molnar 		BUG_ON(!WIFEXITED(wait_stat));
16551c13f3c9SIngo Molnar 
16561c13f3c9SIngo Molnar 	}
16571c13f3c9SIngo Molnar 
16581c13f3c9SIngo Molnar 	runtime_ns_sum = 0;
16591c13f3c9SIngo Molnar 	runtime_ns_min = -1LL;
16601c13f3c9SIngo Molnar 
16611c13f3c9SIngo Molnar 	for (t = 0; t < g->p.nr_tasks; t++) {
16621c13f3c9SIngo Molnar 		u64 thread_runtime_ns = g->threads[t].runtime_ns;
16631c13f3c9SIngo Molnar 
16641c13f3c9SIngo Molnar 		runtime_ns_sum += thread_runtime_ns;
16651c13f3c9SIngo Molnar 		runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
16661c13f3c9SIngo Molnar 	}
16671c13f3c9SIngo Molnar 
16681c13f3c9SIngo Molnar 	gettimeofday(&stop, NULL);
16691c13f3c9SIngo Molnar 	timersub(&stop, &start, &diff);
16701c13f3c9SIngo Molnar 
16711c13f3c9SIngo Molnar 	BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
16721c13f3c9SIngo Molnar 
16731c13f3c9SIngo Molnar 	tprintf("\n ###\n");
16741c13f3c9SIngo Molnar 	tprintf("\n");
16751c13f3c9SIngo Molnar 
1676a8ad8329SArnaldo Carvalho de Melo 	runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
1677a8ad8329SArnaldo Carvalho de Melo 	runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
1678a8ad8329SArnaldo Carvalho de Melo 	runtime_sec_max /= NSEC_PER_SEC;
16791c13f3c9SIngo Molnar 
1680a8ad8329SArnaldo Carvalho de Melo 	runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
16811c13f3c9SIngo Molnar 
16821c13f3c9SIngo Molnar 	bytes = g->bytes_done;
1683a8ad8329SArnaldo Carvalho de Melo 	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
16841c13f3c9SIngo Molnar 
16851c13f3c9SIngo Molnar 	if (g->p.measure_convergence) {
16861c13f3c9SIngo Molnar 		print_res(name, runtime_sec_max,
16871c13f3c9SIngo Molnar 			"secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
16881c13f3c9SIngo Molnar 	}
16891c13f3c9SIngo Molnar 
16901c13f3c9SIngo Molnar 	print_res(name, runtime_sec_max,
16911c13f3c9SIngo Molnar 		"secs,", "runtime-max/thread",	"secs slowest (max) thread-runtime");
16921c13f3c9SIngo Molnar 
16931c13f3c9SIngo Molnar 	print_res(name, runtime_sec_min,
16941c13f3c9SIngo Molnar 		"secs,", "runtime-min/thread",	"secs fastest (min) thread-runtime");
16951c13f3c9SIngo Molnar 
16961c13f3c9SIngo Molnar 	print_res(name, runtime_avg,
16971c13f3c9SIngo Molnar 		"secs,", "runtime-avg/thread",	"secs average thread-runtime");
16981c13f3c9SIngo Molnar 
16991c13f3c9SIngo Molnar 	delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
17001c13f3c9SIngo Molnar 	print_res(name, delta_runtime / runtime_sec_max * 100.0,
17011c13f3c9SIngo Molnar 		"%,", "spread-runtime/thread",	"% difference between max/avg runtime");
17021c13f3c9SIngo Molnar 
17031c13f3c9SIngo Molnar 	print_res(name, bytes / g->p.nr_tasks / 1e9,
17041c13f3c9SIngo Molnar 		"GB,", "data/thread",		"GB data processed, per thread");
17051c13f3c9SIngo Molnar 
17061c13f3c9SIngo Molnar 	print_res(name, bytes / 1e9,
17071c13f3c9SIngo Molnar 		"GB,", "data-total",		"GB data processed, total");
17081c13f3c9SIngo Molnar 
1709a8ad8329SArnaldo Carvalho de Melo 	print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
17101c13f3c9SIngo Molnar 		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
17111c13f3c9SIngo Molnar 
17121c13f3c9SIngo Molnar 	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
17131c13f3c9SIngo Molnar 		"GB/sec,", "thread-speed",	"GB/sec/thread speed");
17141c13f3c9SIngo Molnar 
17151c13f3c9SIngo Molnar 	print_res(name, bytes / runtime_sec_max / 1e9,
17161c13f3c9SIngo Molnar 		"GB/sec,", "total-speed",	"GB/sec total speed");
17171c13f3c9SIngo Molnar 
1718b64aa553SPetr Holasek 	if (g->p.show_details >= 2) {
1719f8ac1c47SThomas Richter 		char tname[14 + 2 * 11 + 1];
1720b64aa553SPetr Holasek 		struct thread_data *td;
1721b64aa553SPetr Holasek 		for (p = 0; p < g->p.nr_proc; p++) {
1722b64aa553SPetr Holasek 			for (t = 0; t < g->p.nr_threads; t++) {
17233aff8ba0SArnaldo Carvalho de Melo 				memset(tname, 0, sizeof(tname));
1724b64aa553SPetr Holasek 				td = g->threads + p*g->p.nr_threads + t;
17253aff8ba0SArnaldo Carvalho de Melo 				snprintf(tname, sizeof(tname), "process%d:thread%d", p, t);
1726b64aa553SPetr Holasek 				print_res(tname, td->speed_gbs,
1727b64aa553SPetr Holasek 					"GB/sec",	"thread-speed", "GB/sec/thread speed");
1728a8ad8329SArnaldo Carvalho de Melo 				print_res(tname, td->system_time_ns / NSEC_PER_SEC,
1729b64aa553SPetr Holasek 					"secs",	"thread-system-time", "system CPU time/thread");
1730a8ad8329SArnaldo Carvalho de Melo 				print_res(tname, td->user_time_ns / NSEC_PER_SEC,
1731b64aa553SPetr Holasek 					"secs",	"thread-user-time", "user CPU time/thread");
1732b64aa553SPetr Holasek 			}
1733b64aa553SPetr Holasek 		}
1734b64aa553SPetr Holasek 	}
1735b64aa553SPetr Holasek 
17361c13f3c9SIngo Molnar 	free(pids);
17371c13f3c9SIngo Molnar 
17381c13f3c9SIngo Molnar 	deinit();
17391c13f3c9SIngo Molnar 
17401c13f3c9SIngo Molnar 	return 0;
17411c13f3c9SIngo Molnar }
17421c13f3c9SIngo Molnar 
#define MAX_ARGS 50

/*
 * Count the entries of a NULL-terminated argument vector.
 *
 * The count must stay below MAX_ARGS; anything else trips the BUG_ON().
 */
static int command_size(const char **argv)
{
	int nr;

	for (nr = 0; argv[nr]; nr++)
		;

	BUG_ON(nr >= MAX_ARGS);

	return nr;
}
17581c13f3c9SIngo Molnar 
init_params(struct params * p,const char * name,int argc,const char ** argv)17591c13f3c9SIngo Molnar static void init_params(struct params *p, const char *name, int argc, const char **argv)
17601c13f3c9SIngo Molnar {
17611c13f3c9SIngo Molnar 	int i;
17621c13f3c9SIngo Molnar 
17631c13f3c9SIngo Molnar 	printf("\n # Running %s \"perf bench numa", name);
17641c13f3c9SIngo Molnar 
17651c13f3c9SIngo Molnar 	for (i = 0; i < argc; i++)
17661c13f3c9SIngo Molnar 		printf(" %s", argv[i]);
17671c13f3c9SIngo Molnar 
17681c13f3c9SIngo Molnar 	printf("\"\n");
17691c13f3c9SIngo Molnar 
17701c13f3c9SIngo Molnar 	memset(p, 0, sizeof(*p));
17711c13f3c9SIngo Molnar 
17721c13f3c9SIngo Molnar 	/* Initialize nonzero defaults: */
17731c13f3c9SIngo Molnar 
17741c13f3c9SIngo Molnar 	p->serialize_startup		= 1;
17751c13f3c9SIngo Molnar 	p->data_reads			= true;
17761c13f3c9SIngo Molnar 	p->data_writes			= true;
17771c13f3c9SIngo Molnar 	p->data_backwards		= true;
17781c13f3c9SIngo Molnar 	p->data_rand_walk		= true;
17791c13f3c9SIngo Molnar 	p->nr_loops			= -1;
17801c13f3c9SIngo Molnar 	p->init_random			= true;
178140ba93e3SRamkumar Ramachandra 	p->mb_global_str		= "1";
178240ba93e3SRamkumar Ramachandra 	p->nr_proc			= 1;
178340ba93e3SRamkumar Ramachandra 	p->nr_threads			= 1;
178440ba93e3SRamkumar Ramachandra 	p->nr_secs			= 5;
17850fae799eSArnaldo Carvalho de Melo 	p->run_all			= argc == 1;
17861c13f3c9SIngo Molnar }
17871c13f3c9SIngo Molnar 
run_bench_numa(const char * name,const char ** argv)17881c13f3c9SIngo Molnar static int run_bench_numa(const char *name, const char **argv)
17891c13f3c9SIngo Molnar {
17901c13f3c9SIngo Molnar 	int argc = command_size(argv);
17911c13f3c9SIngo Molnar 
17921c13f3c9SIngo Molnar 	init_params(&p0, name, argc, argv);
17931c13f3c9SIngo Molnar 	argc = parse_options(argc, argv, options, bench_numa_usage, 0);
17941c13f3c9SIngo Molnar 	if (argc)
17951c13f3c9SIngo Molnar 		goto err;
17961c13f3c9SIngo Molnar 
17971c13f3c9SIngo Molnar 	if (__bench_numa(name))
17981c13f3c9SIngo Molnar 		goto err;
17991c13f3c9SIngo Molnar 
18001c13f3c9SIngo Molnar 	return 0;
18011c13f3c9SIngo Molnar 
18021c13f3c9SIngo Molnar err:
18031c13f3c9SIngo Molnar 	return -1;
18041c13f3c9SIngo Molnar }
18051c13f3c9SIngo Molnar 
18061c13f3c9SIngo Molnar #define OPT_BW_RAM		"-s",  "20", "-zZq",    "--thp", " 1", "--no-data_rand_walk"
18071c13f3c9SIngo Molnar #define OPT_BW_RAM_NOTHP	OPT_BW_RAM,		"--thp", "-1"
18081c13f3c9SIngo Molnar 
18091c13f3c9SIngo Molnar #define OPT_CONV		"-s", "100", "-zZ0qcm", "--thp", " 1"
18101c13f3c9SIngo Molnar #define OPT_CONV_NOTHP		OPT_CONV,		"--thp", "-1"
18111c13f3c9SIngo Molnar 
18121c13f3c9SIngo Molnar #define OPT_BW			"-s",  "20", "-zZ0q",   "--thp", " 1"
18131c13f3c9SIngo Molnar #define OPT_BW_NOTHP		OPT_BW,			"--thp", "-1"
18141c13f3c9SIngo Molnar 
18151c13f3c9SIngo Molnar /*
18161c13f3c9SIngo Molnar  * The built-in test-suite executed by "perf bench numa -a".
18171c13f3c9SIngo Molnar  *
18181c13f3c9SIngo Molnar  * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
18191c13f3c9SIngo Molnar  */
18201c13f3c9SIngo Molnar static const char *tests[][MAX_ARGS] = {
18211c13f3c9SIngo Molnar    /* Basic single-stream NUMA bandwidth measurements: */
18221c13f3c9SIngo Molnar    { "RAM-bw-local,",     "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
18231c13f3c9SIngo Molnar 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM },
18241c13f3c9SIngo Molnar    { "RAM-bw-local-NOTHP,",
18251c13f3c9SIngo Molnar 			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
18261c13f3c9SIngo Molnar 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM_NOTHP },
18271c13f3c9SIngo Molnar    { "RAM-bw-remote,",    "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
18281c13f3c9SIngo Molnar 			  "-C" ,   "0", "-M",   "1", OPT_BW_RAM },
18291c13f3c9SIngo Molnar 
18301c13f3c9SIngo Molnar    /* 2-stream NUMA bandwidth measurements: */
18311c13f3c9SIngo Molnar    { "RAM-bw-local-2x,",  "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
18321c13f3c9SIngo Molnar 			   "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
18331c13f3c9SIngo Molnar    { "RAM-bw-remote-2x,", "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
18341c13f3c9SIngo Molnar 		 	   "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
18351c13f3c9SIngo Molnar 
18361c13f3c9SIngo Molnar    /* Cross-stream NUMA bandwidth measurement: */
18371c13f3c9SIngo Molnar    { "RAM-bw-cross,",     "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
18381c13f3c9SIngo Molnar 		 	   "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
18391c13f3c9SIngo Molnar 
18401c13f3c9SIngo Molnar    /* Convergence latency measurements: */
18411c13f3c9SIngo Molnar    { " 1x3-convergence,", "mem",  "-p",  "1", "-t",  "3", "-P",  "512", OPT_CONV },
18421c13f3c9SIngo Molnar    { " 1x4-convergence,", "mem",  "-p",  "1", "-t",  "4", "-P",  "512", OPT_CONV },
18431c13f3c9SIngo Molnar    { " 1x6-convergence,", "mem",  "-p",  "1", "-t",  "6", "-P", "1020", OPT_CONV },
184472d69c2aSAlexander Gordeev    { " 2x3-convergence,", "mem",  "-p",  "2", "-t",  "3", "-P", "1020", OPT_CONV },
18451c13f3c9SIngo Molnar    { " 3x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
18461c13f3c9SIngo Molnar    { " 4x4-convergence,", "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV },
18471c13f3c9SIngo Molnar    { " 4x4-convergence-NOTHP,",
18481c13f3c9SIngo Molnar 			  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
18491c13f3c9SIngo Molnar    { " 4x6-convergence,", "mem",  "-p",  "4", "-t",  "6", "-P", "1020", OPT_CONV },
18501c13f3c9SIngo Molnar    { " 4x8-convergence,", "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_CONV },
18511c13f3c9SIngo Molnar    { " 8x4-convergence,", "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV },
18521c13f3c9SIngo Molnar    { " 8x4-convergence-NOTHP,",
18531c13f3c9SIngo Molnar 			  "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
18541c13f3c9SIngo Molnar    { " 3x1-convergence,", "mem",  "-p",  "3", "-t",  "1", "-P",  "512", OPT_CONV },
18551c13f3c9SIngo Molnar    { " 4x1-convergence,", "mem",  "-p",  "4", "-t",  "1", "-P",  "512", OPT_CONV },
18561c13f3c9SIngo Molnar    { " 8x1-convergence,", "mem",  "-p",  "8", "-t",  "1", "-P",  "512", OPT_CONV },
18571c13f3c9SIngo Molnar    { "16x1-convergence,", "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_CONV },
18581c13f3c9SIngo Molnar    { "32x1-convergence,", "mem",  "-p", "32", "-t",  "1", "-P",  "128", OPT_CONV },
18591c13f3c9SIngo Molnar 
18601c13f3c9SIngo Molnar    /* Various NUMA process/thread layout bandwidth measurements: */
18611c13f3c9SIngo Molnar    { " 2x1-bw-process,",  "mem",  "-p",  "2", "-t",  "1", "-P", "1024", OPT_BW },
18621c13f3c9SIngo Molnar    { " 3x1-bw-process,",  "mem",  "-p",  "3", "-t",  "1", "-P", "1024", OPT_BW },
18631c13f3c9SIngo Molnar    { " 4x1-bw-process,",  "mem",  "-p",  "4", "-t",  "1", "-P", "1024", OPT_BW },
18641c13f3c9SIngo Molnar    { " 8x1-bw-process,",  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW },
18651c13f3c9SIngo Molnar    { " 8x1-bw-process-NOTHP,",
18661c13f3c9SIngo Molnar 			  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW_NOTHP },
18671c13f3c9SIngo Molnar    { "16x1-bw-process,",  "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_BW },
18681c13f3c9SIngo Molnar 
186985372c69SAlexander Gordeev    { " 1x4-bw-thread,",   "mem",  "-p",  "1", "-t",  "4", "-T",  "256", OPT_BW },
187085372c69SAlexander Gordeev    { " 1x8-bw-thread,",   "mem",  "-p",  "1", "-t",  "8", "-T",  "256", OPT_BW },
187185372c69SAlexander Gordeev    { "1x16-bw-thread,",   "mem",  "-p",  "1", "-t", "16", "-T",  "128", OPT_BW },
187285372c69SAlexander Gordeev    { "1x32-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-T",   "64", OPT_BW },
18731c13f3c9SIngo Molnar 
187485372c69SAlexander Gordeev    { " 2x3-bw-process,",  "mem",  "-p",  "2", "-t",  "3", "-P",  "512", OPT_BW },
187585372c69SAlexander Gordeev    { " 4x4-bw-process,",  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_BW },
187685372c69SAlexander Gordeev    { " 4x6-bw-process,",  "mem",  "-p",  "4", "-t",  "6", "-P",  "512", OPT_BW },
187785372c69SAlexander Gordeev    { " 4x8-bw-process,",  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW },
187885372c69SAlexander Gordeev    { " 4x8-bw-process-NOTHP,",
18791c13f3c9SIngo Molnar 			  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW_NOTHP },
188085372c69SAlexander Gordeev    { " 3x3-bw-process,",  "mem",  "-p",  "3", "-t",  "3", "-P",  "512", OPT_BW },
188185372c69SAlexander Gordeev    { " 5x5-bw-process,",  "mem",  "-p",  "5", "-t",  "5", "-P",  "512", OPT_BW },
18821c13f3c9SIngo Molnar 
188385372c69SAlexander Gordeev    { "2x16-bw-process,",  "mem",  "-p",  "2", "-t", "16", "-P",  "512", OPT_BW },
188485372c69SAlexander Gordeev    { "1x32-bw-process,",  "mem",  "-p",  "1", "-t", "32", "-P", "2048", OPT_BW },
18851c13f3c9SIngo Molnar 
18861c13f3c9SIngo Molnar    { "numa02-bw,",        "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW },
18871c13f3c9SIngo Molnar    { "numa02-bw-NOTHP,",  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW_NOTHP },
18881c13f3c9SIngo Molnar    { "numa01-bw-thread,", "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW },
18891c13f3c9SIngo Molnar    { "numa01-bw-thread-NOTHP,",
18901c13f3c9SIngo Molnar 			  "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW_NOTHP },
18911c13f3c9SIngo Molnar };
18921c13f3c9SIngo Molnar 
bench_all(void)18931c13f3c9SIngo Molnar static int bench_all(void)
18941c13f3c9SIngo Molnar {
18951c13f3c9SIngo Molnar 	int nr = ARRAY_SIZE(tests);
18961c13f3c9SIngo Molnar 	int ret;
18971c13f3c9SIngo Molnar 	int i;
18981c13f3c9SIngo Molnar 
18991c13f3c9SIngo Molnar 	ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
19001c13f3c9SIngo Molnar 	BUG_ON(ret < 0);
19011c13f3c9SIngo Molnar 
19021c13f3c9SIngo Molnar 	for (i = 0; i < nr; i++) {
1903b81a48eaSPetr Holasek 		run_bench_numa(tests[i][0], tests[i] + 1);
19041c13f3c9SIngo Molnar 	}
19051c13f3c9SIngo Molnar 
19061c13f3c9SIngo Molnar 	printf("\n");
19071c13f3c9SIngo Molnar 
19081c13f3c9SIngo Molnar 	return 0;
19091c13f3c9SIngo Molnar }
19101c13f3c9SIngo Molnar 
bench_numa(int argc,const char ** argv)1911b0ad8ea6SArnaldo Carvalho de Melo int bench_numa(int argc, const char **argv)
19121c13f3c9SIngo Molnar {
19131c13f3c9SIngo Molnar 	init_params(&p0, "main,", argc, argv);
19141c13f3c9SIngo Molnar 	argc = parse_options(argc, argv, options, bench_numa_usage, 0);
19151c13f3c9SIngo Molnar 	if (argc)
19161c13f3c9SIngo Molnar 		goto err;
19171c13f3c9SIngo Molnar 
19181c13f3c9SIngo Molnar 	if (p0.run_all)
19191c13f3c9SIngo Molnar 		return bench_all();
19201c13f3c9SIngo Molnar 
19211c13f3c9SIngo Molnar 	if (__bench_numa(NULL))
19221c13f3c9SIngo Molnar 		goto err;
19231c13f3c9SIngo Molnar 
19241c13f3c9SIngo Molnar 	return 0;
19251c13f3c9SIngo Molnar 
19261c13f3c9SIngo Molnar err:
19271c13f3c9SIngo Molnar 	usage_with_options(numa_usage, options);
19281c13f3c9SIngo Molnar 	return -1;
19291c13f3c9SIngo Molnar }
1930