1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Memory bandwidth monitoring and allocation library
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Authors:
8  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  */
11 #include "resctrl.h"
12 
/* Name fragment searched for in the PMU directory entries under DYN_PMU_PATH */
#define UNCORE_IMC		"uncore_imc"
/* Per-iMC files (relative to the iMC directory) holding the CAS count configs */
#define READ_FILE_NAME		"events/cas_count_read"
#define WRITE_FILE_NAME		"events/cas_count_write"
/* Directory scanned by num_of_imcs() for iMC PMU entries */
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
/*
 * Factor applied to the raw counter values in get_mem_bw_imc(); the value
 * equals 2^-14 (64 / 2^20).
 * NOTE(review): presumably 64 bytes per CAS converted to MiB - confirm.
 */
#define SCALE			0.00006103515625
/* Number of iMC slots available in imc_counters_config[] */
#define MAX_IMCS		20
/* Number of "=," separated tokens get_event_and_umask() extracts from a config */
#define MAX_TOKENS		5
/* Indices into the second dimension of imc_counters_config[] */
#define READ			0
#define WRITE			1
/*
 * printf-style templates for the resctrl monitoring files used by this
 * library. Arguments, in order: the resctrl root path, then the control
 * group and/or monitor group name where the template has a slot for it, and
 * finally the resource id, which is printed as a two-digit number.
 *
 * Name prefixes: CON_MON_ = control group and monitor group, CON_ = control
 * group only, MON_ = monitor group only, no prefix = neither.
 */

/* Templates ending in "mbm_local_bytes" */
#define CON_MON_MBM_LOCAL_BYTES_PATH				\
	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define CON_MBM_LOCAL_BYTES_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MON_MBM_LOCAL_BYTES_PATH		\
	"%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MBM_LOCAL_BYTES_PATH			\
	"%s/mon_data/mon_L3_%02d/mbm_local_bytes"

/* Templates ending in "llc_occupancy" (the macro names spell it "LCC") */
#define CON_MON_LCC_OCCUP_PATH		\
	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define CON_LCC_OCCUP_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define MON_LCC_OCCUP_PATH		\
	"%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define LCC_OCCUP_PATH			\
	"%s/mon_data/mon_L3_%02d/llc_occupancy"
45 
/*
 * Buffer that get_mem_bw_imc() passes to read() on a perf event fd.
 *
 * The read_format configured by membw_initialize_perf_event_attr() requests
 * only the total enabled/running times, not PERF_FORMAT_ID.
 */
struct membw_read_format {
	__u64 value;         /* The value of the event */
	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	__u64 id;            /* if PERF_FORMAT_ID */
};
52 
/*
 * State of a single iMC counter. imc_counters_config[] holds one of these
 * for the read counter and one for the write counter of every iMC.
 */
struct imc_counter_config {
	__u32 type;	/* Value read from the iMC "type" file; copied into pe.type */
	__u64 event;	/* "event" value parsed from the CAS count config */
	__u64 umask;	/* "umask" value parsed from the CAS count config */
	struct perf_event_attr pe;	/* Attributes passed to perf_event_open() */
	struct membw_read_format return_value;	/* Last data read from fd */
	int fd;		/* perf event fd; get_mem_bw_imc() sets it to -1 before opening */
};
61 
/* Path of the mbm_local_bytes file that get_mem_bw_resctrl() reads */
static char mbm_total_path[1024];
/* Result of num_of_imcs(), stored by initialize_mem_bw_imc() */
static int imcs;
/* Per-iMC counter state, indexed as [iMC number][READ or WRITE] */
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
65 
membw_initialize_perf_event_attr(int i,int j)66 void membw_initialize_perf_event_attr(int i, int j)
67 {
68 	memset(&imc_counters_config[i][j].pe, 0,
69 	       sizeof(struct perf_event_attr));
70 	imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
71 	imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
72 	imc_counters_config[i][j].pe.disabled = 1;
73 	imc_counters_config[i][j].pe.inherit = 1;
74 	imc_counters_config[i][j].pe.exclude_guest = 0;
75 	imc_counters_config[i][j].pe.config =
76 		imc_counters_config[i][j].umask << 8 |
77 		imc_counters_config[i][j].event;
78 	imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
79 	imc_counters_config[i][j].pe.read_format =
80 		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
81 }
82 
membw_ioctl_perf_event_ioc_reset_enable(int i,int j)83 void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
84 {
85 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
86 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
87 }
88 
membw_ioctl_perf_event_ioc_disable(int i,int j)89 void membw_ioctl_perf_event_ioc_disable(int i, int j)
90 {
91 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
92 }
93 
94 /*
95  * get_event_and_umask:	Parse config into event and umask
96  * @cas_count_cfg:	Config
97  * @count:		iMC number
98  * @op:			Operation (read/write)
99  */
get_event_and_umask(char * cas_count_cfg,int count,bool op)100 void get_event_and_umask(char *cas_count_cfg, int count, bool op)
101 {
102 	char *token[MAX_TOKENS];
103 	int i = 0;
104 
105 	token[0] = strtok(cas_count_cfg, "=,");
106 
107 	for (i = 1; i < MAX_TOKENS; i++)
108 		token[i] = strtok(NULL, "=,");
109 
110 	for (i = 0; i < MAX_TOKENS - 1; i++) {
111 		if (!token[i])
112 			break;
113 		if (strcmp(token[i], "event") == 0) {
114 			if (op == READ)
115 				imc_counters_config[count][READ].event =
116 				strtol(token[i + 1], NULL, 16);
117 			else
118 				imc_counters_config[count][WRITE].event =
119 				strtol(token[i + 1], NULL, 16);
120 		}
121 		if (strcmp(token[i], "umask") == 0) {
122 			if (op == READ)
123 				imc_counters_config[count][READ].umask =
124 				strtol(token[i + 1], NULL, 16);
125 			else
126 				imc_counters_config[count][WRITE].umask =
127 				strtol(token[i + 1], NULL, 16);
128 		}
129 	}
130 }
131 
open_perf_event(int i,int cpu_no,int j)132 static int open_perf_event(int i, int cpu_no, int j)
133 {
134 	imc_counters_config[i][j].fd =
135 		perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
136 				PERF_FLAG_FD_CLOEXEC);
137 
138 	if (imc_counters_config[i][j].fd == -1) {
139 		fprintf(stderr, "Error opening leader %llx\n",
140 			imc_counters_config[i][j].pe.config);
141 
142 		return -1;
143 	}
144 
145 	return 0;
146 }
147 
148 /* Get type and config (read and write) of an iMC counter */
read_from_imc_dir(char * imc_dir,int count)149 static int read_from_imc_dir(char *imc_dir, int count)
150 {
151 	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
152 	FILE *fp;
153 
154 	/* Get type of iMC counter */
155 	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
156 	fp = fopen(imc_counter_type, "r");
157 	if (!fp) {
158 		ksft_perror("Failed to open iMC counter type file");
159 
160 		return -1;
161 	}
162 	if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
163 		ksft_perror("Could not get iMC type");
164 		fclose(fp);
165 
166 		return -1;
167 	}
168 	fclose(fp);
169 
170 	imc_counters_config[count][WRITE].type =
171 				imc_counters_config[count][READ].type;
172 
173 	/* Get read config */
174 	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
175 	fp = fopen(imc_counter_cfg, "r");
176 	if (!fp) {
177 		ksft_perror("Failed to open iMC config file");
178 
179 		return -1;
180 	}
181 	if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) {
182 		ksft_perror("Could not get iMC cas count read");
183 		fclose(fp);
184 
185 		return -1;
186 	}
187 	fclose(fp);
188 
189 	get_event_and_umask(cas_count_cfg, count, READ);
190 
191 	/* Get write config */
192 	sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
193 	fp = fopen(imc_counter_cfg, "r");
194 	if (!fp) {
195 		ksft_perror("Failed to open iMC config file");
196 
197 		return -1;
198 	}
199 	if  (fscanf(fp, "%1023s", cas_count_cfg) <= 0) {
200 		ksft_perror("Could not get iMC cas count write");
201 		fclose(fp);
202 
203 		return -1;
204 	}
205 	fclose(fp);
206 
207 	get_event_and_umask(cas_count_cfg, count, WRITE);
208 
209 	return 0;
210 }
211 
212 /*
213  * A system can have 'n' number of iMC (Integrated Memory Controller)
214  * counters, get that 'n'. For each iMC counter get it's type and config.
215  * Also, each counter has two configs, one for read and the other for write.
216  * A config again has two parts, event and umask.
217  * Enumerate all these details into an array of structures.
218  *
219  * Return: >= 0 on success. < 0 on failure.
220  */
num_of_imcs(void)221 static int num_of_imcs(void)
222 {
223 	char imc_dir[512], *temp;
224 	unsigned int count = 0;
225 	struct dirent *ep;
226 	int ret;
227 	DIR *dp;
228 
229 	dp = opendir(DYN_PMU_PATH);
230 	if (dp) {
231 		while ((ep = readdir(dp))) {
232 			temp = strstr(ep->d_name, UNCORE_IMC);
233 			if (!temp)
234 				continue;
235 
236 			/*
237 			 * imc counters are named as "uncore_imc_<n>", hence
238 			 * increment the pointer to point to <n>. Note that
239 			 * sizeof(UNCORE_IMC) would count for null character as
240 			 * well and hence the last underscore character in
241 			 * uncore_imc'_' need not be counted.
242 			 */
243 			temp = temp + sizeof(UNCORE_IMC);
244 
245 			/*
246 			 * Some directories under "DYN_PMU_PATH" could have
247 			 * names like "uncore_imc_free_running", hence, check if
248 			 * first character is a numerical digit or not.
249 			 */
250 			if (temp[0] >= '0' && temp[0] <= '9') {
251 				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
252 					ep->d_name);
253 				ret = read_from_imc_dir(imc_dir, count);
254 				if (ret) {
255 					closedir(dp);
256 
257 					return ret;
258 				}
259 				count++;
260 			}
261 		}
262 		closedir(dp);
263 		if (count == 0) {
264 			ksft_print_msg("Unable to find iMC counters\n");
265 
266 			return -1;
267 		}
268 	} else {
269 		ksft_perror("Unable to open PMU directory");
270 
271 		return -1;
272 	}
273 
274 	return count;
275 }
276 
initialize_mem_bw_imc(void)277 static int initialize_mem_bw_imc(void)
278 {
279 	int imc, j;
280 
281 	imcs = num_of_imcs();
282 	if (imcs <= 0)
283 		return imcs;
284 
285 	/* Initialize perf_event_attr structures for all iMC's */
286 	for (imc = 0; imc < imcs; imc++) {
287 		for (j = 0; j < 2; j++)
288 			membw_initialize_perf_event_attr(imc, j);
289 	}
290 
291 	return 0;
292 }
293 
perf_close_imc_mem_bw(void)294 static void perf_close_imc_mem_bw(void)
295 {
296 	int mc;
297 
298 	for (mc = 0; mc < imcs; mc++) {
299 		if (imc_counters_config[mc][READ].fd != -1)
300 			close(imc_counters_config[mc][READ].fd);
301 		if (imc_counters_config[mc][WRITE].fd != -1)
302 			close(imc_counters_config[mc][WRITE].fd);
303 	}
304 }
305 
306 /*
307  * get_mem_bw_imc:	Memory band width as reported by iMC counters
308  * @cpu_no:		CPU number that the benchmark PID is binded to
309  * @bw_report:		Bandwidth report type (reads, writes)
310  *
311  * Memory B/W utilized by a process on a socket can be calculated using
312  * iMC counters. Perf events are used to read these counters.
313  *
314  * Return: = 0 on success. < 0 on failure.
315  */
get_mem_bw_imc(int cpu_no,char * bw_report,float * bw_imc)316 static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
317 {
318 	float reads, writes, of_mul_read, of_mul_write;
319 	int imc, ret;
320 
321 	for (imc = 0; imc < imcs; imc++) {
322 		imc_counters_config[imc][READ].fd = -1;
323 		imc_counters_config[imc][WRITE].fd = -1;
324 	}
325 
326 	/* Start all iMC counters to log values (both read and write) */
327 	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
328 	for (imc = 0; imc < imcs; imc++) {
329 		ret = open_perf_event(imc, cpu_no, READ);
330 		if (ret)
331 			goto close_fds;
332 		ret = open_perf_event(imc, cpu_no, WRITE);
333 		if (ret)
334 			goto close_fds;
335 
336 		membw_ioctl_perf_event_ioc_reset_enable(imc, READ);
337 		membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE);
338 	}
339 
340 	sleep(1);
341 
342 	/* Stop counters after a second to get results (both read and write) */
343 	for (imc = 0; imc < imcs; imc++) {
344 		membw_ioctl_perf_event_ioc_disable(imc, READ);
345 		membw_ioctl_perf_event_ioc_disable(imc, WRITE);
346 	}
347 
348 	/*
349 	 * Get results which are stored in struct type imc_counter_config
350 	 * Take over flow into consideration before calculating total b/w
351 	 */
352 	for (imc = 0; imc < imcs; imc++) {
353 		struct imc_counter_config *r =
354 			&imc_counters_config[imc][READ];
355 		struct imc_counter_config *w =
356 			&imc_counters_config[imc][WRITE];
357 
358 		if (read(r->fd, &r->return_value,
359 			 sizeof(struct membw_read_format)) == -1) {
360 			ksft_perror("Couldn't get read b/w through iMC");
361 			goto close_fds;
362 		}
363 
364 		if (read(w->fd, &w->return_value,
365 			 sizeof(struct membw_read_format)) == -1) {
366 			ksft_perror("Couldn't get write bw through iMC");
367 			goto close_fds;
368 		}
369 
370 		__u64 r_time_enabled = r->return_value.time_enabled;
371 		__u64 r_time_running = r->return_value.time_running;
372 
373 		if (r_time_enabled != r_time_running)
374 			of_mul_read = (float)r_time_enabled /
375 					(float)r_time_running;
376 
377 		__u64 w_time_enabled = w->return_value.time_enabled;
378 		__u64 w_time_running = w->return_value.time_running;
379 
380 		if (w_time_enabled != w_time_running)
381 			of_mul_write = (float)w_time_enabled /
382 					(float)w_time_running;
383 		reads += r->return_value.value * of_mul_read * SCALE;
384 		writes += w->return_value.value * of_mul_write * SCALE;
385 	}
386 
387 	perf_close_imc_mem_bw();
388 
389 	if (strcmp(bw_report, "reads") == 0) {
390 		*bw_imc = reads;
391 		return 0;
392 	}
393 
394 	if (strcmp(bw_report, "writes") == 0) {
395 		*bw_imc = writes;
396 		return 0;
397 	}
398 
399 	*bw_imc = reads + writes;
400 	return 0;
401 
402 close_fds:
403 	perf_close_imc_mem_bw();
404 	return -1;
405 }
406 
set_mbm_path(const char * ctrlgrp,const char * mongrp,int resource_id)407 void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
408 {
409 	if (ctrlgrp && mongrp)
410 		sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
411 			RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
412 	else if (!ctrlgrp && mongrp)
413 		sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
414 			mongrp, resource_id);
415 	else if (ctrlgrp && !mongrp)
416 		sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
417 			ctrlgrp, resource_id);
418 	else if (!ctrlgrp && !mongrp)
419 		sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
420 			resource_id);
421 }
422 
423 /*
424  * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
425  * @ctrlgrp:			Name of the control monitor group (con_mon grp)
426  * @mongrp:			Name of the monitor group (mon grp)
427  * @cpu_no:			CPU number that the benchmark PID is binded to
428  * @resctrl_val:		Resctrl feature (Eg: mbm, mba.. etc)
429  */
initialize_mem_bw_resctrl(const char * ctrlgrp,const char * mongrp,int cpu_no,char * resctrl_val)430 static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
431 				      int cpu_no, char *resctrl_val)
432 {
433 	int resource_id;
434 
435 	if (get_resource_id(cpu_no, &resource_id) < 0) {
436 		ksft_print_msg("Could not get resource_id\n");
437 		return;
438 	}
439 
440 	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
441 		set_mbm_path(ctrlgrp, mongrp, resource_id);
442 
443 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
444 		if (ctrlgrp)
445 			sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
446 				RESCTRL_PATH, ctrlgrp, resource_id);
447 		else
448 			sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
449 				RESCTRL_PATH, resource_id);
450 	}
451 }
452 
453 /*
454  * Get MBM Local bytes as reported by resctrl FS
455  * For MBM,
456  * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
457  * 2. If only con_mon grp is given, then read from con_mon grp
458  * 3. If both are not given, then read from root con_mon grp
459  * For MBA,
460  * 1. If con_mon grp is given, then read from it
461  * 2. If con_mon grp is not given, then read from root con_mon grp
462  */
get_mem_bw_resctrl(unsigned long * mbm_total)463 static int get_mem_bw_resctrl(unsigned long *mbm_total)
464 {
465 	FILE *fp;
466 
467 	fp = fopen(mbm_total_path, "r");
468 	if (!fp) {
469 		ksft_perror("Failed to open total bw file");
470 
471 		return -1;
472 	}
473 	if (fscanf(fp, "%lu", mbm_total) <= 0) {
474 		ksft_perror("Could not get mbm local bytes");
475 		fclose(fp);
476 
477 		return -1;
478 	}
479 	fclose(fp);
480 
481 	return 0;
482 }
483 
/*
 * bm_pid: result of fork() in resctrl_val(), i.e. the PID of the benchmark
 *         child in the parent; reset to 0 by signal_handler_register().
 * ppid:   PID of the process calling resctrl_val(), saved before fork().
 */
pid_t bm_pid, ppid;
485 
/*
 * ctrlc_handler:	Signal handler that stops the benchmark and the test
 * @signum:	signal number (unused)
 * @info:	signal info (unused)
 * @ptr:	user context (unused)
 *
 * Kills the benchmark child if there is one, unmounts resctrl FS, runs the
 * test cleanup and exits with EXIT_SUCCESS.
 */
void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
	/*
	 * Only kill child after bm_pid is set after fork(). bm_pid is 0
	 * before that and -1 after a failed fork(); kill(-1, ...) would
	 * signal every process this process is allowed to signal.
	 */
	if (bm_pid > 0)
		kill(bm_pid, SIGKILL);
	umount_resctrlfs();
	tests_cleanup();
	ksft_print_msg("Ending\n\n");

	exit(EXIT_SUCCESS);
}
497 
498 /*
499  * Register CTRL-C handler for parent, as it has to kill
500  * child process before exiting.
501  */
signal_handler_register(void)502 int signal_handler_register(void)
503 {
504 	struct sigaction sigact = {};
505 	int ret = 0;
506 
507 	bm_pid = 0;
508 
509 	sigact.sa_sigaction = ctrlc_handler;
510 	sigemptyset(&sigact.sa_mask);
511 	sigact.sa_flags = SA_SIGINFO;
512 	if (sigaction(SIGINT, &sigact, NULL) ||
513 	    sigaction(SIGTERM, &sigact, NULL) ||
514 	    sigaction(SIGHUP, &sigact, NULL)) {
515 		ksft_perror("sigaction");
516 		ret = -1;
517 	}
518 	return ret;
519 }
520 
521 /*
522  * Reset signal handler to SIG_DFL.
523  * Non-Value return because the caller should keep
524  * the error code of other path even if sigaction fails.
525  */
signal_handler_unregister(void)526 void signal_handler_unregister(void)
527 {
528 	struct sigaction sigact = {};
529 
530 	sigact.sa_handler = SIG_DFL;
531 	sigemptyset(&sigact.sa_mask);
532 	if (sigaction(SIGINT, &sigact, NULL) ||
533 	    sigaction(SIGTERM, &sigact, NULL) ||
534 	    sigaction(SIGHUP, &sigact, NULL)) {
535 		ksft_perror("sigaction");
536 	}
537 }
538 
/*
 * print_results_bw:	the memory bandwidth results are stored in a file
 * @filename:		file that stores the results; "stdio" and "stderr"
 *			both make the results go to standard output
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		perf imc counter value
 * @bw_resc:		memory bandwidth value
 *
 * Return:		0 on success. -1 on failure.
 */
static int print_results_bw(char *filename,  int bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	FILE *fp;

	if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);

		return 0;
	}

	/*
	 * Failures return a fixed -1 rather than errno: errno may have been
	 * changed by the reporting calls, or may be 0, which would read as
	 * success.
	 */
	fp = fopen(filename, "a");
	if (!fp) {
		ksft_perror("Cannot open results file");

		return -1;
	}
	if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
		    bm_pid, bw_imc, bw_resc, diff) <= 0) {
		ksft_print_msg("Could not log results\n");
		fclose(fp);

		return -1;
	}
	/* Buffered output is written out here, so this is where write errors show */
	if (fclose(fp)) {
		ksft_perror("Could not log results");

		return -1;
	}

	return 0;
}
576 
set_cmt_path(const char * ctrlgrp,const char * mongrp,char sock_num)577 static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
578 {
579 	if (strlen(ctrlgrp) && strlen(mongrp))
580 		sprintf(llc_occup_path,	CON_MON_LCC_OCCUP_PATH,	RESCTRL_PATH,
581 			ctrlgrp, mongrp, sock_num);
582 	else if (!strlen(ctrlgrp) && strlen(mongrp))
583 		sprintf(llc_occup_path,	MON_LCC_OCCUP_PATH, RESCTRL_PATH,
584 			mongrp, sock_num);
585 	else if (strlen(ctrlgrp) && !strlen(mongrp))
586 		sprintf(llc_occup_path,	CON_LCC_OCCUP_PATH, RESCTRL_PATH,
587 			ctrlgrp, sock_num);
588 	else if (!strlen(ctrlgrp) && !strlen(mongrp))
589 		sprintf(llc_occup_path, LCC_OCCUP_PATH,	RESCTRL_PATH, sock_num);
590 }
591 
592 /*
593  * initialize_llc_occu_resctrl:	Appropriately populate "llc_occup_path"
594  * @ctrlgrp:			Name of the control monitor group (con_mon grp)
595  * @mongrp:			Name of the monitor group (mon grp)
596  * @cpu_no:			CPU number that the benchmark PID is binded to
597  * @resctrl_val:		Resctrl feature (Eg: cat, cmt.. etc)
598  */
initialize_llc_occu_resctrl(const char * ctrlgrp,const char * mongrp,int cpu_no,char * resctrl_val)599 static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
600 					int cpu_no, char *resctrl_val)
601 {
602 	int resource_id;
603 
604 	if (get_resource_id(cpu_no, &resource_id) < 0) {
605 		ksft_print_msg("Could not get resource_id\n");
606 		return;
607 	}
608 
609 	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
610 		set_cmt_path(ctrlgrp, mongrp, resource_id);
611 }
612 
613 static int
measure_vals(struct resctrl_val_param * param,unsigned long * bw_resc_start)614 measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
615 {
616 	unsigned long bw_resc, bw_resc_end;
617 	float bw_imc;
618 	int ret;
619 
620 	/*
621 	 * Measure memory bandwidth from resctrl and from
622 	 * another source which is perf imc value or could
623 	 * be something else if perf imc event is not available.
624 	 * Compare the two values to validate resctrl value.
625 	 * It takes 1sec to measure the data.
626 	 */
627 	ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc);
628 	if (ret < 0)
629 		return ret;
630 
631 	ret = get_mem_bw_resctrl(&bw_resc_end);
632 	if (ret < 0)
633 		return ret;
634 
635 	bw_resc = (bw_resc_end - *bw_resc_start) / MB;
636 	ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
637 	if (ret)
638 		return ret;
639 
640 	*bw_resc_start = bw_resc_end;
641 
642 	return 0;
643 }
644 
645 /*
646  * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
647  *		   in specified signal. Direct benchmark stdio to /dev/null.
648  * @signum:	signal number
649  * @info:	signal info
650  * @ucontext:	user context in signal handling
651  */
run_benchmark(int signum,siginfo_t * info,void * ucontext)652 static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
653 {
654 	int operation, ret, memflush;
655 	char **benchmark_cmd;
656 	size_t span;
657 	bool once;
658 	FILE *fp;
659 
660 	benchmark_cmd = info->si_ptr;
661 
662 	/*
663 	 * Direct stdio of child to /dev/null, so that only parent writes to
664 	 * stdio (console)
665 	 */
666 	fp = freopen("/dev/null", "w", stdout);
667 	if (!fp) {
668 		ksft_perror("Unable to direct benchmark status to /dev/null");
669 		PARENT_EXIT();
670 	}
671 
672 	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
673 		/* Execute default fill_buf benchmark */
674 		span = strtoul(benchmark_cmd[1], NULL, 10);
675 		memflush =  atoi(benchmark_cmd[2]);
676 		operation = atoi(benchmark_cmd[3]);
677 		if (!strcmp(benchmark_cmd[4], "true")) {
678 			once = true;
679 		} else if (!strcmp(benchmark_cmd[4], "false")) {
680 			once = false;
681 		} else {
682 			ksft_print_msg("Invalid once parameter\n");
683 			PARENT_EXIT();
684 		}
685 
686 		if (run_fill_buf(span, memflush, operation, once))
687 			fprintf(stderr, "Error in running fill buffer\n");
688 	} else {
689 		/* Execute specified benchmark */
690 		ret = execvp(benchmark_cmd[0], benchmark_cmd);
691 		if (ret)
692 			ksft_perror("execvp");
693 	}
694 
695 	fclose(stdout);
696 	ksft_print_msg("Unable to run specified benchmark\n");
697 	PARENT_EXIT();
698 }
699 
700 /*
701  * resctrl_val:	execute benchmark and measure memory bandwidth on
702  *			the benchmark
703  * @benchmark_cmd:	benchmark command and its arguments
704  * @param:		parameters passed to resctrl_val()
705  *
706  * Return:		0 on success. non-zero on failure.
707  */
resctrl_val(const char * const * benchmark_cmd,struct resctrl_val_param * param)708 int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param)
709 {
710 	char *resctrl_val = param->resctrl_val;
711 	unsigned long bw_resc_start = 0;
712 	struct sigaction sigact;
713 	int ret = 0, pipefd[2];
714 	char pipe_message = 0;
715 	union sigval value;
716 
717 	if (strcmp(param->filename, "") == 0)
718 		sprintf(param->filename, "stdio");
719 
720 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
721 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
722 		ret = validate_bw_report_request(param->bw_report);
723 		if (ret)
724 			return ret;
725 	}
726 
727 	/*
728 	 * If benchmark wasn't successfully started by child, then child should
729 	 * kill parent, so save parent's pid
730 	 */
731 	ppid = getpid();
732 
733 	if (pipe(pipefd)) {
734 		ksft_perror("Unable to create pipe");
735 
736 		return -1;
737 	}
738 
739 	/*
740 	 * Fork to start benchmark, save child's pid so that it can be killed
741 	 * when needed
742 	 */
743 	fflush(stdout);
744 	bm_pid = fork();
745 	if (bm_pid == -1) {
746 		ksft_perror("Unable to fork");
747 
748 		return -1;
749 	}
750 
751 	if (bm_pid == 0) {
752 		/*
753 		 * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
754 		 * start benchmark
755 		 */
756 		sigfillset(&sigact.sa_mask);
757 		sigdelset(&sigact.sa_mask, SIGUSR1);
758 
759 		sigact.sa_sigaction = run_benchmark;
760 		sigact.sa_flags = SA_SIGINFO;
761 
762 		/* Register for "SIGUSR1" signal from parent */
763 		if (sigaction(SIGUSR1, &sigact, NULL)) {
764 			ksft_perror("Can't register child for signal");
765 			PARENT_EXIT();
766 		}
767 
768 		/* Tell parent that child is ready */
769 		close(pipefd[0]);
770 		pipe_message = 1;
771 		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
772 		    sizeof(pipe_message)) {
773 			ksft_perror("Failed signaling parent process");
774 			close(pipefd[1]);
775 			return -1;
776 		}
777 		close(pipefd[1]);
778 
779 		/* Suspend child until delivery of "SIGUSR1" from parent */
780 		sigsuspend(&sigact.sa_mask);
781 
782 		ksft_perror("Child is done");
783 		PARENT_EXIT();
784 	}
785 
786 	ksft_print_msg("Benchmark PID: %d\n", bm_pid);
787 
788 	/*
789 	 * The cast removes constness but nothing mutates benchmark_cmd within
790 	 * the context of this process. At the receiving process, it becomes
791 	 * argv, which is mutable, on exec() but that's after fork() so it
792 	 * doesn't matter for the process running the tests.
793 	 */
794 	value.sival_ptr = (void *)benchmark_cmd;
795 
796 	/* Taskset benchmark to specified cpu */
797 	ret = taskset_benchmark(bm_pid, param->cpu_no);
798 	if (ret)
799 		goto out;
800 
801 	/* Write benchmark to specified control&monitoring grp in resctrl FS */
802 	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
803 				      resctrl_val);
804 	if (ret)
805 		goto out;
806 
807 	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
808 	    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
809 		ret = initialize_mem_bw_imc();
810 		if (ret)
811 			goto out;
812 
813 		initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
814 					  param->cpu_no, resctrl_val);
815 	} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
816 		initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
817 					    param->cpu_no, resctrl_val);
818 
819 	/* Parent waits for child to be ready. */
820 	close(pipefd[1]);
821 	while (pipe_message != 1) {
822 		if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
823 		    sizeof(pipe_message)) {
824 			ksft_perror("Failed reading message from child process");
825 			close(pipefd[0]);
826 			goto out;
827 		}
828 	}
829 	close(pipefd[0]);
830 
831 	/* Signal child to start benchmark */
832 	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
833 		ksft_perror("sigqueue SIGUSR1 to child");
834 		ret = errno;
835 		goto out;
836 	}
837 
838 	/* Give benchmark enough time to fully run */
839 	sleep(1);
840 
841 	/* Test runs until the callback setup() tells the test to stop. */
842 	while (1) {
843 		ret = param->setup(param);
844 		if (ret == END_OF_TESTS) {
845 			ret = 0;
846 			break;
847 		}
848 		if (ret < 0)
849 			break;
850 
851 		if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
852 		    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
853 			ret = measure_vals(param, &bw_resc_start);
854 			if (ret)
855 				break;
856 		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
857 			sleep(1);
858 			ret = measure_cache_vals(param, bm_pid);
859 			if (ret)
860 				break;
861 		}
862 	}
863 
864 out:
865 	kill(bm_pid, SIGKILL);
866 
867 	return ret;
868 }
869