1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Memory bandwidth monitoring and allocation library
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Authors:
8  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  */
11 #include "resctrl.h"
12 
/* Substring identifying iMC PMU directories under DYN_PMU_PATH */
#define UNCORE_IMC		"uncore_imc"
/* Per-PMU files (relative to the PMU directory) holding the CAS count configs */
#define READ_FILE_NAME		"events/cas_count_read"
#define WRITE_FILE_NAME		"events/cas_count_write"
/* Directory that is scanned for iMC PMUs */
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
/*
 * Factor applied to raw counter values in get_mem_bw_imc(). Numerically this
 * is 2^-14, i.e. 64 / (1024 * 1024).
 * NOTE(review): presumably converts 64-byte CAS counts to MB - confirm.
 */
#define SCALE			0.00006103515625
/* First dimension of imc_counters_config[][] */
#define MAX_IMCS		20
/* Maximum number of tokens get_event_and_umask() extracts from a config */
#define MAX_TOKENS		5
/* Indices into the second dimension of imc_counters_config[][] */
#define READ			0
#define WRITE			1
/*
 * sprintf() formats for the mbm_local_bytes file. Arguments, in order:
 * resctrl root, [control group], [monitor group], resource id.
 */
#define CON_MON_MBM_LOCAL_BYTES_PATH				\
	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define CON_MBM_LOCAL_BYTES_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MON_MBM_LOCAL_BYTES_PATH		\
	"%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"

#define MBM_LOCAL_BYTES_PATH			\
	"%s/mon_data/mon_L3_%02d/mbm_local_bytes"

/*
 * sprintf() formats for the llc_occupancy file, same argument order as the
 * mbm_local_bytes formats.
 * NOTE(review): "LCC" in these names looks like a typo for "LLC".
 */
#define CON_MON_LCC_OCCUP_PATH		\
	"%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define CON_LCC_OCCUP_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define MON_LCC_OCCUP_PATH		\
	"%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"

#define LCC_OCCUP_PATH			\
	"%s/mon_data/mon_L3_%02d/llc_occupancy"
45 
/*
 * Buffer that get_mem_bw_imc() passes to read() on a perf event fd.
 *
 * NOTE(review): membw_initialize_perf_event_attr() requests only
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING, so "id"
 * is presumably never filled in by the kernel - confirm.
 */
struct membw_read_format {
	__u64 value;         /* The value of the event */
	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	__u64 id;            /* if PERF_FORMAT_ID */
};
52 
/*
 * Everything needed to open and read one iMC counter (one direction, read or
 * write, of one iMC).
 */
struct imc_counter_config {
	__u32 type;		/* PMU type, read from the PMU's "type" file */
	__u64 event;		/* "event" value parsed from the CAS count config */
	__u64 umask;		/* "umask" value parsed from the CAS count config */
	struct perf_event_attr pe;	/* attr handed to perf_event_open() */
	struct membw_read_format return_value;	/* last result read from fd */
	int fd;			/* perf event fd, -1 when not open */
};
61 
/*
 * Path of the resctrl file read by get_mem_bw_resctrl(). Despite the name,
 * every format written into it points at an mbm_local_bytes file.
 */
static char mbm_total_path[1024];
/* Number of iMCs found by num_of_imcs(); negative if enumeration failed */
static int imcs;
/* Per-iMC counter state, indexed as [iMC number][READ or WRITE] */
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
65 
66 void membw_initialize_perf_event_attr(int i, int j)
67 {
68 	memset(&imc_counters_config[i][j].pe, 0,
69 	       sizeof(struct perf_event_attr));
70 	imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
71 	imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
72 	imc_counters_config[i][j].pe.disabled = 1;
73 	imc_counters_config[i][j].pe.inherit = 1;
74 	imc_counters_config[i][j].pe.exclude_guest = 0;
75 	imc_counters_config[i][j].pe.config =
76 		imc_counters_config[i][j].umask << 8 |
77 		imc_counters_config[i][j].event;
78 	imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
79 	imc_counters_config[i][j].pe.read_format =
80 		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
81 }
82 
83 void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
84 {
85 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
86 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
87 }
88 
89 void membw_ioctl_perf_event_ioc_disable(int i, int j)
90 {
91 	ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
92 }
93 
94 /*
95  * get_event_and_umask:	Parse config into event and umask
96  * @cas_count_cfg:	Config
97  * @count:		iMC number
98  * @op:			Operation (read/write)
99  */
100 void get_event_and_umask(char *cas_count_cfg, int count, bool op)
101 {
102 	char *token[MAX_TOKENS];
103 	int i = 0;
104 
105 	strcat(cas_count_cfg, ",");
106 	token[0] = strtok(cas_count_cfg, "=,");
107 
108 	for (i = 1; i < MAX_TOKENS; i++)
109 		token[i] = strtok(NULL, "=,");
110 
111 	for (i = 0; i < MAX_TOKENS; i++) {
112 		if (!token[i])
113 			break;
114 		if (strcmp(token[i], "event") == 0) {
115 			if (op == READ)
116 				imc_counters_config[count][READ].event =
117 				strtol(token[i + 1], NULL, 16);
118 			else
119 				imc_counters_config[count][WRITE].event =
120 				strtol(token[i + 1], NULL, 16);
121 		}
122 		if (strcmp(token[i], "umask") == 0) {
123 			if (op == READ)
124 				imc_counters_config[count][READ].umask =
125 				strtol(token[i + 1], NULL, 16);
126 			else
127 				imc_counters_config[count][WRITE].umask =
128 				strtol(token[i + 1], NULL, 16);
129 		}
130 	}
131 }
132 
133 static int open_perf_event(int i, int cpu_no, int j)
134 {
135 	imc_counters_config[i][j].fd =
136 		perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
137 				PERF_FLAG_FD_CLOEXEC);
138 
139 	if (imc_counters_config[i][j].fd == -1) {
140 		fprintf(stderr, "Error opening leader %llx\n",
141 			imc_counters_config[i][j].pe.config);
142 
143 		return -1;
144 	}
145 
146 	return 0;
147 }
148 
149 /* Get type and config (read and write) of an iMC counter */
150 static int read_from_imc_dir(char *imc_dir, int count)
151 {
152 	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
153 	FILE *fp;
154 
155 	/* Get type of iMC counter */
156 	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
157 	fp = fopen(imc_counter_type, "r");
158 	if (!fp) {
159 		ksft_perror("Failed to open iMC counter type file");
160 
161 		return -1;
162 	}
163 	if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
164 		ksft_perror("Could not get iMC type");
165 		fclose(fp);
166 
167 		return -1;
168 	}
169 	fclose(fp);
170 
171 	imc_counters_config[count][WRITE].type =
172 				imc_counters_config[count][READ].type;
173 
174 	/* Get read config */
175 	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
176 	fp = fopen(imc_counter_cfg, "r");
177 	if (!fp) {
178 		ksft_perror("Failed to open iMC config file");
179 
180 		return -1;
181 	}
182 	if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
183 		ksft_perror("Could not get iMC cas count read");
184 		fclose(fp);
185 
186 		return -1;
187 	}
188 	fclose(fp);
189 
190 	get_event_and_umask(cas_count_cfg, count, READ);
191 
192 	/* Get write config */
193 	sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
194 	fp = fopen(imc_counter_cfg, "r");
195 	if (!fp) {
196 		ksft_perror("Failed to open iMC config file");
197 
198 		return -1;
199 	}
200 	if  (fscanf(fp, "%s", cas_count_cfg) <= 0) {
201 		ksft_perror("Could not get iMC cas count write");
202 		fclose(fp);
203 
204 		return -1;
205 	}
206 	fclose(fp);
207 
208 	get_event_and_umask(cas_count_cfg, count, WRITE);
209 
210 	return 0;
211 }
212 
213 /*
214  * A system can have 'n' number of iMC (Integrated Memory Controller)
215  * counters, get that 'n'. For each iMC counter get it's type and config.
216  * Also, each counter has two configs, one for read and the other for write.
217  * A config again has two parts, event and umask.
218  * Enumerate all these details into an array of structures.
219  *
220  * Return: >= 0 on success. < 0 on failure.
221  */
222 static int num_of_imcs(void)
223 {
224 	char imc_dir[512], *temp;
225 	unsigned int count = 0;
226 	struct dirent *ep;
227 	int ret;
228 	DIR *dp;
229 
230 	dp = opendir(DYN_PMU_PATH);
231 	if (dp) {
232 		while ((ep = readdir(dp))) {
233 			temp = strstr(ep->d_name, UNCORE_IMC);
234 			if (!temp)
235 				continue;
236 
237 			/*
238 			 * imc counters are named as "uncore_imc_<n>", hence
239 			 * increment the pointer to point to <n>. Note that
240 			 * sizeof(UNCORE_IMC) would count for null character as
241 			 * well and hence the last underscore character in
242 			 * uncore_imc'_' need not be counted.
243 			 */
244 			temp = temp + sizeof(UNCORE_IMC);
245 
246 			/*
247 			 * Some directories under "DYN_PMU_PATH" could have
248 			 * names like "uncore_imc_free_running", hence, check if
249 			 * first character is a numerical digit or not.
250 			 */
251 			if (temp[0] >= '0' && temp[0] <= '9') {
252 				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
253 					ep->d_name);
254 				ret = read_from_imc_dir(imc_dir, count);
255 				if (ret) {
256 					closedir(dp);
257 
258 					return ret;
259 				}
260 				count++;
261 			}
262 		}
263 		closedir(dp);
264 		if (count == 0) {
265 			ksft_print_msg("Unable to find iMC counters\n");
266 
267 			return -1;
268 		}
269 	} else {
270 		ksft_perror("Unable to open PMU directory");
271 
272 		return -1;
273 	}
274 
275 	return count;
276 }
277 
278 static int initialize_mem_bw_imc(void)
279 {
280 	int imc, j;
281 
282 	imcs = num_of_imcs();
283 	if (imcs <= 0)
284 		return imcs;
285 
286 	/* Initialize perf_event_attr structures for all iMC's */
287 	for (imc = 0; imc < imcs; imc++) {
288 		for (j = 0; j < 2; j++)
289 			membw_initialize_perf_event_attr(imc, j);
290 	}
291 
292 	return 0;
293 }
294 
295 static void perf_close_imc_mem_bw(void)
296 {
297 	int mc;
298 
299 	for (mc = 0; mc < imcs; mc++) {
300 		if (imc_counters_config[mc][READ].fd != -1)
301 			close(imc_counters_config[mc][READ].fd);
302 		if (imc_counters_config[mc][WRITE].fd != -1)
303 			close(imc_counters_config[mc][WRITE].fd);
304 	}
305 }
306 
307 /*
308  * get_mem_bw_imc:	Memory band width as reported by iMC counters
309  * @cpu_no:		CPU number that the benchmark PID is binded to
310  * @bw_report:		Bandwidth report type (reads, writes)
311  *
312  * Memory B/W utilized by a process on a socket can be calculated using
313  * iMC counters. Perf events are used to read these counters.
314  *
315  * Return: = 0 on success. < 0 on failure.
316  */
317 static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
318 {
319 	float reads, writes, of_mul_read, of_mul_write;
320 	int imc, ret;
321 
322 	for (imc = 0; imc < imcs; imc++) {
323 		imc_counters_config[imc][READ].fd = -1;
324 		imc_counters_config[imc][WRITE].fd = -1;
325 	}
326 
327 	/* Start all iMC counters to log values (both read and write) */
328 	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
329 	for (imc = 0; imc < imcs; imc++) {
330 		ret = open_perf_event(imc, cpu_no, READ);
331 		if (ret)
332 			goto close_fds;
333 		ret = open_perf_event(imc, cpu_no, WRITE);
334 		if (ret)
335 			goto close_fds;
336 
337 		membw_ioctl_perf_event_ioc_reset_enable(imc, READ);
338 		membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE);
339 	}
340 
341 	sleep(1);
342 
343 	/* Stop counters after a second to get results (both read and write) */
344 	for (imc = 0; imc < imcs; imc++) {
345 		membw_ioctl_perf_event_ioc_disable(imc, READ);
346 		membw_ioctl_perf_event_ioc_disable(imc, WRITE);
347 	}
348 
349 	/*
350 	 * Get results which are stored in struct type imc_counter_config
351 	 * Take over flow into consideration before calculating total b/w
352 	 */
353 	for (imc = 0; imc < imcs; imc++) {
354 		struct imc_counter_config *r =
355 			&imc_counters_config[imc][READ];
356 		struct imc_counter_config *w =
357 			&imc_counters_config[imc][WRITE];
358 
359 		if (read(r->fd, &r->return_value,
360 			 sizeof(struct membw_read_format)) == -1) {
361 			ksft_perror("Couldn't get read b/w through iMC");
362 			goto close_fds;
363 		}
364 
365 		if (read(w->fd, &w->return_value,
366 			 sizeof(struct membw_read_format)) == -1) {
367 			ksft_perror("Couldn't get write bw through iMC");
368 			goto close_fds;
369 		}
370 
371 		__u64 r_time_enabled = r->return_value.time_enabled;
372 		__u64 r_time_running = r->return_value.time_running;
373 
374 		if (r_time_enabled != r_time_running)
375 			of_mul_read = (float)r_time_enabled /
376 					(float)r_time_running;
377 
378 		__u64 w_time_enabled = w->return_value.time_enabled;
379 		__u64 w_time_running = w->return_value.time_running;
380 
381 		if (w_time_enabled != w_time_running)
382 			of_mul_write = (float)w_time_enabled /
383 					(float)w_time_running;
384 		reads += r->return_value.value * of_mul_read * SCALE;
385 		writes += w->return_value.value * of_mul_write * SCALE;
386 	}
387 
388 	perf_close_imc_mem_bw();
389 
390 	if (strcmp(bw_report, "reads") == 0) {
391 		*bw_imc = reads;
392 		return 0;
393 	}
394 
395 	if (strcmp(bw_report, "writes") == 0) {
396 		*bw_imc = writes;
397 		return 0;
398 	}
399 
400 	*bw_imc = reads + writes;
401 	return 0;
402 
403 close_fds:
404 	perf_close_imc_mem_bw();
405 	return -1;
406 }
407 
408 void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
409 {
410 	if (ctrlgrp && mongrp)
411 		sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
412 			RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
413 	else if (!ctrlgrp && mongrp)
414 		sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
415 			mongrp, resource_id);
416 	else if (ctrlgrp && !mongrp)
417 		sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
418 			ctrlgrp, resource_id);
419 	else if (!ctrlgrp && !mongrp)
420 		sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
421 			resource_id);
422 }
423 
424 /*
425  * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
426  * @ctrlgrp:			Name of the control monitor group (con_mon grp)
427  * @mongrp:			Name of the monitor group (mon grp)
428  * @cpu_no:			CPU number that the benchmark PID is binded to
429  * @resctrl_val:		Resctrl feature (Eg: mbm, mba.. etc)
430  */
431 static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
432 				      int cpu_no, char *resctrl_val)
433 {
434 	int resource_id;
435 
436 	if (get_resource_id(cpu_no, &resource_id) < 0) {
437 		ksft_print_msg("Could not get resource_id\n");
438 		return;
439 	}
440 
441 	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
442 		set_mbm_path(ctrlgrp, mongrp, resource_id);
443 
444 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
445 		if (ctrlgrp)
446 			sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
447 				RESCTRL_PATH, ctrlgrp, resource_id);
448 		else
449 			sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
450 				RESCTRL_PATH, resource_id);
451 	}
452 }
453 
454 /*
455  * Get MBM Local bytes as reported by resctrl FS
456  * For MBM,
457  * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
458  * 2. If only con_mon grp is given, then read from con_mon grp
459  * 3. If both are not given, then read from root con_mon grp
460  * For MBA,
461  * 1. If con_mon grp is given, then read from it
462  * 2. If con_mon grp is not given, then read from root con_mon grp
463  */
464 static int get_mem_bw_resctrl(unsigned long *mbm_total)
465 {
466 	FILE *fp;
467 
468 	fp = fopen(mbm_total_path, "r");
469 	if (!fp) {
470 		ksft_perror("Failed to open total bw file");
471 
472 		return -1;
473 	}
474 	if (fscanf(fp, "%lu", mbm_total) <= 0) {
475 		ksft_perror("Could not get mbm local bytes");
476 		fclose(fp);
477 
478 		return -1;
479 	}
480 	fclose(fp);
481 
482 	return 0;
483 }
484 
/*
 * bm_pid: PID of the forked benchmark process; reset to 0 by
 *	   signal_handler_register() and assigned from fork() in resctrl_val().
 * ppid:   PID of the process that called resctrl_val(), saved before fork().
 */
pid_t bm_pid, ppid;
486 
/*
 * ctrlc_handler:	Signal handler installed by signal_handler_register()
 * @signum:		Signal number (unused)
 * @info:		Signal info (unused)
 * @ptr:		User context (unused)
 *
 * Kills the benchmark process if one was started, unmounts resctrl FS, runs
 * the test cleanup and terminates the process with EXIT_SUCCESS.
 *
 * NOTE(review): apart from kill(), the functions called here are probably not
 * async-signal-safe - confirm this is acceptable for the tests.
 * NOTE(review): bm_pid is only tested for being non-zero; if it can hold -1
 * (fork() failure), kill(-1, SIGKILL) would signal every process the caller
 * is allowed to signal - confirm callers never leave -1 in bm_pid.
 */
void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
	/* Only kill child after bm_pid is set after fork() */
	if (bm_pid)
		kill(bm_pid, SIGKILL);
	umount_resctrlfs();
	tests_cleanup();
	ksft_print_msg("Ending\n\n");

	exit(EXIT_SUCCESS);
}
498 
499 /*
500  * Register CTRL-C handler for parent, as it has to kill
501  * child process before exiting.
502  */
503 int signal_handler_register(void)
504 {
505 	struct sigaction sigact = {};
506 	int ret = 0;
507 
508 	bm_pid = 0;
509 
510 	sigact.sa_sigaction = ctrlc_handler;
511 	sigemptyset(&sigact.sa_mask);
512 	sigact.sa_flags = SA_SIGINFO;
513 	if (sigaction(SIGINT, &sigact, NULL) ||
514 	    sigaction(SIGTERM, &sigact, NULL) ||
515 	    sigaction(SIGHUP, &sigact, NULL)) {
516 		ksft_perror("sigaction");
517 		ret = -1;
518 	}
519 	return ret;
520 }
521 
522 /*
523  * Reset signal handler to SIG_DFL.
524  * Non-Value return because the caller should keep
525  * the error code of other path even if sigaction fails.
526  */
527 void signal_handler_unregister(void)
528 {
529 	struct sigaction sigact = {};
530 
531 	sigact.sa_handler = SIG_DFL;
532 	sigemptyset(&sigact.sa_mask);
533 	if (sigaction(SIGINT, &sigact, NULL) ||
534 	    sigaction(SIGTERM, &sigact, NULL) ||
535 	    sigaction(SIGHUP, &sigact, NULL)) {
536 		ksft_perror("sigaction");
537 	}
538 }
539 
/*
 * print_results_bw:	the memory bandwidth results are stored in a file
 * @filename:		file that stores the results; "stdio" and "stderr"
 *			both print to stdout instead
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		perf imc counter value
 * @bw_resc:		memory bandwidth value
 *
 * Return:		0 on success. non-zero on failure: the errno of the
 *			failed call, or -1 if errno was not set.
 */
static int print_results_bw(char *filename,  int bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	FILE *fp;
	int err;

	/* NOTE(review): "stderr" is printed to stdout as well - intended? */
	if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);

		return 0;
	}

	fp = fopen(filename, "a");
	if (!fp) {
		/* Save errno before any other call can overwrite it */
		err = errno;
		ksft_perror("Cannot open results file");

		return err ? err : -1;
	}
	if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
		    bm_pid, bw_imc, bw_resc, diff) <= 0) {
		err = errno;
		ksft_print_msg("Could not log results\n");
		fclose(fp);

		/* Never report a failure as success, even if errno is 0 */
		return err ? err : -1;
	}
	fclose(fp);

	return 0;
}
577 
578 static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
579 {
580 	if (strlen(ctrlgrp) && strlen(mongrp))
581 		sprintf(llc_occup_path,	CON_MON_LCC_OCCUP_PATH,	RESCTRL_PATH,
582 			ctrlgrp, mongrp, sock_num);
583 	else if (!strlen(ctrlgrp) && strlen(mongrp))
584 		sprintf(llc_occup_path,	MON_LCC_OCCUP_PATH, RESCTRL_PATH,
585 			mongrp, sock_num);
586 	else if (strlen(ctrlgrp) && !strlen(mongrp))
587 		sprintf(llc_occup_path,	CON_LCC_OCCUP_PATH, RESCTRL_PATH,
588 			ctrlgrp, sock_num);
589 	else if (!strlen(ctrlgrp) && !strlen(mongrp))
590 		sprintf(llc_occup_path, LCC_OCCUP_PATH,	RESCTRL_PATH, sock_num);
591 }
592 
593 /*
594  * initialize_llc_occu_resctrl:	Appropriately populate "llc_occup_path"
595  * @ctrlgrp:			Name of the control monitor group (con_mon grp)
596  * @mongrp:			Name of the monitor group (mon grp)
597  * @cpu_no:			CPU number that the benchmark PID is binded to
598  * @resctrl_val:		Resctrl feature (Eg: cat, cmt.. etc)
599  */
600 static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
601 					int cpu_no, char *resctrl_val)
602 {
603 	int resource_id;
604 
605 	if (get_resource_id(cpu_no, &resource_id) < 0) {
606 		ksft_print_msg("Could not get resource_id\n");
607 		return;
608 	}
609 
610 	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
611 		set_cmt_path(ctrlgrp, mongrp, resource_id);
612 }
613 
614 static int
615 measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
616 {
617 	unsigned long bw_resc, bw_resc_end;
618 	float bw_imc;
619 	int ret;
620 
621 	/*
622 	 * Measure memory bandwidth from resctrl and from
623 	 * another source which is perf imc value or could
624 	 * be something else if perf imc event is not available.
625 	 * Compare the two values to validate resctrl value.
626 	 * It takes 1sec to measure the data.
627 	 */
628 	ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc);
629 	if (ret < 0)
630 		return ret;
631 
632 	ret = get_mem_bw_resctrl(&bw_resc_end);
633 	if (ret < 0)
634 		return ret;
635 
636 	bw_resc = (bw_resc_end - *bw_resc_start) / MB;
637 	ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
638 	if (ret)
639 		return ret;
640 
641 	*bw_resc_start = bw_resc_end;
642 
643 	return 0;
644 }
645 
/*
 * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
 *		   in specified signal. Direct benchmark stdio to /dev/null.
 * @signum:	signal number
 * @info:	signal info; si_ptr carries the benchmark command vector
 * @ucontext:	user context in signal handling
 *
 * Installed by resctrl_val() as the child's SIGUSR1 handler. For the built-in
 * "fill_buf" benchmark the command vector is interpreted as
 * { "fill_buf", span, memflush, operation, once ("true" or "false") }.
 * Any other command is executed with execvp().
 *
 * NOTE(review): the vector elements are used without checking that they are
 * present - confirm callers always pass a complete "fill_buf" command.
 */
static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
{
	int operation, ret, memflush;
	char **benchmark_cmd;
	size_t span;
	bool once;
	FILE *fp;

	benchmark_cmd = info->si_ptr;

	/*
	 * Direct stdio of child to /dev/null, so that only parent writes to
	 * stdio (console)
	 */
	fp = freopen("/dev/null", "w", stdout);
	if (!fp) {
		ksft_perror("Unable to direct benchmark status to /dev/null");
		PARENT_EXIT();
	}

	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
		/* Execute default fill_buf benchmark */
		span = strtoul(benchmark_cmd[1], NULL, 10);
		memflush =  atoi(benchmark_cmd[2]);
		operation = atoi(benchmark_cmd[3]);
		if (!strcmp(benchmark_cmd[4], "true")) {
			once = true;
		} else if (!strcmp(benchmark_cmd[4], "false")) {
			once = false;
		} else {
			ksft_print_msg("Invalid once parameter\n");
			/*
			 * NOTE(review): "once" stays uninitialized unless
			 * PARENT_EXIT() does not return - presumably it
			 * terminates the process; confirm.
			 */
			PARENT_EXIT();
		}

		if (run_fill_buf(span, memflush, operation, once))
			fprintf(stderr, "Error in running fill buffer\n");
	} else {
		/* Execute specified benchmark */
		ret = execvp(benchmark_cmd[0], benchmark_cmd);
		if (ret)
			ksft_perror("execvp");
	}

	/* Reached when run_fill_buf() returns or when execvp() fails */
	fclose(stdout);
	ksft_print_msg("Unable to run specified benchmark\n");
	PARENT_EXIT();
}
700 
701 /*
702  * resctrl_val:	execute benchmark and measure memory bandwidth on
703  *			the benchmark
704  * @benchmark_cmd:	benchmark command and its arguments
705  * @param:		parameters passed to resctrl_val()
706  *
707  * Return:		0 on success. non-zero on failure.
708  */
709 int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param)
710 {
711 	char *resctrl_val = param->resctrl_val;
712 	unsigned long bw_resc_start = 0;
713 	struct sigaction sigact;
714 	int ret = 0, pipefd[2];
715 	char pipe_message = 0;
716 	union sigval value;
717 
718 	if (strcmp(param->filename, "") == 0)
719 		sprintf(param->filename, "stdio");
720 
721 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
722 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
723 		ret = validate_bw_report_request(param->bw_report);
724 		if (ret)
725 			return ret;
726 	}
727 
728 	/*
729 	 * If benchmark wasn't successfully started by child, then child should
730 	 * kill parent, so save parent's pid
731 	 */
732 	ppid = getpid();
733 
734 	if (pipe(pipefd)) {
735 		ksft_perror("Unable to create pipe");
736 
737 		return -1;
738 	}
739 
740 	/*
741 	 * Fork to start benchmark, save child's pid so that it can be killed
742 	 * when needed
743 	 */
744 	fflush(stdout);
745 	bm_pid = fork();
746 	if (bm_pid == -1) {
747 		ksft_perror("Unable to fork");
748 
749 		return -1;
750 	}
751 
752 	if (bm_pid == 0) {
753 		/*
754 		 * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
755 		 * start benchmark
756 		 */
757 		sigfillset(&sigact.sa_mask);
758 		sigdelset(&sigact.sa_mask, SIGUSR1);
759 
760 		sigact.sa_sigaction = run_benchmark;
761 		sigact.sa_flags = SA_SIGINFO;
762 
763 		/* Register for "SIGUSR1" signal from parent */
764 		if (sigaction(SIGUSR1, &sigact, NULL)) {
765 			ksft_perror("Can't register child for signal");
766 			PARENT_EXIT();
767 		}
768 
769 		/* Tell parent that child is ready */
770 		close(pipefd[0]);
771 		pipe_message = 1;
772 		if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
773 		    sizeof(pipe_message)) {
774 			ksft_perror("Failed signaling parent process");
775 			close(pipefd[1]);
776 			return -1;
777 		}
778 		close(pipefd[1]);
779 
780 		/* Suspend child until delivery of "SIGUSR1" from parent */
781 		sigsuspend(&sigact.sa_mask);
782 
783 		ksft_perror("Child is done");
784 		PARENT_EXIT();
785 	}
786 
787 	ksft_print_msg("Benchmark PID: %d\n", bm_pid);
788 
789 	/*
790 	 * The cast removes constness but nothing mutates benchmark_cmd within
791 	 * the context of this process. At the receiving process, it becomes
792 	 * argv, which is mutable, on exec() but that's after fork() so it
793 	 * doesn't matter for the process running the tests.
794 	 */
795 	value.sival_ptr = (void *)benchmark_cmd;
796 
797 	/* Taskset benchmark to specified cpu */
798 	ret = taskset_benchmark(bm_pid, param->cpu_no);
799 	if (ret)
800 		goto out;
801 
802 	/* Write benchmark to specified control&monitoring grp in resctrl FS */
803 	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
804 				      resctrl_val);
805 	if (ret)
806 		goto out;
807 
808 	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
809 	    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
810 		ret = initialize_mem_bw_imc();
811 		if (ret)
812 			goto out;
813 
814 		initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
815 					  param->cpu_no, resctrl_val);
816 	} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
817 		initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
818 					    param->cpu_no, resctrl_val);
819 
820 	/* Parent waits for child to be ready. */
821 	close(pipefd[1]);
822 	while (pipe_message != 1) {
823 		if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
824 		    sizeof(pipe_message)) {
825 			ksft_perror("Failed reading message from child process");
826 			close(pipefd[0]);
827 			goto out;
828 		}
829 	}
830 	close(pipefd[0]);
831 
832 	/* Signal child to start benchmark */
833 	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
834 		ksft_perror("sigqueue SIGUSR1 to child");
835 		ret = errno;
836 		goto out;
837 	}
838 
839 	/* Give benchmark enough time to fully run */
840 	sleep(1);
841 
842 	/* Test runs until the callback setup() tells the test to stop. */
843 	while (1) {
844 		ret = param->setup(param);
845 		if (ret == END_OF_TESTS) {
846 			ret = 0;
847 			break;
848 		}
849 		if (ret < 0)
850 			break;
851 
852 		if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
853 		    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
854 			ret = measure_vals(param, &bw_resc_start);
855 			if (ret)
856 				break;
857 		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
858 			sleep(1);
859 			ret = measure_cache_vals(param, bm_pid);
860 			if (ret)
861 				break;
862 		}
863 	}
864 
865 out:
866 	kill(bm_pid, SIGKILL);
867 
868 	return ret;
869 }
870