1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Basic resctrl file system operations
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Authors:
8  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  */
11 #include <limits.h>
12 
13 #include "resctrl.h"
14 
/*
 * find_resctrl_mount - Look up the resctrl mount point in /proc/mounts
 * @buffer:	When non-NULL, receives the mount point of the first resctrl
 *		entry. Exactly 256 bytes are written (strncpy pads), so the
 *		buffer must be at least that large.
 *
 * Each line of /proc/mounts is split on blanks/tabs; the second field is
 * taken as the mount point and the third as the file system type.
 *
 * Return: 0 if a resctrl entry was found, -ENXIO if /proc/mounts cannot be
 * opened, -ENOENT if no resctrl entry is listed.
 */
static int find_resctrl_mount(char *buffer)
{
	char entry[256], *where, *fstype;
	int status = -ENOENT;
	FILE *fp;

	fp = fopen("/proc/mounts", "r");
	if (!fp) {
		perror("/proc/mounts");
		return -ENXIO;
	}

	while (fgets(entry, 256, fp)) {
		/* First field (device) is only consumed, never inspected */
		if (!strtok(entry, " \t"))
			continue;

		where = strtok(NULL, " \t");
		if (!where)
			continue;

		fstype = strtok(NULL, " \t");
		if (!fstype || strcmp(fstype, "resctrl") != 0)
			continue;

		if (buffer)
			strncpy(buffer, where, 256);
		status = 0;
		break;
	}

	fclose(fp);

	return status;
}
51 
52 /*
53  * mount_resctrlfs - Mount resctrl FS at /sys/fs/resctrl
54  *
55  * Mounts resctrl FS. Fails if resctrl FS is already mounted to avoid
56  * pre-existing settings interfering with the test results.
57  *
58  * Return: 0 on success, non-zero on failure
59  */
60 int mount_resctrlfs(void)
61 {
62 	int ret;
63 
64 	ret = find_resctrl_mount(NULL);
65 	if (ret != -ENOENT)
66 		return -1;
67 
68 	ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH);
69 	ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
70 	if (ret)
71 		perror("# mount");
72 
73 	return ret;
74 }
75 
/*
 * umount_resctrlfs - Unmount resctrl FS if it is currently mounted
 *
 * The mount point is taken from /proc/mounts via find_resctrl_mount(), so
 * the file system is unmounted wherever it happens to be mounted.
 *
 * Return: 0 on success or when resctrl FS is not mounted, the negative
 * value from find_resctrl_mount() when the mount table cannot be read,
 * or the (positive) errno of the failed umount() call.
 */
int umount_resctrlfs(void)
{
	char mountpoint[256];
	int saved_errno;
	int ret;

	ret = find_resctrl_mount(mountpoint);
	if (ret == -ENOENT)
		return 0;
	if (ret)
		return ret;

	if (umount(mountpoint)) {
		/* perror() may modify errno, so capture it before reporting */
		saved_errno = errno;
		perror("# Unable to umount resctrl");

		return saved_errno;
	}

	return 0;
}
95 
96 /*
97  * get_resource_id - Get socket number/l3 id for a specified CPU
98  * @cpu_no:	CPU number
99  * @resource_id: Socket number or l3_id
100  *
101  * Return: >= 0 on success, < 0 on failure.
102  */
103 int get_resource_id(int cpu_no, int *resource_id)
104 {
105 	char phys_pkg_path[1024];
106 	FILE *fp;
107 
108 	if (get_vendor() == ARCH_AMD)
109 		sprintf(phys_pkg_path, "%s%d/cache/index3/id",
110 			PHYS_ID_PATH, cpu_no);
111 	else
112 		sprintf(phys_pkg_path, "%s%d/topology/physical_package_id",
113 			PHYS_ID_PATH, cpu_no);
114 
115 	fp = fopen(phys_pkg_path, "r");
116 	if (!fp) {
117 		perror("Failed to open physical_package_id");
118 
119 		return -1;
120 	}
121 	if (fscanf(fp, "%d", resource_id) <= 0) {
122 		perror("Could not get socket number or l3 id");
123 		fclose(fp);
124 
125 		return -1;
126 	}
127 	fclose(fp);
128 
129 	return 0;
130 }
131 
/*
 * get_cache_size - Get cache size for a specified CPU
 * @cpu_no:	CPU number
 * @cache_type:	Cache level, either "L2" or "L3"
 * @cache_size:	Receives the cache size in bytes; only written on success
 *
 * Reads /sys/bus/cpu/devices/cpu<N>/cache/index<level>/size and converts
 * the leading decimal number plus an optional 'K' or 'M' suffix to bytes.
 * Parsing stops at the first character that is neither a digit nor one of
 * those suffixes.
 *
 * Return: = 0 on success, < 0 on failure.
 */
int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
{
	char cache_path[1024], cache_str[64];
	unsigned long size = 0;
	const char *p;
	int cache_num;
	FILE *fp;

	if (!strcmp(cache_type, "L3")) {
		cache_num = 3;
	} else if (!strcmp(cache_type, "L2")) {
		cache_num = 2;
	} else {
		/* Not an errno-style failure, so perror() would mislead */
		fprintf(stderr, "Invalid cache level\n");
		return -1;
	}

	snprintf(cache_path, sizeof(cache_path),
		 "/sys/bus/cpu/devices/cpu%d/cache/index%d/size",
		 cpu_no, cache_num);
	fp = fopen(cache_path, "r");
	if (!fp) {
		perror("Failed to open cache size");

		return -1;
	}
	/* Field width keeps the read inside cache_str (63 chars + NUL) */
	if (fscanf(fp, "%63s", cache_str) <= 0) {
		perror("Could not get cache_size");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	for (p = cache_str; *p; p++) {
		if (*p >= '0' && *p <= '9')
			size = size * 10 + (unsigned long)(*p - '0');
		else if (*p == 'K')
			size *= 1024;
		else if (*p == 'M')
			size *= 1024 * 1024;
		else
			break;
	}

	*cache_size = size;

	return 0;
}
194 
/*
 * Prefix of the per-CPU sysfs directory; get_core_sibling() appends the CPU
 * number followed by "/topology/core_siblings_list".
 */
#define CORE_SIBLINGS_PATH	"/sys/bus/cpu/devices/cpu"
196 
197 /*
198  * get_cbm_mask - Get cbm mask for given cache
199  * @cache_type:	Cache level L2/L3
200  * @cbm_mask:	cbm_mask returned as a string
201  *
202  * Return: = 0 on success, < 0 on failure.
203  */
204 int get_cbm_mask(char *cache_type, char *cbm_mask)
205 {
206 	char cbm_mask_path[1024];
207 	FILE *fp;
208 
209 	if (!cbm_mask)
210 		return -1;
211 
212 	sprintf(cbm_mask_path, "%s/%s/cbm_mask", INFO_PATH, cache_type);
213 
214 	fp = fopen(cbm_mask_path, "r");
215 	if (!fp) {
216 		perror("Failed to open cache level");
217 
218 		return -1;
219 	}
220 	if (fscanf(fp, "%s", cbm_mask) <= 0) {
221 		perror("Could not get max cbm_mask");
222 		fclose(fp);
223 
224 		return -1;
225 	}
226 	fclose(fp);
227 
228 	return 0;
229 }
230 
231 /*
232  * get_core_sibling - Get sibling core id from the same socket for given CPU
233  * @cpu_no:	CPU number
234  *
235  * Return:	> 0 on success, < 0 on failure.
236  */
237 int get_core_sibling(int cpu_no)
238 {
239 	char core_siblings_path[1024], cpu_list_str[64];
240 	int sibling_cpu_no = -1;
241 	FILE *fp;
242 
243 	sprintf(core_siblings_path, "%s%d/topology/core_siblings_list",
244 		CORE_SIBLINGS_PATH, cpu_no);
245 
246 	fp = fopen(core_siblings_path, "r");
247 	if (!fp) {
248 		perror("Failed to open core siblings path");
249 
250 		return -1;
251 	}
252 	if (fscanf(fp, "%s", cpu_list_str) <= 0) {
253 		perror("Could not get core_siblings list");
254 		fclose(fp);
255 
256 		return -1;
257 	}
258 	fclose(fp);
259 
260 	char *token = strtok(cpu_list_str, "-,");
261 
262 	while (token) {
263 		sibling_cpu_no = atoi(token);
264 		/* Skipping core 0 as we don't want to run test on core 0 */
265 		if (sibling_cpu_no != 0 && sibling_cpu_no != cpu_no)
266 			break;
267 		token = strtok(NULL, "-,");
268 	}
269 
270 	return sibling_cpu_no;
271 }
272 
/*
 * taskset_benchmark - Restrict a PID (i.e. benchmark) to a single CPU
 * @bm_pid:	PID whose affinity is changed
 * @cpu_no:	The only CPU left in the affinity mask
 *
 * Return: 0 on success, -1 if sched_setaffinity() fails
 */
int taskset_benchmark(pid_t bm_pid, int cpu_no)
{
	cpu_set_t affinity;
	int rc;

	CPU_ZERO(&affinity);
	CPU_SET(cpu_no, &affinity);

	rc = sched_setaffinity(bm_pid, sizeof(affinity), &affinity);
	if (rc == 0)
		return 0;

	perror("Unable to taskset benchmark");

	return -1;
}
295 
296 /*
297  * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
298  *		   in specified signal. Direct benchmark stdio to /dev/null.
299  * @signum:	signal number
300  * @info:	signal info
301  * @ucontext:	user context in signal handling
302  *
303  * Return: void
304  */
305 void run_benchmark(int signum, siginfo_t *info, void *ucontext)
306 {
307 	int operation, ret, memflush;
308 	char **benchmark_cmd;
309 	size_t span;
310 	bool once;
311 	FILE *fp;
312 
313 	benchmark_cmd = info->si_ptr;
314 
315 	/*
316 	 * Direct stdio of child to /dev/null, so that only parent writes to
317 	 * stdio (console)
318 	 */
319 	fp = freopen("/dev/null", "w", stdout);
320 	if (!fp)
321 		PARENT_EXIT("Unable to direct benchmark status to /dev/null");
322 
323 	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
324 		/* Execute default fill_buf benchmark */
325 		span = strtoul(benchmark_cmd[1], NULL, 10);
326 		memflush =  atoi(benchmark_cmd[2]);
327 		operation = atoi(benchmark_cmd[3]);
328 		if (!strcmp(benchmark_cmd[4], "true"))
329 			once = true;
330 		else if (!strcmp(benchmark_cmd[4], "false"))
331 			once = false;
332 		else
333 			PARENT_EXIT("Invalid once parameter");
334 
335 		if (run_fill_buf(span, memflush, operation, once))
336 			fprintf(stderr, "Error in running fill buffer\n");
337 	} else {
338 		/* Execute specified benchmark */
339 		ret = execvp(benchmark_cmd[0], benchmark_cmd);
340 		if (ret)
341 			perror("wrong\n");
342 	}
343 
344 	fclose(stdout);
345 	PARENT_EXIT("Unable to run specified benchmark");
346 }
347 
/*
 * create_grp - Create a group only if one doesn't exist
 * @grp_name:	Name of the group; an empty name means "use the root
 *		group" and makes this function a no-op
 * @grp:	Full path and name of the group (directory to create)
 * @parent_grp:	Full path and name of the parent group (directory that is
 *		searched for an entry called @grp_name)
 *
 * Return: 0 on success, non-zero on failure
 */
static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
{
	bool found_grp = false;
	struct dirent *ep;
	DIR *dp;

	/*
	 * At this point, we are guaranteed to have resctrl FS mounted and if
	 * length of grp_name == 0, it means, user wants to use root con_mon
	 * grp, so do nothing
	 */
	if (strlen(grp_name) == 0)
		return 0;

	/* Check if requested grp exists or not */
	dp = opendir(parent_grp);
	if (!dp) {
		perror("Unable to open resctrl for group");

		return -1;
	}
	while ((ep = readdir(dp)) != NULL) {
		if (strcmp(ep->d_name, grp_name) == 0) {
			/* Names are unique in a directory; stop scanning */
			found_grp = true;
			break;
		}
	}
	closedir(dp);

	/* Requested grp doesn't exist, hence create it */
	if (!found_grp && mkdir(grp, 0) == -1) {
		perror("Unable to create group");

		return -1;
	}

	return 0;
}
395 
/*
 * write_pid_to_tasks - Write a PID into a tasks file
 * @tasks:	Path of the tasks file
 * @pid:	PID to write, as a decimal number followed by a newline
 *
 * stdio buffers the output, so the data may only reach the file when the
 * stream is closed. The fclose() result is therefore checked as well;
 * otherwise a write rejected at that point would be reported as success.
 *
 * Return: 0 on success, -1 on failure
 */
static int write_pid_to_tasks(char *tasks, pid_t pid)
{
	FILE *fp;

	fp = fopen(tasks, "w");
	if (!fp) {
		perror("Failed to open tasks file");

		return -1;
	}
	if (fprintf(fp, "%d\n", pid) < 0) {
		perror("Failed to wr pid to tasks file");
		fclose(fp);

		return -1;
	}
	if (fclose(fp)) {
		perror("Failed to wr pid to tasks file");

		return -1;
	}

	return 0;
}
416 
417 /*
418  * write_bm_pid_to_resctrl - Write a PID (i.e. benchmark) to resctrl FS
419  * @bm_pid:		PID that should be written
420  * @ctrlgrp:		Name of the control monitor group (con_mon grp)
421  * @mongrp:		Name of the monitor group (mon grp)
422  * @resctrl_val:	Resctrl feature (Eg: mbm, mba.. etc)
423  *
424  * If a con_mon grp is requested, create it and write pid to it, otherwise
425  * write pid to root con_mon grp.
426  * If a mon grp is requested, create it and write pid to it, otherwise
427  * pid is not written, this means that pid is in con_mon grp and hence
428  * should consult con_mon grp's mon_data directory for results.
429  *
430  * Return: 0 on success, non-zero on failure
431  */
432 int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
433 			    char *resctrl_val)
434 {
435 	char controlgroup[128], monitorgroup[512], monitorgroup_p[256];
436 	char tasks[1024];
437 	int ret = 0;
438 
439 	if (strlen(ctrlgrp))
440 		sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp);
441 	else
442 		sprintf(controlgroup, "%s", RESCTRL_PATH);
443 
444 	/* Create control and monitoring group and write pid into it */
445 	ret = create_grp(ctrlgrp, controlgroup, RESCTRL_PATH);
446 	if (ret)
447 		goto out;
448 	sprintf(tasks, "%s/tasks", controlgroup);
449 	ret = write_pid_to_tasks(tasks, bm_pid);
450 	if (ret)
451 		goto out;
452 
453 	/* Create mon grp and write pid into it for "mbm" and "cmt" test */
454 	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) ||
455 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
456 		if (strlen(mongrp)) {
457 			sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
458 			sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
459 			ret = create_grp(mongrp, monitorgroup, monitorgroup_p);
460 			if (ret)
461 				goto out;
462 
463 			sprintf(tasks, "%s/mon_groups/%s/tasks",
464 				controlgroup, mongrp);
465 			ret = write_pid_to_tasks(tasks, bm_pid);
466 			if (ret)
467 				goto out;
468 		}
469 	}
470 
471 out:
472 	ksft_print_msg("Writing benchmark parameters to resctrl FS\n");
473 	if (ret)
474 		perror("# writing to resctrlfs");
475 
476 	return ret;
477 }
478 
479 /*
480  * write_schemata - Update schemata of a con_mon grp
481  * @ctrlgrp:		Name of the con_mon grp
482  * @schemata:		Schemata that should be updated to
483  * @cpu_no:		CPU number that the benchmark PID is binded to
484  * @resctrl_val:	Resctrl feature (Eg: mbm, mba.. etc)
485  *
486  * Update schemata of a con_mon grp *only* if requested resctrl feature is
487  * allocation type
488  *
489  * Return: 0 on success, non-zero on failure
490  */
491 int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
492 {
493 	char controlgroup[1024], schema[1024], reason[64];
494 	int resource_id, ret = 0;
495 	FILE *fp;
496 
497 	if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) &&
498 	    strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) &&
499 	    strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) &&
500 	    strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
501 		return -ENOENT;
502 
503 	if (!schemata) {
504 		ksft_print_msg("Skipping empty schemata update\n");
505 
506 		return -1;
507 	}
508 
509 	if (get_resource_id(cpu_no, &resource_id) < 0) {
510 		sprintf(reason, "Failed to get resource id");
511 		ret = -1;
512 
513 		goto out;
514 	}
515 
516 	if (strlen(ctrlgrp) != 0)
517 		sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp);
518 	else
519 		sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
520 
521 	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) ||
522 	    !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
523 		sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
524 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
525 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
526 		sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
527 
528 	fp = fopen(controlgroup, "w");
529 	if (!fp) {
530 		sprintf(reason, "Failed to open control group");
531 		ret = -1;
532 
533 		goto out;
534 	}
535 
536 	if (fprintf(fp, "%s\n", schema) < 0) {
537 		sprintf(reason, "Failed to write schemata in control group");
538 		fclose(fp);
539 		ret = -1;
540 
541 		goto out;
542 	}
543 	fclose(fp);
544 
545 out:
546 	ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n",
547 		       schema, ret ? " # " : "",
548 		       ret ? reason : "");
549 
550 	return ret;
551 }
552 
553 bool check_resctrlfs_support(void)
554 {
555 	FILE *inf = fopen("/proc/filesystems", "r");
556 	DIR *dp;
557 	char *res;
558 	bool ret = false;
559 
560 	if (!inf)
561 		return false;
562 
563 	res = fgrep(inf, "nodev\tresctrl\n");
564 
565 	if (res) {
566 		ret = true;
567 		free(res);
568 	}
569 
570 	fclose(inf);
571 
572 	ksft_print_msg("%s Check kernel supports resctrl filesystem\n",
573 		       ret ? "Pass:" : "Fail:");
574 
575 	if (!ret)
576 		return ret;
577 
578 	dp = opendir(RESCTRL_PATH);
579 	ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n",
580 		       dp ? "Pass:" : "Fail:", RESCTRL_PATH);
581 	if (dp)
582 		closedir(dp);
583 
584 	ksft_print_msg("resctrl filesystem %s mounted\n",
585 		       find_resctrl_mount(NULL) ? "not" : "is");
586 
587 	return ret;
588 }
589 
/*
 * fgrep - Find the first line of a stream that starts with a given string
 * @inf:	Stream to read from, starting at its current position
 * @str:	String the line has to begin with (compared with strncmp over
 *		strlen(@str) characters)
 *
 * Lines are read in chunks of at most 255 characters.
 *
 * Return: strdup()ed copy of the matching line (the caller frees it), or
 * NULL if no line matches or strdup() fails.
 */
char *fgrep(FILE *inf, const char *str)
{
	int prefix_len = strlen(str);
	char buf[256];

	while (fgets(buf, 256, inf)) {
		if (strncmp(buf, str, prefix_len) == 0)
			return strdup(buf);
	}

	return NULL;
}
606 
607 /*
608  * validate_resctrl_feature_request - Check if requested feature is valid.
609  * @resource:	Required resource (e.g., MB, L3, L2, L3_MON, etc.)
610  * @feature:	Required monitor feature (in mon_features file). Can only be
611  *		set for L3_MON. Must be NULL for all other resources.
612  *
613  * Return: True if the resource/feature is supported, else false. False is
614  *         also returned if resctrl FS is not mounted.
615  */
616 bool validate_resctrl_feature_request(const char *resource, const char *feature)
617 {
618 	char res_path[PATH_MAX];
619 	struct stat statbuf;
620 	char *res;
621 	FILE *inf;
622 	int ret;
623 
624 	if (!resource)
625 		return false;
626 
627 	ret = find_resctrl_mount(NULL);
628 	if (ret)
629 		return false;
630 
631 	snprintf(res_path, sizeof(res_path), "%s/%s", INFO_PATH, resource);
632 
633 	if (stat(res_path, &statbuf))
634 		return false;
635 
636 	if (!feature)
637 		return true;
638 
639 	snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource);
640 	inf = fopen(res_path, "r");
641 	if (!inf)
642 		return false;
643 
644 	res = fgrep(inf, feature);
645 	free(res);
646 	fclose(inf);
647 
648 	return !!res;
649 }
650 
/*
 * filter_dmesg - Print the resctrl related lines of the dmesg output
 *
 * Runs "dmesg" in a child process with its stdout connected to a pipe and
 * echoes every line containing "intel_rdt:" or "resctrl:" through
 * ksft_print_msg(). A line containing both markers is printed twice.
 *
 * Return: 0 once the output has been processed, non-zero if the pipe, the
 * child process or the read stream could not be set up.
 */
int filter_dmesg(void)
{
	char line[1024];
	FILE *fp;
	int pipefds[2];
	pid_t pid;
	int ret;

	ret = pipe(pipefds);
	if (ret) {
		perror("pipe");
		return ret;
	}
	/* Avoid the child inheriting (and re-emitting) buffered output */
	fflush(stdout);
	pid = fork();
	if (pid < 0) {
		/*
		 * Must not fall through: pid == -1 passed to kill() or
		 * waitpid() would address every process / any child.
		 */
		perror("fork");
		close(pipefds[0]);
		close(pipefds[1]);

		return -1;
	}
	if (pid == 0) {
		close(pipefds[0]);
		dup2(pipefds[1], STDOUT_FILENO);
		execlp("dmesg", "dmesg", NULL);
		perror("executing dmesg");
		exit(1);
	}
	close(pipefds[1]);
	fp = fdopen(pipefds[0], "r");
	if (!fp) {
		perror("fdopen(pipe)");
		close(pipefds[0]);
		kill(pid, SIGTERM);
		/* Reap the child so it does not linger as a zombie */
		waitpid(pid, NULL, 0);

		return -1;
	}

	while (fgets(line, sizeof(line), fp)) {
		if (strstr(line, "intel_rdt:"))
			ksft_print_msg("dmesg: %s", line);
		if (strstr(line, "resctrl:"))
			ksft_print_msg("dmesg: %s", line);
	}
	fclose(fp);
	waitpid(pid, NULL, 0);

	return 0;
}
693 
/*
 * validate_bw_report_request - Check a bandwidth report type string
 * @bw_report:	Requested report type. "reads", "writes" and "total" are
 *		accepted unchanged; "nt-writes" is accepted and rewritten in
 *		place to "writes".
 *
 * Return: 0 if the report type is accepted, -1 (after printing a message
 * to stderr) otherwise
 */
int validate_bw_report_request(char *bw_report)
{
	static const char * const accepted[] = { "reads", "writes", "total" };
	size_t idx;

	for (idx = 0; idx < sizeof(accepted) / sizeof(accepted[0]); idx++) {
		if (!strcmp(bw_report, accepted[idx]))
			return 0;
	}

	if (!strcmp(bw_report, "nt-writes")) {
		strcpy(bw_report, "writes");
		return 0;
	}

	fprintf(stderr, "Requested iMC B/W report type unavailable\n");

	return -1;
}
711 
/*
 * perf_event_open - Thin wrapper that issues the perf_event_open syscall
 *
 * All five arguments are handed to syscall(__NR_perf_event_open, ...) in
 * the order given.
 *
 * Return: the syscall() result converted to int
 */
int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
		    int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, hw_event, pid, cpu,
		       group_fd, flags);
}
721 
/*
 * count_bits - Count the bits that are set in a value
 * @n:	Value to inspect
 *
 * Return: number of 1 bits in @n
 */
unsigned int count_bits(unsigned long n)
{
	unsigned int set_bits;

	/* n &= n - 1 clears the lowest set bit, one per iteration */
	for (set_bits = 0; n != 0; n &= n - 1)
		set_bits++;

	return set_bits;
}
733