1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Basic resctrl file system operations
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Authors:
8  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  */
11 #include "resctrl.h"
12 
/*
 * find_resctrl_mount - Look for a mounted resctrl filesystem in /proc/mounts
 * @buffer:	If non-NULL, receives the mount point of the first resctrl
 *		entry found. Up to 256 bytes are written, so the caller must
 *		provide at least that much room.
 *
 * Each line of /proc/mounts is split on blanks/tabs; the second field is
 * taken as the mount point and the third as the filesystem type.
 *
 * Return: 0 if a resctrl mount was found, -ENXIO if /proc/mounts cannot be
 * opened, -ENOENT if no resctrl mount is listed.
 */
static int find_resctrl_mount(char *buffer)
{
	char entry[256];
	int status = -ENOENT;
	FILE *mtab;

	mtab = fopen("/proc/mounts", "r");
	if (!mtab) {
		perror("/proc/mounts");
		return -ENXIO;
	}

	while (fgets(entry, sizeof(entry), mtab)) {
		char *where, *type;

		/* First field (the mounted device/source) is not needed */
		if (!strtok(entry, " \t"))
			continue;

		where = strtok(NULL, " \t");
		if (!where)
			continue;

		type = strtok(NULL, " \t");
		if (!type || strcmp(type, "resctrl") != 0)
			continue;

		if (buffer)
			strncpy(buffer, where, 256);

		status = 0;
		break;
	}

	fclose(mtab);

	return status;
}
49 
50 /*
51  * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl
52  * @mum_resctrlfs:	Should the resctrl FS be remounted?
53  *
54  * If not mounted, mount it.
55  * If mounted and mum_resctrlfs then remount resctrl FS.
56  * If mounted and !mum_resctrlfs then noop
57  *
58  * Return: 0 on success, non-zero on failure
59  */
60 int remount_resctrlfs(bool mum_resctrlfs)
61 {
62 	char mountpoint[256];
63 	int ret;
64 
65 	ret = find_resctrl_mount(mountpoint);
66 	if (ret)
67 		strcpy(mountpoint, RESCTRL_PATH);
68 
69 	if (!ret && mum_resctrlfs && umount(mountpoint))
70 		ksft_print_msg("Fail: unmounting \"%s\"\n", mountpoint);
71 
72 	if (!ret && !mum_resctrlfs)
73 		return 0;
74 
75 	ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH);
76 	ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
77 	if (ret)
78 		perror("# mount");
79 
80 	return ret;
81 }
82 
83 int umount_resctrlfs(void)
84 {
85 	if (find_resctrl_mount(NULL))
86 		return 0;
87 
88 	if (umount(RESCTRL_PATH)) {
89 		perror("# Unable to umount resctrl");
90 
91 		return errno;
92 	}
93 
94 	return 0;
95 }
96 
97 /*
98  * get_resource_id - Get socket number/l3 id for a specified CPU
99  * @cpu_no:	CPU number
100  * @resource_id: Socket number or l3_id
101  *
102  * Return: >= 0 on success, < 0 on failure.
103  */
104 int get_resource_id(int cpu_no, int *resource_id)
105 {
106 	char phys_pkg_path[1024];
107 	FILE *fp;
108 
109 	if (get_vendor() == ARCH_AMD)
110 		sprintf(phys_pkg_path, "%s%d/cache/index3/id",
111 			PHYS_ID_PATH, cpu_no);
112 	else
113 		sprintf(phys_pkg_path, "%s%d/topology/physical_package_id",
114 			PHYS_ID_PATH, cpu_no);
115 
116 	fp = fopen(phys_pkg_path, "r");
117 	if (!fp) {
118 		perror("Failed to open physical_package_id");
119 
120 		return -1;
121 	}
122 	if (fscanf(fp, "%d", resource_id) <= 0) {
123 		perror("Could not get socket number or l3 id");
124 		fclose(fp);
125 
126 		return -1;
127 	}
128 	fclose(fp);
129 
130 	return 0;
131 }
132 
/*
 * get_cache_size - Get cache size for a specified CPU
 * @cpu_no:	CPU number
 * @cache_type:	Cache level, either "L2" or "L3"
 * @cache_size:	Filled with the cache size in bytes; left untouched on failure
 *
 * The size is read from the CPU's cache/index<N>/size sysfs file, which
 * holds a decimal number optionally followed by a 'K' or 'M' unit suffix.
 *
 * Return: = 0 on success, < 0 on failure.
 */
int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
{
	char cache_path[1024], cache_str[64];
	int length, i, cache_num;
	FILE *fp;

	if (!strcmp(cache_type, "L3")) {
		cache_num = 3;
	} else if (!strcmp(cache_type, "L2")) {
		cache_num = 2;
	} else {
		/*
		 * Not an errno-style failure: perror() would append an
		 * unrelated (possibly "Success") reason to the message.
		 */
		fprintf(stderr, "Invalid cache level\n");
		return -1;
	}

	sprintf(cache_path, "/sys/bus/cpu/devices/cpu%d/cache/index%d/size",
		cpu_no, cache_num);
	fp = fopen(cache_path, "r");
	if (!fp) {
		perror("Failed to open cache size");

		return -1;
	}
	/* Field width keeps the read within cache_str[64] (63 chars + NUL) */
	if (fscanf(fp, "%63s", cache_str) <= 0) {
		perror("Could not get cache_size");
		fclose(fp);

		return -1;
	}
	fclose(fp);

	length = (int)strlen(cache_str);

	*cache_size = 0;

	/* Accumulate digits, scale on a unit suffix, stop at anything else */
	for (i = 0; i < length; i++) {
		if ((cache_str[i] >= '0') && (cache_str[i] <= '9'))
			*cache_size = *cache_size * 10 + (cache_str[i] - '0');
		else if (cache_str[i] == 'K')
			*cache_size = *cache_size * 1024;
		else if (cache_str[i] == 'M')
			*cache_size = *cache_size * 1024 * 1024;
		else
			break;
	}

	return 0;
}
195 
196 #define CORE_SIBLINGS_PATH	"/sys/bus/cpu/devices/cpu"
197 
198 /*
199  * get_cbm_mask - Get cbm mask for given cache
200  * @cache_type:	Cache level L2/L3
201  * @cbm_mask:	cbm_mask returned as a string
202  *
203  * Return: = 0 on success, < 0 on failure.
204  */
205 int get_cbm_mask(char *cache_type, char *cbm_mask)
206 {
207 	char cbm_mask_path[1024];
208 	FILE *fp;
209 
210 	if (!cbm_mask)
211 		return -1;
212 
213 	sprintf(cbm_mask_path, "%s/%s/cbm_mask", INFO_PATH, cache_type);
214 
215 	fp = fopen(cbm_mask_path, "r");
216 	if (!fp) {
217 		perror("Failed to open cache level");
218 
219 		return -1;
220 	}
221 	if (fscanf(fp, "%s", cbm_mask) <= 0) {
222 		perror("Could not get max cbm_mask");
223 		fclose(fp);
224 
225 		return -1;
226 	}
227 	fclose(fp);
228 
229 	return 0;
230 }
231 
232 /*
233  * get_core_sibling - Get sibling core id from the same socket for given CPU
234  * @cpu_no:	CPU number
235  *
236  * Return:	> 0 on success, < 0 on failure.
237  */
238 int get_core_sibling(int cpu_no)
239 {
240 	char core_siblings_path[1024], cpu_list_str[64];
241 	int sibling_cpu_no = -1;
242 	FILE *fp;
243 
244 	sprintf(core_siblings_path, "%s%d/topology/core_siblings_list",
245 		CORE_SIBLINGS_PATH, cpu_no);
246 
247 	fp = fopen(core_siblings_path, "r");
248 	if (!fp) {
249 		perror("Failed to open core siblings path");
250 
251 		return -1;
252 	}
253 	if (fscanf(fp, "%s", cpu_list_str) <= 0) {
254 		perror("Could not get core_siblings list");
255 		fclose(fp);
256 
257 		return -1;
258 	}
259 	fclose(fp);
260 
261 	char *token = strtok(cpu_list_str, "-,");
262 
263 	while (token) {
264 		sibling_cpu_no = atoi(token);
265 		/* Skipping core 0 as we don't want to run test on core 0 */
266 		if (sibling_cpu_no != 0 && sibling_cpu_no != cpu_no)
267 			break;
268 		token = strtok(NULL, "-,");
269 	}
270 
271 	return sibling_cpu_no;
272 }
273 
/*
 * taskset_benchmark - Pin a PID (i.e. benchmark) to a single CPU
 * @bm_pid:	PID whose CPU affinity is set
 * @cpu_no:	The only CPU the PID is allowed to run on afterwards
 *
 * Return: 0 on success, non-zero on failure
 */
int taskset_benchmark(pid_t bm_pid, int cpu_no)
{
	cpu_set_t allowed;

	/* Affinity mask with exactly one CPU set */
	CPU_ZERO(&allowed);
	CPU_SET(cpu_no, &allowed);

	if (!sched_setaffinity(bm_pid, sizeof(allowed), &allowed))
		return 0;

	perror("Unable to taskset benchmark");

	return -1;
}
296 
297 /*
298  * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
299  *		   in specified signal. Direct benchmark stdio to /dev/null.
300  * @signum:	signal number
301  * @info:	signal info
302  * @ucontext:	user context in signal handling
303  *
304  * Return: void
305  */
306 void run_benchmark(int signum, siginfo_t *info, void *ucontext)
307 {
308 	int operation, ret, malloc_and_init_memory, memflush;
309 	unsigned long span, buffer_span;
310 	char **benchmark_cmd;
311 	char resctrl_val[64];
312 	FILE *fp;
313 
314 	benchmark_cmd = info->si_ptr;
315 
316 	/*
317 	 * Direct stdio of child to /dev/null, so that only parent writes to
318 	 * stdio (console)
319 	 */
320 	fp = freopen("/dev/null", "w", stdout);
321 	if (!fp)
322 		PARENT_EXIT("Unable to direct benchmark status to /dev/null");
323 
324 	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
325 		/* Execute default fill_buf benchmark */
326 		span = strtoul(benchmark_cmd[1], NULL, 10);
327 		malloc_and_init_memory = atoi(benchmark_cmd[2]);
328 		memflush =  atoi(benchmark_cmd[3]);
329 		operation = atoi(benchmark_cmd[4]);
330 		sprintf(resctrl_val, "%s", benchmark_cmd[5]);
331 
332 		if (strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
333 			buffer_span = span * MB;
334 		else
335 			buffer_span = span;
336 
337 		if (run_fill_buf(buffer_span, malloc_and_init_memory, memflush,
338 				 operation, resctrl_val))
339 			fprintf(stderr, "Error in running fill buffer\n");
340 	} else {
341 		/* Execute specified benchmark */
342 		ret = execvp(benchmark_cmd[0], benchmark_cmd);
343 		if (ret)
344 			perror("wrong\n");
345 	}
346 
347 	fclose(stdout);
348 	PARENT_EXIT("Unable to run specified benchmark");
349 }
350 
/*
 * create_grp - Create a group directory unless it is already there
 * @grp_name:	Name of the group, looked up among the entries of @parent_grp
 * @grp:	Full path and name of the group, passed to mkdir()
 * @parent_grp:	Full path and name of the parent group
 *
 * An empty @grp_name selects the root con_mon grp, for which nothing has
 * to be created.
 *
 * Return: 0 on success, non-zero on failure
 */
static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
{
	struct dirent *entry;
	int exists = 0;
	DIR *parent;

	/*
	 * resctrl FS is expected to be mounted by now; an empty name means
	 * the user wants the root con_mon grp, so there is nothing to do
	 */
	if (grp_name[0] == '\0')
		return 0;

	parent = opendir(parent_grp);
	if (!parent) {
		perror("Unable to open resctrl for group");

		return -1;
	}

	/* Is there already an entry with the requested name? */
	for (entry = readdir(parent); entry; entry = readdir(parent)) {
		if (!strcmp(entry->d_name, grp_name))
			exists = 1;
	}
	closedir(parent);

	if (exists)
		return 0;

	/* Requested grp doesn't exist, hence create it */
	if (mkdir(grp, 0) == -1) {
		perror("Unable to create group");

		return -1;
	}

	return 0;
}
398 
/*
 * write_pid_to_tasks - Write a PID into a resctrl "tasks" file
 * @tasks:	Path of the tasks file
 * @pid:	PID to write
 *
 * Return: 0 on success, -1 if the file cannot be opened or the write fails
 */
static int write_pid_to_tasks(char *tasks, pid_t pid)
{
	FILE *fp;

	fp = fopen(tasks, "w");
	if (!fp) {
		perror("Failed to open tasks file");

		return -1;
	}
	if (fprintf(fp, "%d\n", (int)pid) < 0) {
		perror("Failed to wr pid to tasks file");
		fclose(fp);

		return -1;
	}
	/*
	 * stdio buffers the data, so fprintf() succeeding says nothing about
	 * the actual write: a rejected write only shows up when the stream
	 * is flushed by fclose().
	 */
	if (fclose(fp) == EOF) {
		perror("Failed to wr pid to tasks file");

		return -1;
	}

	return 0;
}
419 
420 /*
421  * write_bm_pid_to_resctrl - Write a PID (i.e. benchmark) to resctrl FS
422  * @bm_pid:		PID that should be written
423  * @ctrlgrp:		Name of the control monitor group (con_mon grp)
424  * @mongrp:		Name of the monitor group (mon grp)
425  * @resctrl_val:	Resctrl feature (Eg: mbm, mba.. etc)
426  *
427  * If a con_mon grp is requested, create it and write pid to it, otherwise
428  * write pid to root con_mon grp.
429  * If a mon grp is requested, create it and write pid to it, otherwise
430  * pid is not written, this means that pid is in con_mon grp and hence
431  * should consult con_mon grp's mon_data directory for results.
432  *
433  * Return: 0 on success, non-zero on failure
434  */
435 int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
436 			    char *resctrl_val)
437 {
438 	char controlgroup[128], monitorgroup[512], monitorgroup_p[256];
439 	char tasks[1024];
440 	int ret = 0;
441 
442 	if (strlen(ctrlgrp))
443 		sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp);
444 	else
445 		sprintf(controlgroup, "%s", RESCTRL_PATH);
446 
447 	/* Create control and monitoring group and write pid into it */
448 	ret = create_grp(ctrlgrp, controlgroup, RESCTRL_PATH);
449 	if (ret)
450 		goto out;
451 	sprintf(tasks, "%s/tasks", controlgroup);
452 	ret = write_pid_to_tasks(tasks, bm_pid);
453 	if (ret)
454 		goto out;
455 
456 	/* Create mon grp and write pid into it for "mbm" and "cmt" test */
457 	if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) ||
458 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
459 		if (strlen(mongrp)) {
460 			sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
461 			sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
462 			ret = create_grp(mongrp, monitorgroup, monitorgroup_p);
463 			if (ret)
464 				goto out;
465 
466 			sprintf(tasks, "%s/mon_groups/%s/tasks",
467 				controlgroup, mongrp);
468 			ret = write_pid_to_tasks(tasks, bm_pid);
469 			if (ret)
470 				goto out;
471 		}
472 	}
473 
474 out:
475 	ksft_print_msg("Writing benchmark parameters to resctrl FS\n");
476 	if (ret)
477 		perror("# writing to resctrlfs");
478 
479 	return ret;
480 }
481 
482 /*
483  * write_schemata - Update schemata of a con_mon grp
484  * @ctrlgrp:		Name of the con_mon grp
485  * @schemata:		Schemata that should be updated to
486  * @cpu_no:		CPU number that the benchmark PID is binded to
487  * @resctrl_val:	Resctrl feature (Eg: mbm, mba.. etc)
488  *
489  * Update schemata of a con_mon grp *only* if requested resctrl feature is
490  * allocation type
491  *
492  * Return: 0 on success, non-zero on failure
493  */
494 int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
495 {
496 	char controlgroup[1024], schema[1024], reason[64];
497 	int resource_id, ret = 0;
498 	FILE *fp;
499 
500 	if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) &&
501 	    strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) &&
502 	    strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) &&
503 	    strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
504 		return -ENOENT;
505 
506 	if (!schemata) {
507 		ksft_print_msg("Skipping empty schemata update\n");
508 
509 		return -1;
510 	}
511 
512 	if (get_resource_id(cpu_no, &resource_id) < 0) {
513 		sprintf(reason, "Failed to get resource id");
514 		ret = -1;
515 
516 		goto out;
517 	}
518 
519 	if (strlen(ctrlgrp) != 0)
520 		sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp);
521 	else
522 		sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
523 
524 	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) ||
525 	    !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
526 		sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
527 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
528 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
529 		sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
530 
531 	fp = fopen(controlgroup, "w");
532 	if (!fp) {
533 		sprintf(reason, "Failed to open control group");
534 		ret = -1;
535 
536 		goto out;
537 	}
538 
539 	if (fprintf(fp, "%s\n", schema) < 0) {
540 		sprintf(reason, "Failed to write schemata in control group");
541 		fclose(fp);
542 		ret = -1;
543 
544 		goto out;
545 	}
546 	fclose(fp);
547 
548 out:
549 	ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n",
550 		       schema, ret ? " # " : "",
551 		       ret ? reason : "");
552 
553 	return ret;
554 }
555 
556 bool check_resctrlfs_support(void)
557 {
558 	FILE *inf = fopen("/proc/filesystems", "r");
559 	DIR *dp;
560 	char *res;
561 	bool ret = false;
562 
563 	if (!inf)
564 		return false;
565 
566 	res = fgrep(inf, "nodev\tresctrl\n");
567 
568 	if (res) {
569 		ret = true;
570 		free(res);
571 	}
572 
573 	fclose(inf);
574 
575 	ksft_print_msg("%s Check kernel supports resctrl filesystem\n",
576 		       ret ? "Pass:" : "Fail:");
577 
578 	if (!ret)
579 		return ret;
580 
581 	dp = opendir(RESCTRL_PATH);
582 	ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n",
583 		       dp ? "Pass:" : "Fail:", RESCTRL_PATH);
584 	if (dp)
585 		closedir(dp);
586 
587 	ksft_print_msg("resctrl filesystem %s mounted\n",
588 		       find_resctrl_mount(NULL) ? "not" : "is");
589 
590 	return ret;
591 }
592 
/*
 * fgrep - Find the next line of a stream that starts with a given string
 * @inf:	Stream to read from; reading continues at its current position
 * @str:	String the line has to begin with (prefix match, not a
 *		substring search)
 *
 * The stream is consumed up to and including the matching line, in chunks
 * of at most 255 characters.
 *
 * Return: a newly allocated copy of the matching line, which the caller has
 * to free(), or NULL if no line matched before the end of the stream
 */
char *fgrep(FILE *inf, const char *str)
{
	size_t prefix_len = strlen(str);
	char text[256];

	while (fgets(text, sizeof(text), inf)) {
		if (!strncmp(text, str, prefix_len))
			return strdup(text);
	}

	return NULL;
}
609 
610 /*
611  * validate_resctrl_feature_request - Check if requested feature is valid.
612  * @resctrl_val:	Requested feature
613  *
614  * Return: True if the feature is supported, else false
615  */
616 bool validate_resctrl_feature_request(const char *resctrl_val)
617 {
618 	struct stat statbuf;
619 	bool found = false;
620 	char *res;
621 	FILE *inf;
622 
623 	if (!resctrl_val)
624 		return false;
625 
626 	if (remount_resctrlfs(false))
627 		return false;
628 
629 	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
630 		if (!stat(L3_PATH, &statbuf))
631 			return true;
632 	} else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
633 		if (!stat(MB_PATH, &statbuf))
634 			return true;
635 	} else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
636 		   !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
637 		if (!stat(L3_MON_PATH, &statbuf)) {
638 			inf = fopen(L3_MON_FEATURES_PATH, "r");
639 			if (!inf)
640 				return false;
641 
642 			if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
643 				res = fgrep(inf, "llc_occupancy");
644 				if (res) {
645 					found = true;
646 					free(res);
647 				}
648 			}
649 
650 			if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
651 				res = fgrep(inf, "mbm_total_bytes");
652 				if (res) {
653 					free(res);
654 					res = fgrep(inf, "mbm_local_bytes");
655 					if (res) {
656 						found = true;
657 						free(res);
658 					}
659 				}
660 			}
661 			fclose(inf);
662 		}
663 	}
664 
665 	return found;
666 }
667 
/*
 * filter_dmesg - Print the resctrl related lines of the kernel log
 *
 * Runs "dmesg" in a child process with its stdout connected to a pipe and
 * prints every line containing "intel_rdt:" or "resctrl:" (a line holding
 * both is printed once per match).
 *
 * Return: 0 on success, non-zero if the pipe, the child process or the
 * stream on top of the pipe could not be set up
 */
int filter_dmesg(void)
{
	char line[1024];
	FILE *fp;
	int pipefds[2];
	pid_t pid;
	int ret;

	ret = pipe(pipefds);
	if (ret) {
		perror("pipe");
		return ret;
	}
	/* Keep pending output from being duplicated into the child */
	fflush(stdout);
	pid = fork();
	if (pid < 0) {
		/*
		 * Must not fall through: with pid == -1, kill() and
		 * waitpid() below would address every process we can reach.
		 */
		perror("fork");
		close(pipefds[0]);
		close(pipefds[1]);

		return -1;
	}
	if (pid == 0) {
		close(pipefds[0]);
		dup2(pipefds[1], STDOUT_FILENO);
		/* stdout now refers to the pipe; drop the extra descriptor */
		close(pipefds[1]);
		execlp("dmesg", "dmesg", NULL);
		perror("executing dmesg");
		exit(1);
	}
	close(pipefds[1]);
	fp = fdopen(pipefds[0], "r");
	if (!fp) {
		perror("fdopen(pipe)");
		close(pipefds[0]);
		kill(pid, SIGTERM);
		/* Reap the child so that it does not linger as a zombie */
		waitpid(pid, NULL, 0);

		return -1;
	}

	while (fgets(line, sizeof(line), fp)) {
		if (strstr(line, "intel_rdt:"))
			ksft_print_msg("dmesg: %s", line);
		if (strstr(line, "resctrl:"))
			ksft_print_msg("dmesg: %s", line);
	}
	fclose(fp);
	waitpid(pid, NULL, 0);

	return 0;
}
710 
/*
 * validate_bw_report_request - Check the requested iMC B/W report type
 * @bw_report:	Report type; "reads", "writes" and "total" are accepted as
 *		is, "nt-writes" is accepted and rewritten in place to "writes"
 *
 * Return: 0 if the report type is available, -1 otherwise
 */
int validate_bw_report_request(char *bw_report)
{
	static const char * const accepted[] = { "reads", "writes", "total" };
	size_t idx;

	/* "nt-writes" is reported through the "writes" counters */
	if (!strcmp(bw_report, "nt-writes")) {
		strcpy(bw_report, "writes");
		return 0;
	}

	for (idx = 0; idx < sizeof(accepted) / sizeof(accepted[0]); idx++) {
		if (!strcmp(bw_report, accepted[idx]))
			return 0;
	}

	fprintf(stderr, "Requested iMC B/W report type unavailable\n");

	return -1;
}
728 
/*
 * perf_event_open - Invoke the perf_event_open system call
 *
 * The arguments are handed to syscall(__NR_perf_event_open, ...) unchanged.
 *
 * Return: the value returned by syscall(), converted to int
 */
int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
		    int group_fd, unsigned long flags)
{
	return (int)syscall(__NR_perf_event_open, hw_event, pid, cpu,
			    group_fd, flags);
}
738 
/*
 * count_bits - Count the bits that are set in a value
 * @n:	Value to inspect
 *
 * Return: number of 1 bits in @n
 */
unsigned int count_bits(unsigned long n)
{
	unsigned int set_bits;

	/* Each round clears the lowest set bit, so rounds == set bits */
	for (set_bits = 0; n != 0; set_bits++)
		n &= n - 1;

	return set_bits;
}
750