xref: /openbmc/linux/samples/bpf/cpustat_user.c (revision a44e4f3a)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <signal.h>
8 #include <sched.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <fcntl.h>
12 #include <linux/bpf.h>
13 #include <locale.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include <sys/time.h>
17 #include <sys/resource.h>
18 #include <sys/wait.h>
19 
20 #include <bpf/bpf.h>
21 #include "bpf_load.h"
22 
/* Number of CPUs tracked: sizes stat_data[] and bounds every per-CPU loop. */
#define MAX_CPU			8
/* Number of P-state slots kept (and printed) per CPU. */
#define MAX_PSTATE_ENTRIES	5
/* Number of C-state slots kept (and printed) per CPU. */
#define MAX_CSTATE_ENTRIES	3
/* NOTE(review): not referenced by any code visible in this file. */
#define MAX_STARS		40

/* File that cpu_stat_inject_cpu_frequency_event() opens and writes to. */
#define CPUFREQ_MAX_SYSFS_PATH	"/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
/* First value written to the file above (described as 208MHz in this file). */
#define CPUFREQ_LOWEST_FREQ	"208000"
/*
 * Second value written to the file above, to lift the limit again.
 * NOTE(review): this file's comments describe the maximum as 1.2GHz; if the
 * unit matches CPUFREQ_LOWEST_FREQ this string is ten times that - confirm.
 */
#define CPUFREQ_HIGHEST_FREQ	"12000000"
31 
/*
 * Snapshot of the counters of one CPU as read from the BPF maps: one slot
 * per C-state entry and one slot per P-state entry.  cpu_stat_print()
 * divides each slot by 1000000 and labels the result as milliseconds.
 */
struct cpu_stat_data {
	unsigned long cstate[MAX_CSTATE_ENTRIES];
	unsigned long pstate[MAX_PSTATE_ENTRIES];
};

/* Latest snapshot, indexed by CPU number; written by cpu_stat_update(). */
static struct cpu_stat_data stat_data[MAX_CPU];
38 
39 static void cpu_stat_print(void)
40 {
41 	int i, j;
42 	char state_str[sizeof("cstate-9")];
43 	struct cpu_stat_data *data;
44 
45 	/* Clear screen */
46 	printf("\033[2J");
47 
48 	/* Header */
49 	printf("\nCPU states statistics:\n");
50 	printf("%-10s ", "state(ms)");
51 
52 	for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
53 		sprintf(state_str, "cstate-%d", i);
54 		printf("%-11s ", state_str);
55 	}
56 
57 	for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
58 		sprintf(state_str, "pstate-%d", i);
59 		printf("%-11s ", state_str);
60 	}
61 
62 	printf("\n");
63 
64 	for (j = 0; j < MAX_CPU; j++) {
65 		data = &stat_data[j];
66 
67 		printf("CPU-%-6d ", j);
68 		for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
69 			printf("%-11ld ", data->cstate[i] / 1000000);
70 
71 		for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
72 			printf("%-11ld ", data->pstate[i] / 1000000);
73 
74 		printf("\n");
75 	}
76 }
77 
78 static void cpu_stat_update(int cstate_fd, int pstate_fd)
79 {
80 	unsigned long key, value;
81 	int c, i;
82 
83 	for (c = 0; c < MAX_CPU; c++) {
84 		for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
85 			key = c * MAX_CSTATE_ENTRIES + i;
86 			bpf_map_lookup_elem(cstate_fd, &key, &value);
87 			stat_data[c].cstate[i] = value;
88 		}
89 
90 		for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
91 			key = c * MAX_PSTATE_ENTRIES + i;
92 			bpf_map_lookup_elem(pstate_fd, &key, &value);
93 			stat_data[c].pstate[i] = value;
94 		}
95 	}
96 }
97 
/*
 * Wake up every CPU this task may run on, so that each of them produces the
 * ftrace event 'trace_cpu_idle' at least once.
 *
 * This function is adapted from idlestat_wake_all() in idlestate.c of the
 * 'idlestat' tool.  It sets the affinity of the running task to the CPUs
 * one by one, so that the selected CPU has to wake up to handle scheduling.
 * The original affinity mask is restored before returning.
 *
 * Return: 0 on success, -1 if the number of CPUs, the current CPU or the
 * current affinity mask cannot be obtained.  Failures of the individual
 * sched_setaffinity() calls are deliberately ignored (best effort).
 */
static int cpu_stat_inject_cpu_idle_event(void)
{
	int rcpu, i, nr_cpus;
	cpu_set_t cpumask;
	cpu_set_t original_cpumask;

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	if (nr_cpus < 0)
		return -1;

	rcpu = sched_getcpu();
	if (rcpu < 0)
		return -1;

	/*
	 * Keep track of the CPUs we will run on.  Bail out on failure: the
	 * mask would otherwise be read, and later re-applied, uninitialised.
	 */
	if (sched_getaffinity(0, sizeof(original_cpumask),
			      &original_cpumask) < 0)
		return -1;

	for (i = 0; i < nr_cpus; i++) {

		/* Pointless to wake up ourself */
		if (i == rcpu)
			continue;

		/* Pointless to wake CPUs we will not run on */
		if (!CPU_ISSET(i, &original_cpumask))
			continue;

		CPU_ZERO(&cpumask);
		CPU_SET(i, &cpumask);

		sched_setaffinity(0, sizeof(cpumask), &cpumask);
	}

	/* Enable all the CPUs of the original mask */
	sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
	return 0;
}
143 
144 /*
145  * It's possible to have no any frequency change for long time and cannot
146  * get ftrace event 'trace_cpu_frequency' for long period, this introduces
147  * big deviation for pstate statistics.
148  *
149  * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
150  * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
151  * the maximum frequency value 1.2GHz.
152  */
153 static int cpu_stat_inject_cpu_frequency_event(void)
154 {
155 	int len, fd;
156 
157 	fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
158 	if (fd < 0) {
159 		printf("failed to open scaling_max_freq, errno=%d\n", errno);
160 		return fd;
161 	}
162 
163 	len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
164 	if (len < 0) {
165 		printf("failed to open scaling_max_freq, errno=%d\n", errno);
166 		goto err;
167 	}
168 
169 	len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
170 	if (len < 0) {
171 		printf("failed to open scaling_max_freq, errno=%d\n", errno);
172 		goto err;
173 	}
174 
175 err:
176 	close(fd);
177 	return len;
178 }
179 
180 static void int_exit(int sig)
181 {
182 	cpu_stat_inject_cpu_idle_event();
183 	cpu_stat_inject_cpu_frequency_event();
184 	cpu_stat_update(map_fd[1], map_fd[2]);
185 	cpu_stat_print();
186 	exit(0);
187 }
188 
189 int main(int argc, char **argv)
190 {
191 	char filename[256];
192 	int ret;
193 
194 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
195 
196 	if (load_bpf_file(filename)) {
197 		printf("%s", bpf_log_buf);
198 		return 1;
199 	}
200 
201 	ret = cpu_stat_inject_cpu_idle_event();
202 	if (ret < 0)
203 		return 1;
204 
205 	ret = cpu_stat_inject_cpu_frequency_event();
206 	if (ret < 0)
207 		return 1;
208 
209 	signal(SIGINT, int_exit);
210 	signal(SIGTERM, int_exit);
211 
212 	while (1) {
213 		cpu_stat_update(map_fd[1], map_fd[2]);
214 		cpu_stat_print();
215 		sleep(5);
216 	}
217 
218 	return 0;
219 }
220