// SPDX-License-Identifier: GPL-2.0

#define _GNU_SOURCE
#include <linux/limits.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#include "../kselftest.h"
#include "cgroup_util.h"

enum hog_clock_type {
	/* Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock. */
	CPU_HOG_CLOCK_PROCESS,
	/* Count elapsed time using system wallclock time. */
	CPU_HOG_CLOCK_WALL,
};

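/*
 * Argument for hog_cpus_timed(): run @nprocs busy-spinning threads for
 * the duration @ts, measured on the clock selected by @clock_type.
 */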
struct cpu_hog_func_param {
	int nprocs;
	struct timespec ts;
	enum hog_clock_type clock_type;
};

/*
 * This test creates two nested cgroups with and without enabling the
 * cpu controller, and verifies that a child cgroup sees "cpu" in its
 * cgroup.controllers file only when the controller is enabled in its
 * parent's cgroup.subtree_control.
 */
static int test_cpucg_subtree_control(const char *root)
{
	char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;

	/* Create two nested cgroups with the cpu controller enabled. */
	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	child = cg_name(parent, "cpucg_test_child");
	if (!child)
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
		goto cleanup;

	/* Create two nested cgroups without enabling the cpu controller. */
	parent2 = cg_name(root, "cpucg_test_1");
	if (!parent2)
		goto cleanup;

	if (cg_create(parent2))
		goto cleanup;

	child2 = cg_name(parent2, "cpucg_test_child");
	if (!child2)
		goto cleanup;

	if (cg_create(child2))
		goto cleanup;

	if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(child2);
	free(child2);
	cg_destroy(parent);
	free(parent);
	cg_destroy(parent2);
	free(parent2);

	return ret;
}

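/*
 * Burn CPU in a tight loop until the enclosing process exits. The
 * process spawned by cg_run() / cg_run_nowait() terminates once
 * hog_cpus_timed() returns, taking these spinner threads with it.
 */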
static void *hog_cpu_thread_func(void *arg)
{
	while (1)
		;

	return NULL;
}

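/*
 * Returns *lhs - *rhs as a timespec, clamped at zero: if rhs describes
 * a later time than lhs, a zeroed timespec is returned rather than a
 * negative interval.
 */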
static struct timespec
timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
{
	struct timespec zero = {
		.tv_sec = 0,
		.tv_nsec = 0,
	};
	struct timespec ret;

	if (lhs->tv_sec < rhs->tv_sec)
		return zero;

	ret.tv_sec = lhs->tv_sec - rhs->tv_sec;

	if (lhs->tv_nsec < rhs->tv_nsec) {
		if (ret.tv_sec == 0)
			return zero;

		/* Borrow a second for the nanosecond subtraction. */
		ret.tv_sec--;
		ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
	} else {
		ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;
	}

	return ret;
}

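/*
 * Hogs CPU with param->nprocs spinner threads for the duration
 * param->ts, where elapsed time is measured on either the process
 * CPU-time clock or the wallclock depending on param->clock_type.
 * Intended to be passed to cg_run() or cg_run_nowait().
 */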
static int hog_cpus_timed(const char *cgroup, void *arg)
{
	const struct cpu_hog_func_param *param =
		(struct cpu_hog_func_param *)arg;
	struct timespec ts_run = param->ts;
	struct timespec ts_remaining = ts_run;
	struct timespec ts_start;
	int i, ret;

	ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
	if (ret != 0)
		return ret;

	for (i = 0; i < param->nprocs; i++) {
		pthread_t tid;

		ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
		if (ret != 0)
			return ret;
	}

	/*
	 * Sleep for the remaining run time, then recompute how much of the
	 * budget has actually elapsed on the selected clock; a throttled or
	 * interrupted hog may need several iterations.
	 */
	while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
		struct timespec ts_total;

		ret = nanosleep(&ts_remaining, NULL);
		if (ret && errno != EINTR)
			return ret;

		if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
			ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
			if (ret != 0)
				return ret;
		} else {
			struct timespec ts_current;

			ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
			if (ret != 0)
				return ret;

			ts_total = timespec_sub(&ts_current, &ts_start);
		}

		ts_remaining = timespec_sub(&ts_run, &ts_total);
	}

	return 0;
}

/*
 * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
 * cpu.stat shows the expected output.
 */
static int test_cpucg_stats(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec, system_usec;
	long usage_seconds = 2;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	/* A freshly created cgroup should report no CPU usage at all. */
	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
	if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
		goto cleanup;

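	/*
	 * Hog a single CPU for 2s of process CPU time, so that time the
	 * hog spends off-CPU doesn't count toward the budget and
	 * usage_usec stays comparable to expected_usage_usec.
	 */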
	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_PROCESS,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	/* Usage should be within 1% of 2s, with nonzero user time. */
	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (!values_close(usage_usec, expected_usage_usec, 1))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 50
 * A/C     cpu.weight = 100
 * A/D     cpu.weight = 150
 *
 * A separate process is then created for each child cgroup; each process
 * spawns as many threads as there are cores and hogs every CPU as much as
 * possible for some time interval.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * was given runtime proportional to its cpu.weight.
 */
static int test_cpucg_weight_overprovisioned(const char *root)
{
	struct child {
		char *cgroup;
		pid_t pid;
		long usage;
	};
	int ret = KSFT_FAIL, i;
	char *parent = NULL;
	struct child children[3] = {NULL};
	long usage_seconds = 10;

	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	/* Children get weights of 50, 100, and 150 respectively. */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
		if (!children[i].cgroup)
			goto cleanup;

		if (cg_create(children[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(children[i].cgroup, "cpu.weight",
					50 * (i + 1)))
			goto cleanup;
	}

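	/*
	 * Run one hog per child for the same wallclock interval. With every
	 * CPU oversubscribed three ways, each child's runtime should end up
	 * proportional to the 50/100/150 weights assigned above.
	 */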
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		struct cpu_hog_func_param param = {
			.nprocs = get_nprocs(),
			.ts = {
				.tv_sec = usage_seconds,
				.tv_nsec = 0,
			},
			.clock_type = CPU_HOG_CLOCK_WALL,
		};
		pid_t pid = cg_run_nowait(children[i].cgroup, hog_cpus_timed,
				(void *)&param);
		if (pid <= 0)
			goto cleanup;
		children[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int retcode;

		waitpid(children[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++)
		children[i].usage = cg_read_key_long(children[i].cgroup,
				"cpu.stat", "usage_usec");

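	/*
	 * The weights step by 50, so each child should have run for about
	 * children[0].usage more microseconds than its predecessor; check
	 * each successive delta against that baseline.
	 */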
	for (i = 0; i < ARRAY_SIZE(children) - 1; i++) {
		long delta;

		if (children[i + 1].usage <= children[i].usage)
			goto cleanup;

		delta = children[i + 1].usage - children[i].usage;
		if (!values_close(delta, children[0].usage, 35))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		cg_destroy(children[i].cgroup);
		free(children[i].cgroup);
	}
	cg_destroy(parent);
	free(parent);

	return ret;
}

#define T(x) { x, #x }
struct cpucg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_cpucg_subtree_control),
	T(test_cpucg_stats),
	T(test_cpucg_weight_overprovisioned),
};
#undef T

int main(int argc, char *argv[])
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

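	/*
	 * If the cpu controller isn't already enabled at the root, try to
	 * enable it, and skip the whole suite if that fails (e.g. the
	 * kernel was built without cpu controller support).
	 */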
	if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
		if (cg_write(root, "cgroup.subtree_control", "+cpu"))
			ksft_exit_skip("Failed to set cpu controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}