1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <linux/limits.h>
5 #include <sys/sysinfo.h>
6 #include <sys/wait.h>
7 #include <errno.h>
8 #include <pthread.h>
9 #include <stdio.h>
10 #include <time.h>
11 
12 #include "../kselftest.h"
13 #include "cgroup_util.h"
14 
/* Selects the clock a CPU hog consults to decide when it has run long enough. */
enum hog_clock_type {
	/* Measure CPU time consumed, via the CLOCK_PROCESS_CPUTIME_ID clock. */
	CPU_HOG_CLOCK_PROCESS = 0,
	/* Measure elapsed wall-clock time. */
	CPU_HOG_CLOCK_WALL = 1,
};
21 
/*
 * Bookkeeping for one CPU-hogging child process and the cgroup it runs in.
 * Arrays of this struct are zero-initialised with a positional initializer,
 * so @cgroup must remain the first member.
 */
struct cpu_hogger {
	/* Heap-allocated cgroup path; destroyed and freed by the owning test. */
	char *cgroup;
	/* Pid of the spawned hog process, later passed to waitpid(). */
	pid_t pid;
	/* "usage_usec" value read from the cgroup's cpu.stat after the run. */
	long usage;
};
27 
/* Argument block handed (as a void pointer) to hog_cpus_timed(). */
struct cpu_hog_func_param {
	/* Number of busy-looping threads to create. */
	int nprocs;
	/* How long to keep hogging, measured on the clock chosen below. */
	struct timespec ts;
	/* Which clock is used to measure the time already spent. */
	enum hog_clock_type clock_type;
};
33 
34 /*
35  * This test creates two nested cgroups with and without enabling
36  * the cpu controller.
37  */
38 static int test_cpucg_subtree_control(const char *root)
39 {
40 	char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
41 	int ret = KSFT_FAIL;
42 
43 	// Create two nested cgroups with the cpu controller enabled.
44 	parent = cg_name(root, "cpucg_test_0");
45 	if (!parent)
46 		goto cleanup;
47 
48 	if (cg_create(parent))
49 		goto cleanup;
50 
51 	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
52 		goto cleanup;
53 
54 	child = cg_name(parent, "cpucg_test_child");
55 	if (!child)
56 		goto cleanup;
57 
58 	if (cg_create(child))
59 		goto cleanup;
60 
61 	if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
62 		goto cleanup;
63 
64 	// Create two nested cgroups without enabling the cpu controller.
65 	parent2 = cg_name(root, "cpucg_test_1");
66 	if (!parent2)
67 		goto cleanup;
68 
69 	if (cg_create(parent2))
70 		goto cleanup;
71 
72 	child2 = cg_name(parent2, "cpucg_test_child");
73 	if (!child2)
74 		goto cleanup;
75 
76 	if (cg_create(child2))
77 		goto cleanup;
78 
79 	if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
80 		goto cleanup;
81 
82 	ret = KSFT_PASS;
83 
84 cleanup:
85 	cg_destroy(child);
86 	free(child);
87 	cg_destroy(child2);
88 	free(child2);
89 	cg_destroy(parent);
90 	free(parent);
91 	cg_destroy(parent2);
92 	free(parent2);
93 
94 	return ret;
95 }
96 
/*
 * Thread start routine that spins forever to keep one CPU busy.
 * @arg is ignored. The loop never exits, so the trailing return is only
 * there to satisfy the pthread start-routine signature.
 */
static void *hog_cpu_thread_func(void *arg)
{
	for (;;)
		;

	return NULL;
}
104 
105 static struct timespec
106 timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
107 {
108 	struct timespec zero = {
109 		.tv_sec = 0,
110 		.tv_nsec = 0,
111 	};
112 	struct timespec ret;
113 
114 	if (lhs->tv_sec < rhs->tv_sec)
115 		return zero;
116 
117 	ret.tv_sec = lhs->tv_sec - rhs->tv_sec;
118 
119 	if (lhs->tv_nsec < rhs->tv_nsec) {
120 		if (ret.tv_sec == 0)
121 			return zero;
122 
123 		ret.tv_sec--;
124 		ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
125 	} else
126 		ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;
127 
128 	return ret;
129 }
130 
131 static int hog_cpus_timed(const char *cgroup, void *arg)
132 {
133 	const struct cpu_hog_func_param *param =
134 		(struct cpu_hog_func_param *)arg;
135 	struct timespec ts_run = param->ts;
136 	struct timespec ts_remaining = ts_run;
137 	struct timespec ts_start;
138 	int i, ret;
139 
140 	ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
141 	if (ret != 0)
142 		return ret;
143 
144 	for (i = 0; i < param->nprocs; i++) {
145 		pthread_t tid;
146 
147 		ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
148 		if (ret != 0)
149 			return ret;
150 	}
151 
152 	while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
153 		struct timespec ts_total;
154 
155 		ret = nanosleep(&ts_remaining, NULL);
156 		if (ret && errno != EINTR)
157 			return ret;
158 
159 		if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
160 			ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
161 			if (ret != 0)
162 				return ret;
163 		} else {
164 			struct timespec ts_current;
165 
166 			ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
167 			if (ret != 0)
168 				return ret;
169 
170 			ts_total = timespec_sub(&ts_current, &ts_start);
171 		}
172 
173 		ts_remaining = timespec_sub(&ts_run, &ts_total);
174 	}
175 
176 	return 0;
177 }
178 
179 /*
180  * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
181  * cpu.stat shows the expected output.
182  */
183 static int test_cpucg_stats(const char *root)
184 {
185 	int ret = KSFT_FAIL;
186 	long usage_usec, user_usec, system_usec;
187 	long usage_seconds = 2;
188 	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
189 	char *cpucg;
190 
191 	cpucg = cg_name(root, "cpucg_test");
192 	if (!cpucg)
193 		goto cleanup;
194 
195 	if (cg_create(cpucg))
196 		goto cleanup;
197 
198 	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
199 	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
200 	system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
201 	if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
202 		goto cleanup;
203 
204 	struct cpu_hog_func_param param = {
205 		.nprocs = 1,
206 		.ts = {
207 			.tv_sec = usage_seconds,
208 			.tv_nsec = 0,
209 		},
210 		.clock_type = CPU_HOG_CLOCK_PROCESS,
211 	};
212 	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
213 		goto cleanup;
214 
215 	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
216 	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
217 	if (user_usec <= 0)
218 		goto cleanup;
219 
220 	if (!values_close(usage_usec, expected_usage_usec, 1))
221 		goto cleanup;
222 
223 	ret = KSFT_PASS;
224 
225 cleanup:
226 	cg_destroy(cpucg);
227 	free(cpucg);
228 
229 	return ret;
230 }
231 
232 static int
233 run_cpucg_weight_test(
234 		const char *root,
235 		pid_t (*spawn_child)(const struct cpu_hogger *child),
236 		int (*validate)(const struct cpu_hogger *children, int num_children))
237 {
238 	int ret = KSFT_FAIL, i;
239 	char *parent = NULL;
240 	struct cpu_hogger children[3] = {NULL};
241 
242 	parent = cg_name(root, "cpucg_test_0");
243 	if (!parent)
244 		goto cleanup;
245 
246 	if (cg_create(parent))
247 		goto cleanup;
248 
249 	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
250 		goto cleanup;
251 
252 	for (i = 0; i < ARRAY_SIZE(children); i++) {
253 		children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
254 		if (!children[i].cgroup)
255 			goto cleanup;
256 
257 		if (cg_create(children[i].cgroup))
258 			goto cleanup;
259 
260 		if (cg_write_numeric(children[i].cgroup, "cpu.weight",
261 					50 * (i + 1)))
262 			goto cleanup;
263 	}
264 
265 	for (i = 0; i < ARRAY_SIZE(children); i++) {
266 		pid_t pid = spawn_child(&children[i]);
267 		if (pid <= 0)
268 			goto cleanup;
269 		children[i].pid = pid;
270 	}
271 
272 	for (i = 0; i < ARRAY_SIZE(children); i++) {
273 		int retcode;
274 
275 		waitpid(children[i].pid, &retcode, 0);
276 		if (!WIFEXITED(retcode))
277 			goto cleanup;
278 		if (WEXITSTATUS(retcode))
279 			goto cleanup;
280 	}
281 
282 	for (i = 0; i < ARRAY_SIZE(children); i++)
283 		children[i].usage = cg_read_key_long(children[i].cgroup,
284 				"cpu.stat", "usage_usec");
285 
286 	if (validate(children, ARRAY_SIZE(children)))
287 		goto cleanup;
288 
289 	ret = KSFT_PASS;
290 cleanup:
291 	for (i = 0; i < ARRAY_SIZE(children); i++) {
292 		cg_destroy(children[i].cgroup);
293 		free(children[i].cgroup);
294 	}
295 	cg_destroy(parent);
296 	free(parent);
297 
298 	return ret;
299 }
300 
301 static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
302 {
303 	long usage_seconds = 10;
304 	struct cpu_hog_func_param param = {
305 		.nprocs = ncpus,
306 		.ts = {
307 			.tv_sec = usage_seconds,
308 			.tv_nsec = 0,
309 		},
310 		.clock_type = CPU_HOG_CLOCK_WALL,
311 	};
312 	return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
313 }
314 
/* Spawns a hog in @child's cgroup with one thread per get_nprocs() CPU. */
static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
{
	int ncpus = get_nprocs();

	return weight_hog_ncpus(child, ncpus);
}
319 
320 static int
321 overprovision_validate(const struct cpu_hogger *children, int num_children)
322 {
323 	int ret = KSFT_FAIL, i;
324 
325 	for (i = 0; i < num_children - 1; i++) {
326 		long delta;
327 
328 		if (children[i + 1].usage <= children[i].usage)
329 			goto cleanup;
330 
331 		delta = children[i + 1].usage - children[i].usage;
332 		if (!values_close(delta, children[0].usage, 35))
333 			goto cleanup;
334 	}
335 
336 	ret = KSFT_PASS;
337 cleanup:
338 	return ret;
339 }
340 
341 /*
342  * First, this test creates the following hierarchy:
343  * A
344  * A/B     cpu.weight = 50
345  * A/C     cpu.weight = 100
346  * A/D     cpu.weight = 150
347  *
348  * A separate process is then created for each child cgroup which spawns as
349  * many threads as there are cores, and hogs each CPU as much as possible
350  * for some time interval.
351  *
352  * Once all of the children have exited, we verify that each child cgroup
353  * was given proportional runtime as informed by their cpu.weight.
354  */
355 static int test_cpucg_weight_overprovisioned(const char *root)
356 {
357 	return run_cpucg_weight_test(root, weight_hog_all_cpus,
358 			overprovision_validate);
359 }
360 
/* Spawns a hog in @child's cgroup that runs a single busy thread. */
static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
{
	const int single_thread = 1;

	return weight_hog_ncpus(child, single_thread);
}
365 
366 static int
367 underprovision_validate(const struct cpu_hogger *children, int num_children)
368 {
369 	int ret = KSFT_FAIL, i;
370 
371 	for (i = 0; i < num_children - 1; i++) {
372 		if (!values_close(children[i + 1].usage, children[0].usage, 15))
373 			goto cleanup;
374 	}
375 
376 	ret = KSFT_PASS;
377 cleanup:
378 	return ret;
379 }
380 
381 /*
382  * First, this test creates the following hierarchy:
383  * A
384  * A/B     cpu.weight = 50
385  * A/C     cpu.weight = 100
386  * A/D     cpu.weight = 150
387  *
388  * A separate process is then created for each child cgroup which spawns a
389  * single thread that hogs a CPU. The testcase is only run on systems that
390  * have at least one core per-thread in the child processes.
391  *
392  * Once all of the children have exited, we verify that each child cgroup
393  * had roughly the same runtime despite having different cpu.weight.
394  */
395 static int test_cpucg_weight_underprovisioned(const char *root)
396 {
397 	// Only run the test if there are enough cores to avoid overprovisioning
398 	// the system.
399 	if (get_nprocs() < 4)
400 		return KSFT_SKIP;
401 
402 	return run_cpucg_weight_test(root, weight_hog_one_cpu,
403 			underprovision_validate);
404 }
405 
406 static int
407 run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
408 {
409 	int ret = KSFT_FAIL, i;
410 	char *parent = NULL, *child = NULL;
411 	struct cpu_hogger leaf[3] = {NULL};
412 	long nested_leaf_usage, child_usage;
413 	int nprocs = get_nprocs();
414 
415 	if (!overprovisioned) {
416 		if (nprocs < 4)
417 			/*
418 			 * Only run the test if there are enough cores to avoid overprovisioning
419 			 * the system.
420 			 */
421 			return KSFT_SKIP;
422 		nprocs /= 4;
423 	}
424 
425 	parent = cg_name(root, "cpucg_test");
426 	child = cg_name(parent, "cpucg_child");
427 	if (!parent || !child)
428 		goto cleanup;
429 
430 	if (cg_create(parent))
431 		goto cleanup;
432 	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
433 		goto cleanup;
434 
435 	if (cg_create(child))
436 		goto cleanup;
437 	if (cg_write(child, "cgroup.subtree_control", "+cpu"))
438 		goto cleanup;
439 	if (cg_write(child, "cpu.weight", "1000"))
440 		goto cleanup;
441 
442 	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
443 		const char *ancestor;
444 		long weight;
445 
446 		if (i == 0) {
447 			ancestor = parent;
448 			weight = 1000;
449 		} else {
450 			ancestor = child;
451 			weight = 5000;
452 		}
453 		leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
454 		if (!leaf[i].cgroup)
455 			goto cleanup;
456 
457 		if (cg_create(leaf[i].cgroup))
458 			goto cleanup;
459 
460 		if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
461 			goto cleanup;
462 	}
463 
464 	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
465 		pid_t pid;
466 		struct cpu_hog_func_param param = {
467 			.nprocs = nprocs,
468 			.ts = {
469 				.tv_sec = 10,
470 				.tv_nsec = 0,
471 			},
472 			.clock_type = CPU_HOG_CLOCK_WALL,
473 		};
474 
475 		pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
476 				(void *)&param);
477 		if (pid <= 0)
478 			goto cleanup;
479 		leaf[i].pid = pid;
480 	}
481 
482 	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
483 		int retcode;
484 
485 		waitpid(leaf[i].pid, &retcode, 0);
486 		if (!WIFEXITED(retcode))
487 			goto cleanup;
488 		if (WEXITSTATUS(retcode))
489 			goto cleanup;
490 	}
491 
492 	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
493 		leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
494 				"cpu.stat", "usage_usec");
495 		if (leaf[i].usage <= 0)
496 			goto cleanup;
497 	}
498 
499 	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
500 	if (overprovisioned) {
501 		if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
502 			goto cleanup;
503 	} else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
504 		goto cleanup;
505 
506 
507 	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
508 	if (child_usage <= 0)
509 		goto cleanup;
510 	if (!values_close(child_usage, nested_leaf_usage, 1))
511 		goto cleanup;
512 
513 	ret = KSFT_PASS;
514 cleanup:
515 	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
516 		cg_destroy(leaf[i].cgroup);
517 		free(leaf[i].cgroup);
518 	}
519 	cg_destroy(child);
520 	free(child);
521 	cg_destroy(parent);
522 	free(parent);
523 
524 	return ret;
525 }
526 
/*
 * Nested weight test with more runnable threads than CPUs.
 *
 * The shared driver builds:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is started in each leaf (B, D and E), each spawning
 * one busy thread per CPU for a few seconds.
 *
 * Once all processes have exited, the driver checks that B's usage is close
 * to the combined usage of D and E, and that C's usage matches that
 * combined usage.
 */
static int
test_cpucg_nested_weight_overprovisioned(const char *root)
{
	const bool overprovisioned = true;

	return run_cpucg_nested_weight_test(root, overprovisioned);
}
546 
/*
 * Nested weight test with fewer runnable threads than CPUs.
 *
 * The shared driver builds:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is started in each leaf (B, D and E), each spawning
 * nproc / 4 busy threads for a few seconds.
 *
 * Once all processes have exited, the driver checks that twice B's usage is
 * close to the combined usage of D and E, and that C's usage matches that
 * combined usage.
 */
static int
test_cpucg_nested_weight_underprovisioned(const char *root)
{
	const bool overprovisioned = false;

	return run_cpucg_nested_weight_test(root, overprovisioned);
}
566 
567 /*
568  * This test creates a cgroup with some maximum value within a period, and
569  * verifies that a process in the cgroup is not overscheduled.
570  */
571 static int test_cpucg_max(const char *root)
572 {
573 	int ret = KSFT_FAIL;
574 	long usage_usec, user_usec;
575 	long usage_seconds = 1;
576 	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
577 	char *cpucg;
578 
579 	cpucg = cg_name(root, "cpucg_test");
580 	if (!cpucg)
581 		goto cleanup;
582 
583 	if (cg_create(cpucg))
584 		goto cleanup;
585 
586 	if (cg_write(cpucg, "cpu.max", "1000"))
587 		goto cleanup;
588 
589 	struct cpu_hog_func_param param = {
590 		.nprocs = 1,
591 		.ts = {
592 			.tv_sec = usage_seconds,
593 			.tv_nsec = 0,
594 		},
595 		.clock_type = CPU_HOG_CLOCK_WALL,
596 	};
597 	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
598 		goto cleanup;
599 
600 	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
601 	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
602 	if (user_usec <= 0)
603 		goto cleanup;
604 
605 	if (user_usec >= expected_usage_usec)
606 		goto cleanup;
607 
608 	if (values_close(usage_usec, expected_usage_usec, 95))
609 		goto cleanup;
610 
611 	ret = KSFT_PASS;
612 
613 cleanup:
614 	cg_destroy(cpucg);
615 	free(cpucg);
616 
617 	return ret;
618 }
619 
620 /*
621  * This test verifies that a process inside of a nested cgroup whose parent
622  * group has a cpu.max value set, is properly throttled.
623  */
624 static int test_cpucg_max_nested(const char *root)
625 {
626 	int ret = KSFT_FAIL;
627 	long usage_usec, user_usec;
628 	long usage_seconds = 1;
629 	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
630 	char *parent, *child;
631 
632 	parent = cg_name(root, "cpucg_parent");
633 	child = cg_name(parent, "cpucg_child");
634 	if (!parent || !child)
635 		goto cleanup;
636 
637 	if (cg_create(parent))
638 		goto cleanup;
639 
640 	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
641 		goto cleanup;
642 
643 	if (cg_create(child))
644 		goto cleanup;
645 
646 	if (cg_write(parent, "cpu.max", "1000"))
647 		goto cleanup;
648 
649 	struct cpu_hog_func_param param = {
650 		.nprocs = 1,
651 		.ts = {
652 			.tv_sec = usage_seconds,
653 			.tv_nsec = 0,
654 		},
655 		.clock_type = CPU_HOG_CLOCK_WALL,
656 	};
657 	if (cg_run(child, hog_cpus_timed, (void *)&param))
658 		goto cleanup;
659 
660 	usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
661 	user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
662 	if (user_usec <= 0)
663 		goto cleanup;
664 
665 	if (user_usec >= expected_usage_usec)
666 		goto cleanup;
667 
668 	if (values_close(usage_usec, expected_usage_usec, 95))
669 		goto cleanup;
670 
671 	ret = KSFT_PASS;
672 
673 cleanup:
674 	cg_destroy(child);
675 	free(child);
676 	cg_destroy(parent);
677 	free(parent);
678 
679 	return ret;
680 }
681 
/* Builds one table entry: the test function and its name as a string. */
#define T(x) { x, #x }
/*
 * Table of test cases. main() runs the entries in the order listed and
 * reports each result under the entry's name.
 */
struct cpucg_test {
	/* Test body; receives the root cgroup path and returns a KSFT_* code. */
	int (*fn)(const char *root);
	/* Name printed alongside the result. */
	const char *name;
} tests[] = {
	T(test_cpucg_subtree_control),
	T(test_cpucg_stats),
	T(test_cpucg_weight_overprovisioned),
	T(test_cpucg_weight_underprovisioned),
	T(test_cpucg_nested_weight_overprovisioned),
	T(test_cpucg_nested_weight_underprovisioned),
	T(test_cpucg_max),
	T(test_cpucg_max_nested),
};
#undef T
697 
698 int main(int argc, char *argv[])
699 {
700 	char root[PATH_MAX];
701 	int i, ret = EXIT_SUCCESS;
702 
703 	if (cg_find_unified_root(root, sizeof(root)))
704 		ksft_exit_skip("cgroup v2 isn't mounted\n");
705 
706 	if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
707 		if (cg_write(root, "cgroup.subtree_control", "+cpu"))
708 			ksft_exit_skip("Failed to set cpu controller\n");
709 
710 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
711 		switch (tests[i].fn(root)) {
712 		case KSFT_PASS:
713 			ksft_test_result_pass("%s\n", tests[i].name);
714 			break;
715 		case KSFT_SKIP:
716 			ksft_test_result_skip("%s\n", tests[i].name);
717 			break;
718 		default:
719 			ret = EXIT_FAILURE;
720 			ksft_test_result_fail("%s\n", tests[i].name);
721 			break;
722 		}
723 	}
724 
725 	return ret;
726 }
727