1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #include <linux/limits.h>
4 #include <sys/types.h>
5 #include <sys/mman.h>
6 #include <sys/wait.h>
7 #include <unistd.h>
8 #include <fcntl.h>
9 #include <stdio.h>
10 #include <errno.h>
11 #include <signal.h>
12 #include <string.h>
13 #include <pthread.h>
14 
15 #include "../kselftest.h"
16 #include "cgroup_util.h"
17 
/*
 * Fill @size bytes starting at @buf with data read from /dev/urandom, so
 * that every byte of the buffer gets written.
 *
 * Returns 0 once the whole buffer has been filled. Returns -1 if
 * /dev/urandom cannot be opened, if read() fails with an error other than
 * EINTR, or if read() reports end-of-file before the buffer is full.
 */
static int touch_anon(char *buf, size_t size)
{
	int fd;
	char *pos = buf;

	fd = open("/dev/urandom", O_RDONLY);
	if (fd < 0)
		return -1;

	while (size > 0) {
		ssize_t ret = read(fd, pos, size);

		if (ret < 0) {
			/* An interrupted read is simply retried. */
			if (errno == EINTR)
				continue;
			close(fd);
			return -1;
		}

		if (ret == 0) {
			/*
			 * EOF makes no progress; fail instead of spinning
			 * forever on a zero-length read.
			 */
			close(fd);
			return -1;
		}

		pos += ret;
		size -= ret;
	}
	close(fd);

	return 0;
}
44 
45 static int alloc_and_touch_anon_noexit(const char *cgroup, void *arg)
46 {
47 	int ppid = getppid();
48 	size_t size = (size_t)arg;
49 	void *buf;
50 
51 	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
52 		   0, 0);
53 	if (buf == MAP_FAILED)
54 		return -1;
55 
56 	if (touch_anon((char *)buf, size)) {
57 		munmap(buf, size);
58 		return -1;
59 	}
60 
61 	while (getppid() == ppid)
62 		sleep(1);
63 
64 	munmap(buf, size);
65 	return 0;
66 }
67 
68 /*
69  * Create a child process that allocates and touches 100MB, then waits to be
70  * killed. Wait until the child is attached to the cgroup, kill all processes
71  * in that cgroup and wait until "cgroup.procs" is empty. At this point try to
72  * destroy the empty cgroup. The test helps detect race conditions between
73  * dying processes leaving the cgroup and cgroup destruction path.
74  */
75 static int test_cgcore_destroy(const char *root)
76 {
77 	int ret = KSFT_FAIL;
78 	char *cg_test = NULL;
79 	int child_pid;
80 	char buf[PAGE_SIZE];
81 
82 	cg_test = cg_name(root, "cg_test");
83 
84 	if (!cg_test)
85 		goto cleanup;
86 
87 	for (int i = 0; i < 10; i++) {
88 		if (cg_create(cg_test))
89 			goto cleanup;
90 
91 		child_pid = cg_run_nowait(cg_test, alloc_and_touch_anon_noexit,
92 					  (void *) MB(100));
93 
94 		if (child_pid < 0)
95 			goto cleanup;
96 
97 		/* wait for the child to enter cgroup */
98 		if (cg_wait_for_proc_count(cg_test, 1))
99 			goto cleanup;
100 
101 		if (cg_killall(cg_test))
102 			goto cleanup;
103 
104 		/* wait for cgroup to be empty */
105 		while (1) {
106 			if (cg_read(cg_test, "cgroup.procs", buf, sizeof(buf)))
107 				goto cleanup;
108 			if (buf[0] == '\0')
109 				break;
110 			usleep(1000);
111 		}
112 
113 		if (rmdir(cg_test))
114 			goto cleanup;
115 
116 		if (waitpid(child_pid, NULL, 0) < 0)
117 			goto cleanup;
118 	}
119 	ret = KSFT_PASS;
120 cleanup:
121 	if (cg_test)
122 		cg_destroy(cg_test);
123 	free(cg_test);
124 	return ret;
125 }
126 
127 /*
128  * A(0) - B(0) - C(1)
129  *        \ D(0)
130  *
131  * A, B and C's "populated" fields would be 1 while D's 0.
132  * test that after the one process in C is moved to root,
133  * A,B and C's "populated" fields would flip to "0" and file
134  * modified events will be generated on the
135  * "cgroup.events" files of both cgroups.
136  */
137 static int test_cgcore_populated(const char *root)
138 {
139 	int ret = KSFT_FAIL;
140 	int err;
141 	char *cg_test_a = NULL, *cg_test_b = NULL;
142 	char *cg_test_c = NULL, *cg_test_d = NULL;
143 	int cgroup_fd = -EBADF;
144 	pid_t pid;
145 
146 	cg_test_a = cg_name(root, "cg_test_a");
147 	cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
148 	cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
149 	cg_test_d = cg_name(root, "cg_test_a/cg_test_b/cg_test_d");
150 
151 	if (!cg_test_a || !cg_test_b || !cg_test_c || !cg_test_d)
152 		goto cleanup;
153 
154 	if (cg_create(cg_test_a))
155 		goto cleanup;
156 
157 	if (cg_create(cg_test_b))
158 		goto cleanup;
159 
160 	if (cg_create(cg_test_c))
161 		goto cleanup;
162 
163 	if (cg_create(cg_test_d))
164 		goto cleanup;
165 
166 	if (cg_enter_current(cg_test_c))
167 		goto cleanup;
168 
169 	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 1\n"))
170 		goto cleanup;
171 
172 	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 1\n"))
173 		goto cleanup;
174 
175 	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 1\n"))
176 		goto cleanup;
177 
178 	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
179 		goto cleanup;
180 
181 	if (cg_enter_current(root))
182 		goto cleanup;
183 
184 	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 0\n"))
185 		goto cleanup;
186 
187 	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 0\n"))
188 		goto cleanup;
189 
190 	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 0\n"))
191 		goto cleanup;
192 
193 	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
194 		goto cleanup;
195 
196 	/* Test that we can directly clone into a new cgroup. */
197 	cgroup_fd = dirfd_open_opath(cg_test_d);
198 	if (cgroup_fd < 0)
199 		goto cleanup;
200 
201 	pid = clone_into_cgroup(cgroup_fd);
202 	if (pid < 0) {
203 		if (errno == ENOSYS)
204 			goto cleanup_pass;
205 		goto cleanup;
206 	}
207 
208 	if (pid == 0) {
209 		if (raise(SIGSTOP))
210 			exit(EXIT_FAILURE);
211 		exit(EXIT_SUCCESS);
212 	}
213 
214 	err = cg_read_strcmp(cg_test_d, "cgroup.events", "populated 1\n");
215 
216 	(void)clone_reap(pid, WSTOPPED);
217 	(void)kill(pid, SIGCONT);
218 	(void)clone_reap(pid, WEXITED);
219 
220 	if (err)
221 		goto cleanup;
222 
223 	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
224 		goto cleanup;
225 
226 	/* Remove cgroup. */
227 	if (cg_test_d) {
228 		cg_destroy(cg_test_d);
229 		free(cg_test_d);
230 		cg_test_d = NULL;
231 	}
232 
233 	pid = clone_into_cgroup(cgroup_fd);
234 	if (pid < 0)
235 		goto cleanup_pass;
236 	if (pid == 0)
237 		exit(EXIT_SUCCESS);
238 	(void)clone_reap(pid, WEXITED);
239 	goto cleanup;
240 
241 cleanup_pass:
242 	ret = KSFT_PASS;
243 
244 cleanup:
245 	if (cg_test_d)
246 		cg_destroy(cg_test_d);
247 	if (cg_test_c)
248 		cg_destroy(cg_test_c);
249 	if (cg_test_b)
250 		cg_destroy(cg_test_b);
251 	if (cg_test_a)
252 		cg_destroy(cg_test_a);
253 	free(cg_test_d);
254 	free(cg_test_c);
255 	free(cg_test_b);
256 	free(cg_test_a);
257 	if (cgroup_fd >= 0)
258 		close(cgroup_fd);
259 	return ret;
260 }
261 
262 /*
263  * A (domain threaded) - B (threaded) - C (domain)
264  *
265  * test that C can't be used until it is turned into a
266  * threaded cgroup.  "cgroup.type" file will report "domain (invalid)" in
267  * these cases. Operations which fail due to invalid topology use
268  * EOPNOTSUPP as the errno.
269  */
270 static int test_cgcore_invalid_domain(const char *root)
271 {
272 	int ret = KSFT_FAIL;
273 	char *grandparent = NULL, *parent = NULL, *child = NULL;
274 
275 	grandparent = cg_name(root, "cg_test_grandparent");
276 	parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
277 	child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
278 	if (!parent || !child || !grandparent)
279 		goto cleanup;
280 
281 	if (cg_create(grandparent))
282 		goto cleanup;
283 
284 	if (cg_create(parent))
285 		goto cleanup;
286 
287 	if (cg_create(child))
288 		goto cleanup;
289 
290 	if (cg_write(parent, "cgroup.type", "threaded"))
291 		goto cleanup;
292 
293 	if (cg_read_strcmp(child, "cgroup.type", "domain invalid\n"))
294 		goto cleanup;
295 
296 	if (!cg_enter_current(child))
297 		goto cleanup;
298 
299 	if (errno != EOPNOTSUPP)
300 		goto cleanup;
301 
302 	if (!clone_into_cgroup_run_wait(child))
303 		goto cleanup;
304 
305 	if (errno == ENOSYS)
306 		goto cleanup_pass;
307 
308 	if (errno != EOPNOTSUPP)
309 		goto cleanup;
310 
311 cleanup_pass:
312 	ret = KSFT_PASS;
313 
314 cleanup:
315 	cg_enter_current(root);
316 	if (child)
317 		cg_destroy(child);
318 	if (parent)
319 		cg_destroy(parent);
320 	if (grandparent)
321 		cg_destroy(grandparent);
322 	free(child);
323 	free(parent);
324 	free(grandparent);
325 	return ret;
326 }
327 
328 /*
329  * Test that when a child becomes threaded
330  * the parent type becomes domain threaded.
331  */
332 static int test_cgcore_parent_becomes_threaded(const char *root)
333 {
334 	int ret = KSFT_FAIL;
335 	char *parent = NULL, *child = NULL;
336 
337 	parent = cg_name(root, "cg_test_parent");
338 	child = cg_name(root, "cg_test_parent/cg_test_child");
339 	if (!parent || !child)
340 		goto cleanup;
341 
342 	if (cg_create(parent))
343 		goto cleanup;
344 
345 	if (cg_create(child))
346 		goto cleanup;
347 
348 	if (cg_write(child, "cgroup.type", "threaded"))
349 		goto cleanup;
350 
351 	if (cg_read_strcmp(parent, "cgroup.type", "domain threaded\n"))
352 		goto cleanup;
353 
354 	ret = KSFT_PASS;
355 
356 cleanup:
357 	if (child)
358 		cg_destroy(child);
359 	if (parent)
360 		cg_destroy(parent);
361 	free(child);
362 	free(parent);
363 	return ret;
364 
365 }
366 
367 /*
368  * Test that there's no internal process constrain on threaded cgroups.
369  * You can add threads/processes on a parent with a controller enabled.
370  */
371 static int test_cgcore_no_internal_process_constraint_on_threads(const char *root)
372 {
373 	int ret = KSFT_FAIL;
374 	char *parent = NULL, *child = NULL;
375 
376 	if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
377 	    cg_write(root, "cgroup.subtree_control", "+cpu")) {
378 		ret = KSFT_SKIP;
379 		goto cleanup;
380 	}
381 
382 	parent = cg_name(root, "cg_test_parent");
383 	child = cg_name(root, "cg_test_parent/cg_test_child");
384 	if (!parent || !child)
385 		goto cleanup;
386 
387 	if (cg_create(parent))
388 		goto cleanup;
389 
390 	if (cg_create(child))
391 		goto cleanup;
392 
393 	if (cg_write(parent, "cgroup.type", "threaded"))
394 		goto cleanup;
395 
396 	if (cg_write(child, "cgroup.type", "threaded"))
397 		goto cleanup;
398 
399 	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
400 		goto cleanup;
401 
402 	if (cg_enter_current(parent))
403 		goto cleanup;
404 
405 	ret = KSFT_PASS;
406 
407 cleanup:
408 	cg_enter_current(root);
409 	cg_enter_current(root);
410 	if (child)
411 		cg_destroy(child);
412 	if (parent)
413 		cg_destroy(parent);
414 	free(child);
415 	free(parent);
416 	return ret;
417 }
418 
419 /*
420  * Test that you can't enable a controller on a child if it's not enabled
421  * on the parent.
422  */
423 static int test_cgcore_top_down_constraint_enable(const char *root)
424 {
425 	int ret = KSFT_FAIL;
426 	char *parent = NULL, *child = NULL;
427 
428 	parent = cg_name(root, "cg_test_parent");
429 	child = cg_name(root, "cg_test_parent/cg_test_child");
430 	if (!parent || !child)
431 		goto cleanup;
432 
433 	if (cg_create(parent))
434 		goto cleanup;
435 
436 	if (cg_create(child))
437 		goto cleanup;
438 
439 	if (!cg_write(child, "cgroup.subtree_control", "+memory"))
440 		goto cleanup;
441 
442 	ret = KSFT_PASS;
443 
444 cleanup:
445 	if (child)
446 		cg_destroy(child);
447 	if (parent)
448 		cg_destroy(parent);
449 	free(child);
450 	free(parent);
451 	return ret;
452 }
453 
454 /*
455  * Test that you can't disable a controller on a parent
456  * if it's enabled in a child.
457  */
458 static int test_cgcore_top_down_constraint_disable(const char *root)
459 {
460 	int ret = KSFT_FAIL;
461 	char *parent = NULL, *child = NULL;
462 
463 	parent = cg_name(root, "cg_test_parent");
464 	child = cg_name(root, "cg_test_parent/cg_test_child");
465 	if (!parent || !child)
466 		goto cleanup;
467 
468 	if (cg_create(parent))
469 		goto cleanup;
470 
471 	if (cg_create(child))
472 		goto cleanup;
473 
474 	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
475 		goto cleanup;
476 
477 	if (cg_write(child, "cgroup.subtree_control", "+memory"))
478 		goto cleanup;
479 
480 	if (!cg_write(parent, "cgroup.subtree_control", "-memory"))
481 		goto cleanup;
482 
483 	ret = KSFT_PASS;
484 
485 cleanup:
486 	if (child)
487 		cg_destroy(child);
488 	if (parent)
489 		cg_destroy(parent);
490 	free(child);
491 	free(parent);
492 	return ret;
493 }
494 
495 /*
496  * Test internal process constraint.
497  * You can't add a pid to a domain parent if a controller is enabled.
498  */
499 static int test_cgcore_internal_process_constraint(const char *root)
500 {
501 	int ret = KSFT_FAIL;
502 	char *parent = NULL, *child = NULL;
503 
504 	parent = cg_name(root, "cg_test_parent");
505 	child = cg_name(root, "cg_test_parent/cg_test_child");
506 	if (!parent || !child)
507 		goto cleanup;
508 
509 	if (cg_create(parent))
510 		goto cleanup;
511 
512 	if (cg_create(child))
513 		goto cleanup;
514 
515 	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
516 		goto cleanup;
517 
518 	if (!cg_enter_current(parent))
519 		goto cleanup;
520 
521 	if (!clone_into_cgroup_run_wait(parent))
522 		goto cleanup;
523 
524 	ret = KSFT_PASS;
525 
526 cleanup:
527 	if (child)
528 		cg_destroy(child);
529 	if (parent)
530 		cg_destroy(parent);
531 	free(child);
532 	free(parent);
533 	return ret;
534 }
535 
/*
 * Thread body that does nothing but block in pause(). Should pause() ever
 * return, its result is handed back as the thread's return value. @arg is
 * unused.
 */
static void *dummy_thread_fn(void *arg)
{
	int rc = pause();

	return (void *)(size_t)rc;
}
540 
541 /*
542  * Test threadgroup migration.
543  * All threads of a process are migrated together.
544  */
545 static int test_cgcore_proc_migration(const char *root)
546 {
547 	int ret = KSFT_FAIL;
548 	int t, c_threads = 0, n_threads = 13;
549 	char *src = NULL, *dst = NULL;
550 	pthread_t threads[n_threads];
551 
552 	src = cg_name(root, "cg_src");
553 	dst = cg_name(root, "cg_dst");
554 	if (!src || !dst)
555 		goto cleanup;
556 
557 	if (cg_create(src))
558 		goto cleanup;
559 	if (cg_create(dst))
560 		goto cleanup;
561 
562 	if (cg_enter_current(src))
563 		goto cleanup;
564 
565 	for (c_threads = 0; c_threads < n_threads; ++c_threads) {
566 		if (pthread_create(&threads[c_threads], NULL, dummy_thread_fn, NULL))
567 			goto cleanup;
568 	}
569 
570 	cg_enter_current(dst);
571 	if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1)
572 		goto cleanup;
573 
574 	ret = KSFT_PASS;
575 
576 cleanup:
577 	for (t = 0; t < c_threads; ++t) {
578 		pthread_cancel(threads[t]);
579 	}
580 
581 	for (t = 0; t < c_threads; ++t) {
582 		pthread_join(threads[t], NULL);
583 	}
584 
585 	cg_enter_current(root);
586 
587 	if (dst)
588 		cg_destroy(dst);
589 	if (src)
590 		cg_destroy(src);
591 	free(dst);
592 	free(src);
593 	return ret;
594 }
595 
/*
 * Thread body for the single-thread migration test.
 *
 * @arg is an array of three cgroup paths: index 0 is the path whose length
 * is stripped from the other two, indices 1 and 2 are the cgroups the
 * thread alternates between. On each of 1000 iterations the thread moves
 * itself into one of the two cgroups and then checks, through
 * proc_read_strstr() on "cgroup", for the matching "0::<relative path>"
 * entry.
 *
 * Returns NULL when every check succeeds, (void *)-1 on the first failure.
 */
static void *migrating_thread_fn(void *arg)
{
	char **grps = arg;
	char lines[3][PATH_MAX];
	size_t prefix_len = strlen(grps[0]);
	int n_iterations = 1000;
	int iter, idx;

	/* Precompute the expected entry for each of the two targets. */
	for (idx = 1; idx <= 2; idx++)
		snprintf(lines[idx], sizeof(lines[idx]), "0::%s", grps[idx] + prefix_len);

	for (iter = 0; iter < n_iterations; iter++) {
		idx = (iter % 2) + 1;

		cg_enter_current_thread(grps[idx]);

		if (proc_read_strstr(0, 1, "cgroup", lines[idx]) != 0)
			return (void *)-1;
	}
	return NULL;
}
613 
614 /*
615  * Test single thread migration.
616  * Threaded cgroups allow successful migration of a thread.
617  */
618 static int test_cgcore_thread_migration(const char *root)
619 {
620 	int ret = KSFT_FAIL;
621 	char *dom = NULL;
622 	char line[PATH_MAX];
623 	char *grps[3] = { (char *)root, NULL, NULL };
624 	pthread_t thr;
625 	void *retval;
626 
627 	dom = cg_name(root, "cg_dom");
628 	grps[1] = cg_name(root, "cg_dom/cg_src");
629 	grps[2] = cg_name(root, "cg_dom/cg_dst");
630 	if (!grps[1] || !grps[2] || !dom)
631 		goto cleanup;
632 
633 	if (cg_create(dom))
634 		goto cleanup;
635 	if (cg_create(grps[1]))
636 		goto cleanup;
637 	if (cg_create(grps[2]))
638 		goto cleanup;
639 
640 	if (cg_write(grps[1], "cgroup.type", "threaded"))
641 		goto cleanup;
642 	if (cg_write(grps[2], "cgroup.type", "threaded"))
643 		goto cleanup;
644 
645 	if (cg_enter_current(grps[1]))
646 		goto cleanup;
647 
648 	if (pthread_create(&thr, NULL, migrating_thread_fn, grps))
649 		goto cleanup;
650 
651 	if (pthread_join(thr, &retval))
652 		goto cleanup;
653 
654 	if (retval)
655 		goto cleanup;
656 
657 	snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0]));
658 	if (proc_read_strstr(0, 1, "cgroup", line))
659 		goto cleanup;
660 
661 	ret = KSFT_PASS;
662 
663 cleanup:
664 	cg_enter_current(root);
665 	if (grps[2])
666 		cg_destroy(grps[2]);
667 	if (grps[1])
668 		cg_destroy(grps[1]);
669 	if (dom)
670 		cg_destroy(dom);
671 	free(grps[2]);
672 	free(grps[1]);
673 	free(dom);
674 	return ret;
675 }
676 
677 #define T(x) { x, #x }
678 struct corecg_test {
679 	int (*fn)(const char *root);
680 	const char *name;
681 } tests[] = {
682 	T(test_cgcore_internal_process_constraint),
683 	T(test_cgcore_top_down_constraint_enable),
684 	T(test_cgcore_top_down_constraint_disable),
685 	T(test_cgcore_no_internal_process_constraint_on_threads),
686 	T(test_cgcore_parent_becomes_threaded),
687 	T(test_cgcore_invalid_domain),
688 	T(test_cgcore_populated),
689 	T(test_cgcore_proc_migration),
690 	T(test_cgcore_thread_migration),
691 	T(test_cgcore_destroy),
692 };
693 #undef T
694 
695 int main(int argc, char *argv[])
696 {
697 	char root[PATH_MAX];
698 	int i, ret = EXIT_SUCCESS;
699 
700 	if (cg_find_unified_root(root, sizeof(root)))
701 		ksft_exit_skip("cgroup v2 isn't mounted\n");
702 
703 	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
704 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
705 			ksft_exit_skip("Failed to set memory controller\n");
706 
707 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
708 		switch (tests[i].fn(root)) {
709 		case KSFT_PASS:
710 			ksft_test_result_pass("%s\n", tests[i].name);
711 			break;
712 		case KSFT_SKIP:
713 			ksft_test_result_skip("%s\n", tests[i].name);
714 			break;
715 		default:
716 			ret = EXIT_FAILURE;
717 			ksft_test_result_fail("%s\n", tests[i].name);
718 			break;
719 		}
720 	}
721 
722 	return ret;
723 }
724