/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE

#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>

#include "../kselftest.h"
#include "cgroup_util.h"

/*
 * This test creates two nested cgroups with and without enabling
 * the memory controller.
 */
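/*
 * For reference, a rough shell-level equivalent of what this test verifies
 * (paths are illustrative, not the ones the test creates):
 *
 *   echo +memory > /sys/fs/cgroup/parent/cgroup.subtree_control
 *   cat /sys/fs/cgroup/parent/child/cgroup.controllers   # should list "memory"
 *
 * and, without the subtree_control write, the child's cgroup.controllers
 * is expected not to list "memory".
 */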
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}

static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test creates a memory cgroup, allocates some anonymous memory
 * and some pagecache, and checks memory.current and some memory.stat
 * values.
 */
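/*
 * The helpers above read memory.current (total charged bytes) and compare
 * it against per-type counters from memory.stat, which is a flat keyed
 * file looking roughly like this (values are illustrative):
 *
 *   anon 52428800
 *   file 52428800
 *   kernel_stack 0
 *   ...
 *
 * so e.g. a 50M anonymous allocation is expected to show up both in
 * memory.current and in the "anon" key, within a few percent.
 */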
static int test_memcg_current(const char *root)
{
	int ret = KSFT_FAIL;
	long current;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_pagecache_50M(const char *cgroup, void *arg)
{
	int fd = (long)arg;

	return alloc_pagecache(fd, MB(50));
}

static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	int ppid = getppid();

	if (alloc_anon(cgroup, arg))
		return -1;

	while (getppid() == ppid)
		sleep(1);

	return 0;
}

/*
 * Wait until processes are killed asynchronously by the OOM killer.
 * If we exceed a timeout, fail.
 */
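/*
 * Note: the loop below polls cgroup.procs every 100ms up to 10 times,
 * i.e. the overall timeout is roughly 10 * 100ms = 1 second.
 */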
static int cg_test_proc_killed(const char *cgroup)
{
	int limit;

	for (limit = 10; limit > 0; limit--) {
		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
			return 0;

		usleep(100000);
	}
	return -1;
}

/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 50M,  memory.max = 200M
 * A/B     memory.min = 50M,  memory.current = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 500M, memory.current = 0
 * A/B/F   memory.min = 0,    memory.current = 50M
 *
 * The usages are pagecache, but the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and generates significant
 * memory pressure in it.
 *
 * Then it checks the actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/E  memory.current ~= 0
 *
 * After that it tries to allocate more than the amount of
 * unprotected memory available in A, and checks that
 * memory.min protects pagecache even in this case.
 */
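/*
 * The expected ~33M/~17M split is a rough proportional-distribution
 * estimate (a sketch, not an exact kernel guarantee): A/B is protected
 * down to 50M, and its children claim min(memory.min, usage) each, i.e.
 * C: min(75M, 50M) = 50M, D: min(25M, 50M) = 25M, E: min(500M, 0) = 0,
 * F: min(0, 50M) = 0, for 75M in total. B's 50M of protection is then
 * shared proportionally:
 *
 *   C: 50M * 50/75 ~= 33M
 *   D: 50M * 25/75 ~= 17M
 *
 * which is what the checks below allow, within 10%.
 */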
static int test_memcg_min(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	long c[4];
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], "memory.min")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i == 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[0], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(parent[1], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(children[0], "memory.min", "75M"))
		goto cleanup;
	if (cg_write(children[1], "memory.min", "25M"))
		goto cleanup;
	if (cg_write(children[2], "memory.min", "500M"))
		goto cleanup;
	if (cg_write(children[3], "memory.min", "0"))
		goto cleanup;

	/* Wait for the pagecache allocations in A/B to settle at ~150M */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Generate memory pressure in A; unprotected pagecache gets reclaimed */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(33), 10))
		goto cleanup;

	if (!values_close(c[1], MB(17), 10))
		goto cleanup;

	if (!values_close(c[2], 0, 1))
		goto cleanup;

	/* Allocating more than the unprotected memory in A is expected to fail */
	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A       memory.low = 50M,  memory.max = 200M
 * A/B     memory.low = 50M,  memory.current = 50M
 * A/B/C   memory.low = 75M,  memory.current = 50M
 * A/B/D   memory.low = 25M,  memory.current = 50M
 * A/B/E   memory.low = 500M, memory.current = 0
 * A/B/F   memory.low = 0,    memory.current = 50M
 *
 * The usages are pagecache.
 * Then it creates A/G and generates significant
 * memory pressure in it.
 *
 * Then it checks the actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/E  memory.current ~= 0
 *
 * After that it tries to allocate more than the amount of
 * unprotected memory available in A, and checks the "low"
 * and "oom" events in memory.events.
 */
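/*
 * A note on the memory.events checks at the end: C and D are expected to
 * be reclaimed while their usage is at or below their (effective)
 * memory.low, so their "low" event counters should be positive. E is
 * never reclaimed (it has no usage) and F has memory.low = 0, so neither
 * should record "low" events. No "oom" events are expected in any child,
 * because the pressure can be satisfied by reclaiming pagecache.
 */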
static int test_memcg_low(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	long low, oom;
	long c[4];
	int i;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], "memory.low"))
		goto cleanup;

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		if (i == 2)
			continue;

		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
			goto cleanup;
	}

	if (cg_write(parent[0], "memory.low", "50M"))
		goto cleanup;
	if (cg_write(parent[1], "memory.low", "50M"))
		goto cleanup;
	if (cg_write(children[0], "memory.low", "75M"))
		goto cleanup;
	if (cg_write(children[1], "memory.low", "25M"))
		goto cleanup;
	if (cg_write(children[2], "memory.low", "500M"))
		goto cleanup;
	if (cg_write(children[3], "memory.low", "0"))
		goto cleanup;

	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(33), 10))
		goto cleanup;

	if (!values_close(c[1], MB(17), 10))
		goto cleanup;

	if (!values_close(c[2], 0, 1))
		goto cleanup;

	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
		fprintf(stderr,
			"memory.low prevents the allocation of anon memory\n");
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i < 2 && low <= 0)
			goto cleanup;
		if (i >= 2 && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (current <= MB(29) || current > MB(30))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}

/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
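/*
 * Unlike memory.max, memory.high is not enforced by the OOM killer:
 * exceeding it triggers reclaim (and throttling) and increments the
 * "high" counter in memory.events. That is why the 100M anonymous
 * allocation below is expected to succeed even though the limit is 30M,
 * and why the test only checks that the "high" event count is positive.
 */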
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* memory.high should keep usage near 30M, so the 50M check must fail */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out.
 */
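/*
 * Back-of-the-envelope expectation for the alloc_anon_50M_check_swap()
 * run below: with memory.max = 30M and memory.swap.max = 30M, a 50M
 * anonymous allocation should end up as roughly 30M resident plus
 * roughly 20M of swap, i.e. memory.current ~= 30M and
 * memory.current + memory.swap.current ~= 50M, within a few percent.
 */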
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

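/*
 * The ctl pipe is a small handshake between the forked server and the
 * test: the server writes an int status once bind()/listen() is done --
 * 0 on success or the bind() errno (e.g. EADDRINUSE) on failure -- and
 * the parent reads it to decide whether to connect or retry on another
 * port.
 */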
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (current < sock)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between
 * 1000 and 61000. Once it gets a client connection, it starts writing
 * to its socket.
 * The TCP client interleaves reads from the socket with checks that
 * memory.current and the "sock" counter in memory.stat stay close.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the leaf (but not the parent) were killed.
 */
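/*
 * With memory.oom.group enabled on the leaf, the OOM killer is expected
 * to take down every process in that cgroup as a single unit, so the
 * leaf's "oom_kill" count in memory.events should be positive while the
 * parent's stays at zero -- which is what the checks below verify.
 */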
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.oom.group set. Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN.
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}

#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_max),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

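/*
 * For reference, T(x) just pairs a test function with its name, e.g.
 * T(test_memcg_current) expands to:
 *
 *   { test_memcg_current, "test_memcg_current" }
 */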
int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}