1 /* SPDX-License-Identifier: GPL-2.0 */
2 #define _GNU_SOURCE
3 
4 #include <linux/limits.h>
5 #include <linux/oom.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <sys/socket.h>
14 #include <sys/wait.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
17 #include <netdb.h>
18 #include <errno.h>
19 
20 #include "../kselftest.h"
21 #include "cgroup_util.h"
22 
23 /*
24  * This test creates two nested cgroups with and without enabling
25  * the memory controller.
26  */
27 static int test_memcg_subtree_control(const char *root)
28 {
29 	char *parent, *child, *parent2, *child2;
30 	int ret = KSFT_FAIL;
31 	char buf[PAGE_SIZE];
32 
33 	/* Create two nested cgroups with the memory controller enabled */
34 	parent = cg_name(root, "memcg_test_0");
35 	child = cg_name(root, "memcg_test_0/memcg_test_1");
36 	if (!parent || !child)
37 		goto cleanup;
38 
39 	if (cg_create(parent))
40 		goto cleanup;
41 
42 	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
43 		goto cleanup;
44 
45 	if (cg_create(child))
46 		goto cleanup;
47 
48 	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
49 		goto cleanup;
50 
51 	/* Create two nested cgroups without enabling memory controller */
52 	parent2 = cg_name(root, "memcg_test_1");
53 	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
54 	if (!parent2 || !child2)
55 		goto cleanup;
56 
57 	if (cg_create(parent2))
58 		goto cleanup;
59 
60 	if (cg_create(child2))
61 		goto cleanup;
62 
63 	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
64 		goto cleanup;
65 
66 	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
67 		goto cleanup;
68 
69 	ret = KSFT_PASS;
70 
71 cleanup:
72 	cg_destroy(child);
73 	cg_destroy(parent);
74 	free(parent);
75 	free(child);
76 
77 	cg_destroy(child2);
78 	cg_destroy(parent2);
79 	free(parent2);
80 	free(child2);
81 
82 	return ret;
83 }
84 
85 static int alloc_anon_50M_check(const char *cgroup, void *arg)
86 {
87 	size_t size = MB(50);
88 	char *buf, *ptr;
89 	long anon, current;
90 	int ret = -1;
91 
92 	buf = malloc(size);
93 	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
94 		*ptr = 0;
95 
96 	current = cg_read_long(cgroup, "memory.current");
97 	if (current < size)
98 		goto cleanup;
99 
100 	if (!values_close(size, current, 3))
101 		goto cleanup;
102 
103 	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
104 	if (anon < 0)
105 		goto cleanup;
106 
107 	if (!values_close(anon, current, 3))
108 		goto cleanup;
109 
110 	ret = 0;
111 cleanup:
112 	free(buf);
113 	return ret;
114 }
115 
/*
 * cg_run() callback: obtains a temporary file descriptor, asks
 * alloc_pagecache() for 50M on it and then verifies that memory.current
 * of @cgroup is not below 50M and that the "file" entry of memory.stat is
 * non-negative and passes values_close() against memory.current
 * (tolerance argument 10).
 *
 * @arg is unused.
 *
 * Returns 0 when all checks pass, -1 otherwise.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t want = MB(50);
	long usage, file_bytes;
	int status = -1;
	int tmp_fd = get_temp_fd();

	if (tmp_fd < 0)
		return -1;

	if (!alloc_pagecache(tmp_fd, want)) {
		usage = cg_read_long(cgroup, "memory.current");
		if (usage >= want) {
			file_bytes = cg_read_key_long(cgroup, "memory.stat",
						      "file ");
			if (file_bytes >= 0 &&
			    values_close(file_bytes, usage, 10))
				status = 0;
		}
	}

	/* The descriptor is released on every path past get_temp_fd() */
	close(tmp_fd);
	return status;
}
147 
148 /*
149  * This test create a memory cgroup, allocates
150  * some anonymous memory and some pagecache
151  * and check memory.current and some memory.stat values.
152  */
153 static int test_memcg_current(const char *root)
154 {
155 	int ret = KSFT_FAIL;
156 	long current;
157 	char *memcg;
158 
159 	memcg = cg_name(root, "memcg_test");
160 	if (!memcg)
161 		goto cleanup;
162 
163 	if (cg_create(memcg))
164 		goto cleanup;
165 
166 	current = cg_read_long(memcg, "memory.current");
167 	if (current != 0)
168 		goto cleanup;
169 
170 	if (cg_run(memcg, alloc_anon_50M_check, NULL))
171 		goto cleanup;
172 
173 	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
174 		goto cleanup;
175 
176 	ret = KSFT_PASS;
177 
178 cleanup:
179 	cg_destroy(memcg);
180 	free(memcg);
181 
182 	return ret;
183 }
184 
/*
 * cg_run() callback: calls alloc_pagecache() for 50M on the file
 * descriptor that the caller packed into @arg, and returns its result.
 */
static int alloc_pagecache_50M(const char *cgroup, void *arg)
{
	return alloc_pagecache((int)(long)arg, MB(50));
}
191 
/*
 * Callback for cg_run_nowait(): calls alloc_pagecache() for 50M on the
 * file descriptor packed into @arg and then keeps the process around,
 * polling once a second, until getppid() no longer reports the parent it
 * started with.
 *
 * Returns -1 if the allocation fails, 0 once the parent has changed.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	const int cache_fd = (long)arg;
	const int parent_pid = getppid();

	if (alloc_pagecache(cache_fd, MB(50)) != 0)
		return -1;

	for (;;) {
		if (getppid() != parent_pid)
			break;
		sleep(1);
	}

	return 0;
}
205 
/*
 * Callback for cg_run_nowait(): forwards @cgroup and @arg to alloc_anon()
 * and then keeps the process around, polling once a second, until
 * getppid() no longer reports the parent it started with.
 *
 * Returns -1 if alloc_anon() fails, 0 once the parent has changed.
 */
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
	const int parent_pid = getppid();

	if (alloc_anon(cgroup, arg) != 0)
		return -1;

	for (;;) {
		if (getppid() != parent_pid)
			break;
		sleep(1);
	}

	return 0;
}
218 
/*
 * Wait for the processes of @cgroup to be killed asynchronously by the
 * OOM killer: poll cgroup.procs up to 10 times, 100ms apart, until
 * cg_read_strcmp() reports it equal to the empty string.
 *
 * Returns 0 as soon as that happens, -1 if all attempts are used up.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int tries_left = 10;

	while (tries_left > 0) {
		if (!cg_read_strcmp(cgroup, "cgroup.procs", ""))
			return 0;

		usleep(100000);
		tries_left--;
	}

	return -1;
}
235 
236 /*
237  * First, this test creates the following hierarchy:
238  * A       memory.min = 50M,  memory.max = 200M
239  * A/B     memory.min = 50M,  memory.current = 50M
240  * A/B/C   memory.min = 75M,  memory.current = 50M
241  * A/B/D   memory.min = 25M,  memory.current = 50M
242  * A/B/E   memory.min = 500M, memory.current = 0
243  * A/B/F   memory.min = 0,    memory.current = 50M
244  *
245  * Usages are pagecache, but the test keeps a running
246  * process in every leaf cgroup.
247  * Then it creates A/G and creates a significant
248  * memory pressure in it.
249  *
250  * A/B    memory.current ~= 50M
251  * A/B/C  memory.current ~= 33M
252  * A/B/D  memory.current ~= 17M
253  * A/B/E  memory.current ~= 0
254  *
255  * After that it tries to allocate more than there is
256  * unprotected memory in A available, and checks
257  * checks that memory.min protects pagecache even
258  * in this case.
259  */
260 static int test_memcg_min(const char *root)
261 {
262 	int ret = KSFT_FAIL;
263 	char *parent[3] = {NULL};
264 	char *children[4] = {NULL};
265 	long c[4];
266 	int i, attempts;
267 	int fd;
268 
269 	fd = get_temp_fd();
270 	if (fd < 0)
271 		goto cleanup;
272 
273 	parent[0] = cg_name(root, "memcg_test_0");
274 	if (!parent[0])
275 		goto cleanup;
276 
277 	parent[1] = cg_name(parent[0], "memcg_test_1");
278 	if (!parent[1])
279 		goto cleanup;
280 
281 	parent[2] = cg_name(parent[0], "memcg_test_2");
282 	if (!parent[2])
283 		goto cleanup;
284 
285 	if (cg_create(parent[0]))
286 		goto cleanup;
287 
288 	if (cg_read_long(parent[0], "memory.min")) {
289 		ret = KSFT_SKIP;
290 		goto cleanup;
291 	}
292 
293 	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
294 		goto cleanup;
295 
296 	if (cg_write(parent[0], "memory.max", "200M"))
297 		goto cleanup;
298 
299 	if (cg_write(parent[0], "memory.swap.max", "0"))
300 		goto cleanup;
301 
302 	if (cg_create(parent[1]))
303 		goto cleanup;
304 
305 	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
306 		goto cleanup;
307 
308 	if (cg_create(parent[2]))
309 		goto cleanup;
310 
311 	for (i = 0; i < ARRAY_SIZE(children); i++) {
312 		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
313 		if (!children[i])
314 			goto cleanup;
315 
316 		if (cg_create(children[i]))
317 			goto cleanup;
318 
319 		if (i == 2)
320 			continue;
321 
322 		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
323 			      (void *)(long)fd);
324 	}
325 
326 	if (cg_write(parent[0], "memory.min", "50M"))
327 		goto cleanup;
328 	if (cg_write(parent[1], "memory.min", "50M"))
329 		goto cleanup;
330 	if (cg_write(children[0], "memory.min", "75M"))
331 		goto cleanup;
332 	if (cg_write(children[1], "memory.min", "25M"))
333 		goto cleanup;
334 	if (cg_write(children[2], "memory.min", "500M"))
335 		goto cleanup;
336 	if (cg_write(children[3], "memory.min", "0"))
337 		goto cleanup;
338 
339 	attempts = 0;
340 	while (!values_close(cg_read_long(parent[1], "memory.current"),
341 			     MB(150), 3)) {
342 		if (attempts++ > 5)
343 			break;
344 		sleep(1);
345 	}
346 
347 	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
348 		goto cleanup;
349 
350 	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
351 		goto cleanup;
352 
353 	for (i = 0; i < ARRAY_SIZE(children); i++)
354 		c[i] = cg_read_long(children[i], "memory.current");
355 
356 	if (!values_close(c[0], MB(33), 10))
357 		goto cleanup;
358 
359 	if (!values_close(c[1], MB(17), 10))
360 		goto cleanup;
361 
362 	if (!values_close(c[2], 0, 1))
363 		goto cleanup;
364 
365 	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
366 		goto cleanup;
367 
368 	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
369 		goto cleanup;
370 
371 	ret = KSFT_PASS;
372 
373 cleanup:
374 	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
375 		if (!children[i])
376 			continue;
377 
378 		cg_destroy(children[i]);
379 		free(children[i]);
380 	}
381 
382 	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
383 		if (!parent[i])
384 			continue;
385 
386 		cg_destroy(parent[i]);
387 		free(parent[i]);
388 	}
389 	close(fd);
390 	return ret;
391 }
392 
393 /*
394  * First, this test creates the following hierarchy:
395  * A       memory.low = 50M,  memory.max = 200M
396  * A/B     memory.low = 50M,  memory.current = 50M
397  * A/B/C   memory.low = 75M,  memory.current = 50M
398  * A/B/D   memory.low = 25M,  memory.current = 50M
399  * A/B/E   memory.low = 500M, memory.current = 0
400  * A/B/F   memory.low = 0,    memory.current = 50M
401  *
402  * Usages are pagecache.
403  * Then it creates A/G an creates a significant
404  * memory pressure in it.
405  *
406  * Then it checks actual memory usages and expects that:
407  * A/B    memory.current ~= 50M
408  * A/B/   memory.current ~= 33M
409  * A/B/D  memory.current ~= 17M
410  * A/B/E  memory.current ~= 0
411  *
412  * After that it tries to allocate more than there is
413  * unprotected memory in A available,
414  * and checks low and oom events in memory.events.
415  */
416 static int test_memcg_low(const char *root)
417 {
418 	int ret = KSFT_FAIL;
419 	char *parent[3] = {NULL};
420 	char *children[4] = {NULL};
421 	long low, oom;
422 	long c[4];
423 	int i;
424 	int fd;
425 
426 	fd = get_temp_fd();
427 	if (fd < 0)
428 		goto cleanup;
429 
430 	parent[0] = cg_name(root, "memcg_test_0");
431 	if (!parent[0])
432 		goto cleanup;
433 
434 	parent[1] = cg_name(parent[0], "memcg_test_1");
435 	if (!parent[1])
436 		goto cleanup;
437 
438 	parent[2] = cg_name(parent[0], "memcg_test_2");
439 	if (!parent[2])
440 		goto cleanup;
441 
442 	if (cg_create(parent[0]))
443 		goto cleanup;
444 
445 	if (cg_read_long(parent[0], "memory.low"))
446 		goto cleanup;
447 
448 	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
449 		goto cleanup;
450 
451 	if (cg_write(parent[0], "memory.max", "200M"))
452 		goto cleanup;
453 
454 	if (cg_write(parent[0], "memory.swap.max", "0"))
455 		goto cleanup;
456 
457 	if (cg_create(parent[1]))
458 		goto cleanup;
459 
460 	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
461 		goto cleanup;
462 
463 	if (cg_create(parent[2]))
464 		goto cleanup;
465 
466 	for (i = 0; i < ARRAY_SIZE(children); i++) {
467 		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
468 		if (!children[i])
469 			goto cleanup;
470 
471 		if (cg_create(children[i]))
472 			goto cleanup;
473 
474 		if (i == 2)
475 			continue;
476 
477 		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
478 			goto cleanup;
479 	}
480 
481 	if (cg_write(parent[0], "memory.low", "50M"))
482 		goto cleanup;
483 	if (cg_write(parent[1], "memory.low", "50M"))
484 		goto cleanup;
485 	if (cg_write(children[0], "memory.low", "75M"))
486 		goto cleanup;
487 	if (cg_write(children[1], "memory.low", "25M"))
488 		goto cleanup;
489 	if (cg_write(children[2], "memory.low", "500M"))
490 		goto cleanup;
491 	if (cg_write(children[3], "memory.low", "0"))
492 		goto cleanup;
493 
494 	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
495 		goto cleanup;
496 
497 	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
498 		goto cleanup;
499 
500 	for (i = 0; i < ARRAY_SIZE(children); i++)
501 		c[i] = cg_read_long(children[i], "memory.current");
502 
503 	if (!values_close(c[0], MB(33), 10))
504 		goto cleanup;
505 
506 	if (!values_close(c[1], MB(17), 10))
507 		goto cleanup;
508 
509 	if (!values_close(c[2], 0, 1))
510 		goto cleanup;
511 
512 	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
513 		fprintf(stderr,
514 			"memory.low prevents from allocating anon memory\n");
515 		goto cleanup;
516 	}
517 
518 	for (i = 0; i < ARRAY_SIZE(children); i++) {
519 		oom = cg_read_key_long(children[i], "memory.events", "oom ");
520 		low = cg_read_key_long(children[i], "memory.events", "low ");
521 
522 		if (oom)
523 			goto cleanup;
524 		if (i < 2 && low <= 0)
525 			goto cleanup;
526 		if (i >= 2 && low)
527 			goto cleanup;
528 	}
529 
530 	ret = KSFT_PASS;
531 
532 cleanup:
533 	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
534 		if (!children[i])
535 			continue;
536 
537 		cg_destroy(children[i]);
538 		free(children[i]);
539 	}
540 
541 	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
542 		if (!parent[i])
543 			continue;
544 
545 		cg_destroy(parent[i]);
546 		free(parent[i]);
547 	}
548 	close(fd);
549 	return ret;
550 }
551 
/*
 * cg_run() callback: obtains a temporary file descriptor, asks
 * alloc_pagecache() for 50M on it and then checks that memory.current of
 * @cgroup ended up above 29M but not above 30M.
 *
 * @arg is unused.
 *
 * Returns 0 when the usage is inside that window, -1 otherwise.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t want = MB(50);
	long usage;
	int status = -1;
	int tmp_fd = get_temp_fd();

	if (tmp_fd < 0)
		return -1;

	if (!alloc_pagecache(tmp_fd, want)) {
		usage = cg_read_long(cgroup, "memory.current");
		if (usage > MB(29) && usage <= MB(30))
			status = 0;
	}

	close(tmp_fd);
	return status;
}
577 
578 /*
579  * This test checks that memory.high limits the amount of
580  * memory which can be consumed by either anonymous memory
581  * or pagecache.
582  */
583 static int test_memcg_high(const char *root)
584 {
585 	int ret = KSFT_FAIL;
586 	char *memcg;
587 	long high;
588 
589 	memcg = cg_name(root, "memcg_test");
590 	if (!memcg)
591 		goto cleanup;
592 
593 	if (cg_create(memcg))
594 		goto cleanup;
595 
596 	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
597 		goto cleanup;
598 
599 	if (cg_write(memcg, "memory.swap.max", "0"))
600 		goto cleanup;
601 
602 	if (cg_write(memcg, "memory.high", "30M"))
603 		goto cleanup;
604 
605 	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
606 		goto cleanup;
607 
608 	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
609 		goto cleanup;
610 
611 	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
612 		goto cleanup;
613 
614 	high = cg_read_key_long(memcg, "memory.events", "high ");
615 	if (high <= 0)
616 		goto cleanup;
617 
618 	ret = KSFT_PASS;
619 
620 cleanup:
621 	cg_destroy(memcg);
622 	free(memcg);
623 
624 	return ret;
625 }
626 
627 /*
628  * This test checks that memory.max limits the amount of
629  * memory which can be consumed by either anonymous memory
630  * or pagecache.
631  */
632 static int test_memcg_max(const char *root)
633 {
634 	int ret = KSFT_FAIL;
635 	char *memcg;
636 	long current, max;
637 
638 	memcg = cg_name(root, "memcg_test");
639 	if (!memcg)
640 		goto cleanup;
641 
642 	if (cg_create(memcg))
643 		goto cleanup;
644 
645 	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
646 		goto cleanup;
647 
648 	if (cg_write(memcg, "memory.swap.max", "0"))
649 		goto cleanup;
650 
651 	if (cg_write(memcg, "memory.max", "30M"))
652 		goto cleanup;
653 
654 	/* Should be killed by OOM killer */
655 	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
656 		goto cleanup;
657 
658 	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
659 		goto cleanup;
660 
661 	current = cg_read_long(memcg, "memory.current");
662 	if (current > MB(30) || !current)
663 		goto cleanup;
664 
665 	max = cg_read_key_long(memcg, "memory.events", "max ");
666 	if (max <= 0)
667 		goto cleanup;
668 
669 	ret = KSFT_PASS;
670 
671 cleanup:
672 	cg_destroy(memcg);
673 	free(memcg);
674 
675 	return ret;
676 }
677 
678 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
679 {
680 	long mem_max = (long)arg;
681 	size_t size = MB(50);
682 	char *buf, *ptr;
683 	long mem_current, swap_current;
684 	int ret = -1;
685 
686 	buf = malloc(size);
687 	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
688 		*ptr = 0;
689 
690 	mem_current = cg_read_long(cgroup, "memory.current");
691 	if (!mem_current || !values_close(mem_current, mem_max, 3))
692 		goto cleanup;
693 
694 	swap_current = cg_read_long(cgroup, "memory.swap.current");
695 	if (!swap_current ||
696 	    !values_close(mem_current + swap_current, size, 3))
697 		goto cleanup;
698 
699 	ret = 0;
700 cleanup:
701 	free(buf);
702 	return ret;
703 }
704 
705 /*
706  * This test checks that memory.swap.max limits the amount of
707  * anonymous memory which can be swapped out.
708  */
709 static int test_memcg_swap_max(const char *root)
710 {
711 	int ret = KSFT_FAIL;
712 	char *memcg;
713 	long max;
714 
715 	if (!is_swap_enabled())
716 		return KSFT_SKIP;
717 
718 	memcg = cg_name(root, "memcg_test");
719 	if (!memcg)
720 		goto cleanup;
721 
722 	if (cg_create(memcg))
723 		goto cleanup;
724 
725 	if (cg_read_long(memcg, "memory.swap.current")) {
726 		ret = KSFT_SKIP;
727 		goto cleanup;
728 	}
729 
730 	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
731 		goto cleanup;
732 
733 	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
734 		goto cleanup;
735 
736 	if (cg_write(memcg, "memory.swap.max", "30M"))
737 		goto cleanup;
738 
739 	if (cg_write(memcg, "memory.max", "30M"))
740 		goto cleanup;
741 
742 	/* Should be killed by OOM killer */
743 	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
744 		goto cleanup;
745 
746 	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
747 		goto cleanup;
748 
749 	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
750 		goto cleanup;
751 
752 	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
753 		goto cleanup;
754 
755 	max = cg_read_key_long(memcg, "memory.events", "max ");
756 	if (max <= 0)
757 		goto cleanup;
758 
759 	ret = KSFT_PASS;
760 
761 cleanup:
762 	cg_destroy(memcg);
763 	free(memcg);
764 
765 	return ret;
766 }
767 
768 /*
769  * This test disables swapping and tries to allocate anonymous memory
770  * up to OOM. Then it checks for oom and oom_kill events in
771  * memory.events.
772  */
773 static int test_memcg_oom_events(const char *root)
774 {
775 	int ret = KSFT_FAIL;
776 	char *memcg;
777 
778 	memcg = cg_name(root, "memcg_test");
779 	if (!memcg)
780 		goto cleanup;
781 
782 	if (cg_create(memcg))
783 		goto cleanup;
784 
785 	if (cg_write(memcg, "memory.max", "30M"))
786 		goto cleanup;
787 
788 	if (cg_write(memcg, "memory.swap.max", "0"))
789 		goto cleanup;
790 
791 	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
792 		goto cleanup;
793 
794 	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
795 		goto cleanup;
796 
797 	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
798 		goto cleanup;
799 
800 	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
801 		goto cleanup;
802 
803 	ret = KSFT_PASS;
804 
805 cleanup:
806 	cg_destroy(memcg);
807 	free(memcg);
808 
809 	return ret;
810 }
811 
/* Arguments handed to tcp_server() by test_memcg_sock() */
struct tcp_server_args {
	unsigned short port;	/* port to bind to, in host byte order */
	int ctl[2];		/* pipe; the server reports on ctl[1] */
};

/*
 * TCP server started in the test cgroup through cg_run_nowait().
 *
 * It binds an IPv6 stream socket to the requested port on the wildcard
 * address and starts listening. The outcome is reported to the parent
 * over the control pipe as a single int: 0 once the socket is listening,
 * or the errno value if bind() failed. After accepting one client it
 * writes 1M chunks to it until a write fails.
 *
 * @arg points to a struct tcp_server_args.
 *
 * Returns 0 only if the final write failed with ECONNRESET, -1 in every
 * other case, including a failing accept().
 */
static int tcp_server(const char *cgroup, void *arg)
{
	/* Zero-filled payload; static so it does not take 1M of stack */
	static uint8_t buf[0x100000];
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;
	int status = 0;

	/* The read end of the pipe belongs to the parent */
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Let the parent see why binding failed */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/*
	 * The "ready" status uses its own variable, so ret stays -1 until
	 * the transfer has really ended the expected way.
	 */
	if (write(ctl_fd, &status, sizeof(status)) != sizeof(status))
		goto cleanup;

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	for (;;) {
		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
873 
/*
 * TCP client side of the socket accounting test.
 *
 * Connects to "localhost" on @port and then, for a bounded number of
 * rounds, reads up to 1M from the socket and compares memory.current of
 * @cgroup with the "sock" entry of its memory.stat.
 *
 * Returns KSFT_PASS as soon as the two values pass values_close()
 * (tolerance argument 10). Returns KSFT_FAIL if socket() fails, a read
 * fails or hits end of stream, either value is negative, memory.current
 * is below "sock", or the rounds run out. If getaddrinfo() or connect()
 * fails, their error return is passed through unchanged.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/*
	 * The port is unsigned: "%hu" prints at most 5 digits, which fits
	 * the buffer. A signed conversion would turn ports above 32767
	 * into negative numbers that do not even fit.
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	/* From here on ret must not keep getaddrinfo()'s success value */
	ret = KSFT_FAIL;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (current < sock)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
924 
925 /*
926  * This test checks socket memory accounting.
927  * The test forks a TCP server listens on a random port between 1000
928  * and 61000. Once it gets a client connection, it starts writing to
929  * its socket.
930  * The TCP client interleaves reads from the socket with check whether
931  * memory.current and memory.stat.sock are similar.
932  */
933 static int test_memcg_sock(const char *root)
934 {
935 	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
936 	unsigned short port;
937 	char *memcg;
938 
939 	memcg = cg_name(root, "memcg_test");
940 	if (!memcg)
941 		goto cleanup;
942 
943 	if (cg_create(memcg))
944 		goto cleanup;
945 
946 	while (bind_retries--) {
947 		struct tcp_server_args args;
948 
949 		if (pipe(args.ctl))
950 			goto cleanup;
951 
952 		port = args.port = 1000 + rand() % 60000;
953 
954 		pid = cg_run_nowait(memcg, tcp_server, &args);
955 		if (pid < 0)
956 			goto cleanup;
957 
958 		close(args.ctl[1]);
959 		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
960 			goto cleanup;
961 		close(args.ctl[0]);
962 
963 		if (!err)
964 			break;
965 		if (err != EADDRINUSE)
966 			goto cleanup;
967 
968 		waitpid(pid, NULL, 0);
969 	}
970 
971 	if (err == EADDRINUSE) {
972 		ret = KSFT_SKIP;
973 		goto cleanup;
974 	}
975 
976 	if (tcp_client(memcg, port) != KSFT_PASS)
977 		goto cleanup;
978 
979 	waitpid(pid, &err, 0);
980 	if (WEXITSTATUS(err))
981 		goto cleanup;
982 
983 	if (cg_read_long(memcg, "memory.current") < 0)
984 		goto cleanup;
985 
986 	if (cg_read_key_long(memcg, "memory.stat", "sock "))
987 		goto cleanup;
988 
989 	ret = KSFT_PASS;
990 
991 cleanup:
992 	cg_destroy(memcg);
993 	free(memcg);
994 
995 	return ret;
996 }
997 
998 /*
999  * This test disables swapping and tries to allocate anonymous memory
1000  * up to OOM with memory.group.oom set. Then it checks that all
1001  * processes in the leaf (but not the parent) were killed.
1002  */
1003 static int test_memcg_oom_group_leaf_events(const char *root)
1004 {
1005 	int ret = KSFT_FAIL;
1006 	char *parent, *child;
1007 
1008 	parent = cg_name(root, "memcg_test_0");
1009 	child = cg_name(root, "memcg_test_0/memcg_test_1");
1010 
1011 	if (!parent || !child)
1012 		goto cleanup;
1013 
1014 	if (cg_create(parent))
1015 		goto cleanup;
1016 
1017 	if (cg_create(child))
1018 		goto cleanup;
1019 
1020 	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1021 		goto cleanup;
1022 
1023 	if (cg_write(child, "memory.max", "50M"))
1024 		goto cleanup;
1025 
1026 	if (cg_write(child, "memory.swap.max", "0"))
1027 		goto cleanup;
1028 
1029 	if (cg_write(child, "memory.oom.group", "1"))
1030 		goto cleanup;
1031 
1032 	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1033 	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1034 	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1035 	if (!cg_run(child, alloc_anon, (void *)MB(100)))
1036 		goto cleanup;
1037 
1038 	if (cg_test_proc_killed(child))
1039 		goto cleanup;
1040 
1041 	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1042 		goto cleanup;
1043 
1044 	if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
1045 		goto cleanup;
1046 
1047 	ret = KSFT_PASS;
1048 
1049 cleanup:
1050 	if (child)
1051 		cg_destroy(child);
1052 	if (parent)
1053 		cg_destroy(parent);
1054 	free(child);
1055 	free(parent);
1056 
1057 	return ret;
1058 }
1059 
1060 /*
1061  * This test disables swapping and tries to allocate anonymous memory
1062  * up to OOM with memory.group.oom set. Then it checks that all
1063  * processes in the parent and leaf were killed.
1064  */
1065 static int test_memcg_oom_group_parent_events(const char *root)
1066 {
1067 	int ret = KSFT_FAIL;
1068 	char *parent, *child;
1069 
1070 	parent = cg_name(root, "memcg_test_0");
1071 	child = cg_name(root, "memcg_test_0/memcg_test_1");
1072 
1073 	if (!parent || !child)
1074 		goto cleanup;
1075 
1076 	if (cg_create(parent))
1077 		goto cleanup;
1078 
1079 	if (cg_create(child))
1080 		goto cleanup;
1081 
1082 	if (cg_write(parent, "memory.max", "80M"))
1083 		goto cleanup;
1084 
1085 	if (cg_write(parent, "memory.swap.max", "0"))
1086 		goto cleanup;
1087 
1088 	if (cg_write(parent, "memory.oom.group", "1"))
1089 		goto cleanup;
1090 
1091 	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1092 	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1093 	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1094 
1095 	if (!cg_run(child, alloc_anon, (void *)MB(100)))
1096 		goto cleanup;
1097 
1098 	if (cg_test_proc_killed(child))
1099 		goto cleanup;
1100 	if (cg_test_proc_killed(parent))
1101 		goto cleanup;
1102 
1103 	ret = KSFT_PASS;
1104 
1105 cleanup:
1106 	if (child)
1107 		cg_destroy(child);
1108 	if (parent)
1109 		cg_destroy(parent);
1110 	free(child);
1111 	free(parent);
1112 
1113 	return ret;
1114 }
1115 
1116 /*
1117  * This test disables swapping and tries to allocate anonymous memory
1118  * up to OOM with memory.group.oom set. Then it checks that all
1119  * processes were killed except those set with OOM_SCORE_ADJ_MIN
1120  */
1121 static int test_memcg_oom_group_score_events(const char *root)
1122 {
1123 	int ret = KSFT_FAIL;
1124 	char *memcg;
1125 	int safe_pid;
1126 
1127 	memcg = cg_name(root, "memcg_test_0");
1128 
1129 	if (!memcg)
1130 		goto cleanup;
1131 
1132 	if (cg_create(memcg))
1133 		goto cleanup;
1134 
1135 	if (cg_write(memcg, "memory.max", "50M"))
1136 		goto cleanup;
1137 
1138 	if (cg_write(memcg, "memory.swap.max", "0"))
1139 		goto cleanup;
1140 
1141 	if (cg_write(memcg, "memory.oom.group", "1"))
1142 		goto cleanup;
1143 
1144 	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1145 	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1146 		goto cleanup;
1147 
1148 	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1149 	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1150 		goto cleanup;
1151 
1152 	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
1153 		goto cleanup;
1154 
1155 	if (kill(safe_pid, SIGKILL))
1156 		goto cleanup;
1157 
1158 	ret = KSFT_PASS;
1159 
1160 cleanup:
1161 	if (memcg)
1162 		cg_destroy(memcg);
1163 	free(memcg);
1164 
1165 	return ret;
1166 }
1167 
1168 
1169 #define T(x) { x, #x }
1170 struct memcg_test {
1171 	int (*fn)(const char *root);
1172 	const char *name;
1173 } tests[] = {
1174 	T(test_memcg_subtree_control),
1175 	T(test_memcg_current),
1176 	T(test_memcg_min),
1177 	T(test_memcg_low),
1178 	T(test_memcg_high),
1179 	T(test_memcg_max),
1180 	T(test_memcg_oom_events),
1181 	T(test_memcg_swap_max),
1182 	T(test_memcg_sock),
1183 	T(test_memcg_oom_group_leaf_events),
1184 	T(test_memcg_oom_group_parent_events),
1185 	T(test_memcg_oom_group_score_events),
1186 };
1187 #undef T
1188 
1189 int main(int argc, char **argv)
1190 {
1191 	char root[PATH_MAX];
1192 	int i, ret = EXIT_SUCCESS;
1193 
1194 	if (cg_find_unified_root(root, sizeof(root)))
1195 		ksft_exit_skip("cgroup v2 isn't mounted\n");
1196 
1197 	/*
1198 	 * Check that memory controller is available:
1199 	 * memory is listed in cgroup.controllers
1200 	 */
1201 	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1202 		ksft_exit_skip("memory controller isn't available\n");
1203 
1204 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
1205 		switch (tests[i].fn(root)) {
1206 		case KSFT_PASS:
1207 			ksft_test_result_pass("%s\n", tests[i].name);
1208 			break;
1209 		case KSFT_SKIP:
1210 			ksft_test_result_skip("%s\n", tests[i].name);
1211 			break;
1212 		default:
1213 			ret = EXIT_FAILURE;
1214 			ksft_test_result_fail("%s\n", tests[i].name);
1215 			break;
1216 		}
1217 	}
1218 
1219 	return ret;
1220 }
1221