1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 /* 25 * This test creates two nested cgroups with and without enabling 26 * the memory controller. 27 */ 28 static int test_memcg_subtree_control(const char *root) 29 { 30 char *parent, *child, *parent2 = NULL, *child2 = NULL; 31 int ret = KSFT_FAIL; 32 char buf[PAGE_SIZE]; 33 34 /* Create two nested cgroups with the memory controller enabled */ 35 parent = cg_name(root, "memcg_test_0"); 36 child = cg_name(root, "memcg_test_0/memcg_test_1"); 37 if (!parent || !child) 38 goto cleanup_free; 39 40 if (cg_create(parent)) 41 goto cleanup_free; 42 43 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 44 goto cleanup_parent; 45 46 if (cg_create(child)) 47 goto cleanup_parent; 48 49 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 50 goto cleanup_child; 51 52 /* Create two nested cgroups without enabling memory controller */ 53 parent2 = cg_name(root, "memcg_test_1"); 54 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 55 if (!parent2 || !child2) 56 goto cleanup_free2; 57 58 if (cg_create(parent2)) 59 goto cleanup_free2; 60 61 if (cg_create(child2)) 62 goto cleanup_parent2; 63 64 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 65 goto cleanup_all; 66 67 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 68 goto cleanup_all; 69 70 ret = KSFT_PASS; 71 72 cleanup_all: 73 cg_destroy(child2); 74 cleanup_parent2: 75 cg_destroy(parent2); 76 cleanup_free2: 77 free(parent2); 78 free(child2); 79 cleanup_child: 80 
cg_destroy(child); 81 cleanup_parent: 82 cg_destroy(parent); 83 cleanup_free: 84 free(parent); 85 free(child); 86 87 return ret; 88 } 89 90 static int alloc_anon_50M_check(const char *cgroup, void *arg) 91 { 92 size_t size = MB(50); 93 char *buf, *ptr; 94 long anon, current; 95 int ret = -1; 96 97 buf = malloc(size); 98 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 99 *ptr = 0; 100 101 current = cg_read_long(cgroup, "memory.current"); 102 if (current < size) 103 goto cleanup; 104 105 if (!values_close(size, current, 3)) 106 goto cleanup; 107 108 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 109 if (anon < 0) 110 goto cleanup; 111 112 if (!values_close(anon, current, 3)) 113 goto cleanup; 114 115 ret = 0; 116 cleanup: 117 free(buf); 118 return ret; 119 } 120 121 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 122 { 123 size_t size = MB(50); 124 int ret = -1; 125 long current, file; 126 int fd; 127 128 fd = get_temp_fd(); 129 if (fd < 0) 130 return -1; 131 132 if (alloc_pagecache(fd, size)) 133 goto cleanup; 134 135 current = cg_read_long(cgroup, "memory.current"); 136 if (current < size) 137 goto cleanup; 138 139 file = cg_read_key_long(cgroup, "memory.stat", "file "); 140 if (file < 0) 141 goto cleanup; 142 143 if (!values_close(file, current, 10)) 144 goto cleanup; 145 146 ret = 0; 147 148 cleanup: 149 close(fd); 150 return ret; 151 } 152 153 /* 154 * This test create a memory cgroup, allocates 155 * some anonymous memory and some pagecache 156 * and check memory.current and some memory.stat values. 
157 */ 158 static int test_memcg_current(const char *root) 159 { 160 int ret = KSFT_FAIL; 161 long current; 162 char *memcg; 163 164 memcg = cg_name(root, "memcg_test"); 165 if (!memcg) 166 goto cleanup; 167 168 if (cg_create(memcg)) 169 goto cleanup; 170 171 current = cg_read_long(memcg, "memory.current"); 172 if (current != 0) 173 goto cleanup; 174 175 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 176 goto cleanup; 177 178 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 179 goto cleanup; 180 181 ret = KSFT_PASS; 182 183 cleanup: 184 cg_destroy(memcg); 185 free(memcg); 186 187 return ret; 188 } 189 190 static int alloc_pagecache_50M(const char *cgroup, void *arg) 191 { 192 int fd = (long)arg; 193 194 return alloc_pagecache(fd, MB(50)); 195 } 196 197 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 198 { 199 int fd = (long)arg; 200 int ppid = getppid(); 201 202 if (alloc_pagecache(fd, MB(50))) 203 return -1; 204 205 while (getppid() == ppid) 206 sleep(1); 207 208 return 0; 209 } 210 211 static int alloc_anon_noexit(const char *cgroup, void *arg) 212 { 213 int ppid = getppid(); 214 215 if (alloc_anon(cgroup, arg)) 216 return -1; 217 218 while (getppid() == ppid) 219 sleep(1); 220 221 return 0; 222 } 223 224 /* 225 * Wait until processes are killed asynchronously by the OOM killer 226 * If we exceed a timeout, fail. 
227 */ 228 static int cg_test_proc_killed(const char *cgroup) 229 { 230 int limit; 231 232 for (limit = 10; limit > 0; limit--) { 233 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 234 return 0; 235 236 usleep(100000); 237 } 238 return -1; 239 } 240 241 /* 242 * First, this test creates the following hierarchy: 243 * A memory.min = 50M, memory.max = 200M 244 * A/B memory.min = 50M, memory.current = 50M 245 * A/B/C memory.min = 75M, memory.current = 50M 246 * A/B/D memory.min = 25M, memory.current = 50M 247 * A/B/E memory.min = 500M, memory.current = 0 248 * A/B/F memory.min = 0, memory.current = 50M 249 * 250 * Usages are pagecache, but the test keeps a running 251 * process in every leaf cgroup. 252 * Then it creates A/G and creates a significant 253 * memory pressure in it. 254 * 255 * A/B memory.current ~= 50M 256 * A/B/C memory.current ~= 33M 257 * A/B/D memory.current ~= 17M 258 * A/B/E memory.current ~= 0 259 * 260 * After that it tries to allocate more than there is 261 * unprotected memory in A available, and checks 262 * checks that memory.min protects pagecache even 263 * in this case. 
264 */ 265 static int test_memcg_min(const char *root) 266 { 267 int ret = KSFT_FAIL; 268 char *parent[3] = {NULL}; 269 char *children[4] = {NULL}; 270 long c[4]; 271 int i, attempts; 272 int fd; 273 274 fd = get_temp_fd(); 275 if (fd < 0) 276 goto cleanup; 277 278 parent[0] = cg_name(root, "memcg_test_0"); 279 if (!parent[0]) 280 goto cleanup; 281 282 parent[1] = cg_name(parent[0], "memcg_test_1"); 283 if (!parent[1]) 284 goto cleanup; 285 286 parent[2] = cg_name(parent[0], "memcg_test_2"); 287 if (!parent[2]) 288 goto cleanup; 289 290 if (cg_create(parent[0])) 291 goto cleanup; 292 293 if (cg_read_long(parent[0], "memory.min")) { 294 ret = KSFT_SKIP; 295 goto cleanup; 296 } 297 298 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 299 goto cleanup; 300 301 if (cg_write(parent[0], "memory.max", "200M")) 302 goto cleanup; 303 304 if (cg_write(parent[0], "memory.swap.max", "0")) 305 goto cleanup; 306 307 if (cg_create(parent[1])) 308 goto cleanup; 309 310 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 311 goto cleanup; 312 313 if (cg_create(parent[2])) 314 goto cleanup; 315 316 for (i = 0; i < ARRAY_SIZE(children); i++) { 317 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 318 if (!children[i]) 319 goto cleanup; 320 321 if (cg_create(children[i])) 322 goto cleanup; 323 324 if (i == 2) 325 continue; 326 327 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 328 (void *)(long)fd); 329 } 330 331 if (cg_write(parent[0], "memory.min", "50M")) 332 goto cleanup; 333 if (cg_write(parent[1], "memory.min", "50M")) 334 goto cleanup; 335 if (cg_write(children[0], "memory.min", "75M")) 336 goto cleanup; 337 if (cg_write(children[1], "memory.min", "25M")) 338 goto cleanup; 339 if (cg_write(children[2], "memory.min", "500M")) 340 goto cleanup; 341 if (cg_write(children[3], "memory.min", "0")) 342 goto cleanup; 343 344 attempts = 0; 345 while (!values_close(cg_read_long(parent[1], "memory.current"), 346 MB(150), 3)) { 347 if (attempts++ > 
5) 348 break; 349 sleep(1); 350 } 351 352 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 353 goto cleanup; 354 355 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 356 goto cleanup; 357 358 for (i = 0; i < ARRAY_SIZE(children); i++) 359 c[i] = cg_read_long(children[i], "memory.current"); 360 361 if (!values_close(c[0], MB(33), 10)) 362 goto cleanup; 363 364 if (!values_close(c[1], MB(17), 10)) 365 goto cleanup; 366 367 if (!values_close(c[2], 0, 1)) 368 goto cleanup; 369 370 if (!cg_run(parent[2], alloc_anon, (void *)MB(170))) 371 goto cleanup; 372 373 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 374 goto cleanup; 375 376 ret = KSFT_PASS; 377 378 cleanup: 379 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 380 if (!children[i]) 381 continue; 382 383 cg_destroy(children[i]); 384 free(children[i]); 385 } 386 387 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 388 if (!parent[i]) 389 continue; 390 391 cg_destroy(parent[i]); 392 free(parent[i]); 393 } 394 close(fd); 395 return ret; 396 } 397 398 /* 399 * First, this test creates the following hierarchy: 400 * A memory.low = 50M, memory.max = 200M 401 * A/B memory.low = 50M, memory.current = 50M 402 * A/B/C memory.low = 75M, memory.current = 50M 403 * A/B/D memory.low = 25M, memory.current = 50M 404 * A/B/E memory.low = 500M, memory.current = 0 405 * A/B/F memory.low = 0, memory.current = 50M 406 * 407 * Usages are pagecache. 408 * Then it creates A/G an creates a significant 409 * memory pressure in it. 410 * 411 * Then it checks actual memory usages and expects that: 412 * A/B memory.current ~= 50M 413 * A/B/ memory.current ~= 33M 414 * A/B/D memory.current ~= 17M 415 * A/B/E memory.current ~= 0 416 * 417 * After that it tries to allocate more than there is 418 * unprotected memory in A available, 419 * and checks low and oom events in memory.events. 
420 */ 421 static int test_memcg_low(const char *root) 422 { 423 int ret = KSFT_FAIL; 424 char *parent[3] = {NULL}; 425 char *children[4] = {NULL}; 426 long low, oom; 427 long c[4]; 428 int i; 429 int fd; 430 431 fd = get_temp_fd(); 432 if (fd < 0) 433 goto cleanup; 434 435 parent[0] = cg_name(root, "memcg_test_0"); 436 if (!parent[0]) 437 goto cleanup; 438 439 parent[1] = cg_name(parent[0], "memcg_test_1"); 440 if (!parent[1]) 441 goto cleanup; 442 443 parent[2] = cg_name(parent[0], "memcg_test_2"); 444 if (!parent[2]) 445 goto cleanup; 446 447 if (cg_create(parent[0])) 448 goto cleanup; 449 450 if (cg_read_long(parent[0], "memory.low")) 451 goto cleanup; 452 453 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 454 goto cleanup; 455 456 if (cg_write(parent[0], "memory.max", "200M")) 457 goto cleanup; 458 459 if (cg_write(parent[0], "memory.swap.max", "0")) 460 goto cleanup; 461 462 if (cg_create(parent[1])) 463 goto cleanup; 464 465 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 466 goto cleanup; 467 468 if (cg_create(parent[2])) 469 goto cleanup; 470 471 for (i = 0; i < ARRAY_SIZE(children); i++) { 472 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 473 if (!children[i]) 474 goto cleanup; 475 476 if (cg_create(children[i])) 477 goto cleanup; 478 479 if (i == 2) 480 continue; 481 482 if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd)) 483 goto cleanup; 484 } 485 486 if (cg_write(parent[0], "memory.low", "50M")) 487 goto cleanup; 488 if (cg_write(parent[1], "memory.low", "50M")) 489 goto cleanup; 490 if (cg_write(children[0], "memory.low", "75M")) 491 goto cleanup; 492 if (cg_write(children[1], "memory.low", "25M")) 493 goto cleanup; 494 if (cg_write(children[2], "memory.low", "500M")) 495 goto cleanup; 496 if (cg_write(children[3], "memory.low", "0")) 497 goto cleanup; 498 499 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 500 goto cleanup; 501 502 if (!values_close(cg_read_long(parent[1], "memory.current"), 
MB(50), 3)) 503 goto cleanup; 504 505 for (i = 0; i < ARRAY_SIZE(children); i++) 506 c[i] = cg_read_long(children[i], "memory.current"); 507 508 if (!values_close(c[0], MB(33), 10)) 509 goto cleanup; 510 511 if (!values_close(c[1], MB(17), 10)) 512 goto cleanup; 513 514 if (!values_close(c[2], 0, 1)) 515 goto cleanup; 516 517 if (cg_run(parent[2], alloc_anon, (void *)MB(166))) { 518 fprintf(stderr, 519 "memory.low prevents from allocating anon memory\n"); 520 goto cleanup; 521 } 522 523 for (i = 0; i < ARRAY_SIZE(children); i++) { 524 oom = cg_read_key_long(children[i], "memory.events", "oom "); 525 low = cg_read_key_long(children[i], "memory.events", "low "); 526 527 if (oom) 528 goto cleanup; 529 if (i < 2 && low <= 0) 530 goto cleanup; 531 if (i >= 2 && low) 532 goto cleanup; 533 } 534 535 ret = KSFT_PASS; 536 537 cleanup: 538 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 539 if (!children[i]) 540 continue; 541 542 cg_destroy(children[i]); 543 free(children[i]); 544 } 545 546 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 547 if (!parent[i]) 548 continue; 549 550 cg_destroy(parent[i]); 551 free(parent[i]); 552 } 553 close(fd); 554 return ret; 555 } 556 557 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 558 { 559 size_t size = MB(50); 560 int ret = -1; 561 long current; 562 int fd; 563 564 fd = get_temp_fd(); 565 if (fd < 0) 566 return -1; 567 568 if (alloc_pagecache(fd, size)) 569 goto cleanup; 570 571 current = cg_read_long(cgroup, "memory.current"); 572 if (current <= MB(29) || current > MB(30)) 573 goto cleanup; 574 575 ret = 0; 576 577 cleanup: 578 close(fd); 579 return ret; 580 581 } 582 583 /* 584 * This test checks that memory.high limits the amount of 585 * memory which can be consumed by either anonymous memory 586 * or pagecache. 
587 */ 588 static int test_memcg_high(const char *root) 589 { 590 int ret = KSFT_FAIL; 591 char *memcg; 592 long high; 593 594 memcg = cg_name(root, "memcg_test"); 595 if (!memcg) 596 goto cleanup; 597 598 if (cg_create(memcg)) 599 goto cleanup; 600 601 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 602 goto cleanup; 603 604 if (cg_write(memcg, "memory.swap.max", "0")) 605 goto cleanup; 606 607 if (cg_write(memcg, "memory.high", "30M")) 608 goto cleanup; 609 610 if (cg_run(memcg, alloc_anon, (void *)MB(100))) 611 goto cleanup; 612 613 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 614 goto cleanup; 615 616 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 617 goto cleanup; 618 619 high = cg_read_key_long(memcg, "memory.events", "high "); 620 if (high <= 0) 621 goto cleanup; 622 623 ret = KSFT_PASS; 624 625 cleanup: 626 cg_destroy(memcg); 627 free(memcg); 628 629 return ret; 630 } 631 632 static int alloc_anon_mlock(const char *cgroup, void *arg) 633 { 634 size_t size = (size_t)arg; 635 void *buf; 636 637 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 638 0, 0); 639 if (buf == MAP_FAILED) 640 return -1; 641 642 mlock(buf, size); 643 munmap(buf, size); 644 return 0; 645 } 646 647 /* 648 * This test checks that memory.high is able to throttle big single shot 649 * allocation i.e. large allocation within one kernel entry. 
650 */ 651 static int test_memcg_high_sync(const char *root) 652 { 653 int ret = KSFT_FAIL, pid, fd = -1; 654 char *memcg; 655 long pre_high, pre_max; 656 long post_high, post_max; 657 658 memcg = cg_name(root, "memcg_test"); 659 if (!memcg) 660 goto cleanup; 661 662 if (cg_create(memcg)) 663 goto cleanup; 664 665 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 666 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 667 if (pre_high < 0 || pre_max < 0) 668 goto cleanup; 669 670 if (cg_write(memcg, "memory.swap.max", "0")) 671 goto cleanup; 672 673 if (cg_write(memcg, "memory.high", "30M")) 674 goto cleanup; 675 676 if (cg_write(memcg, "memory.max", "140M")) 677 goto cleanup; 678 679 fd = memcg_prepare_for_wait(memcg); 680 if (fd < 0) 681 goto cleanup; 682 683 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 684 if (pid < 0) 685 goto cleanup; 686 687 cg_wait_for(fd); 688 689 post_high = cg_read_key_long(memcg, "memory.events", "high "); 690 post_max = cg_read_key_long(memcg, "memory.events", "max "); 691 if (post_high < 0 || post_max < 0) 692 goto cleanup; 693 694 if (pre_high == post_high || pre_max != post_max) 695 goto cleanup; 696 697 ret = KSFT_PASS; 698 699 cleanup: 700 if (fd >= 0) 701 close(fd); 702 cg_destroy(memcg); 703 free(memcg); 704 705 return ret; 706 } 707 708 /* 709 * This test checks that memory.max limits the amount of 710 * memory which can be consumed by either anonymous memory 711 * or pagecache. 
712 */ 713 static int test_memcg_max(const char *root) 714 { 715 int ret = KSFT_FAIL; 716 char *memcg; 717 long current, max; 718 719 memcg = cg_name(root, "memcg_test"); 720 if (!memcg) 721 goto cleanup; 722 723 if (cg_create(memcg)) 724 goto cleanup; 725 726 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 727 goto cleanup; 728 729 if (cg_write(memcg, "memory.swap.max", "0")) 730 goto cleanup; 731 732 if (cg_write(memcg, "memory.max", "30M")) 733 goto cleanup; 734 735 /* Should be killed by OOM killer */ 736 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 737 goto cleanup; 738 739 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 740 goto cleanup; 741 742 current = cg_read_long(memcg, "memory.current"); 743 if (current > MB(30) || !current) 744 goto cleanup; 745 746 max = cg_read_key_long(memcg, "memory.events", "max "); 747 if (max <= 0) 748 goto cleanup; 749 750 ret = KSFT_PASS; 751 752 cleanup: 753 cg_destroy(memcg); 754 free(memcg); 755 756 return ret; 757 } 758 759 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 760 { 761 long mem_max = (long)arg; 762 size_t size = MB(50); 763 char *buf, *ptr; 764 long mem_current, swap_current; 765 int ret = -1; 766 767 buf = malloc(size); 768 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 769 *ptr = 0; 770 771 mem_current = cg_read_long(cgroup, "memory.current"); 772 if (!mem_current || !values_close(mem_current, mem_max, 3)) 773 goto cleanup; 774 775 swap_current = cg_read_long(cgroup, "memory.swap.current"); 776 if (!swap_current || 777 !values_close(mem_current + swap_current, size, 3)) 778 goto cleanup; 779 780 ret = 0; 781 cleanup: 782 free(buf); 783 return ret; 784 } 785 786 /* 787 * This test checks that memory.swap.max limits the amount of 788 * anonymous memory which can be swapped out. 
789 */ 790 static int test_memcg_swap_max(const char *root) 791 { 792 int ret = KSFT_FAIL; 793 char *memcg; 794 long max; 795 796 if (!is_swap_enabled()) 797 return KSFT_SKIP; 798 799 memcg = cg_name(root, "memcg_test"); 800 if (!memcg) 801 goto cleanup; 802 803 if (cg_create(memcg)) 804 goto cleanup; 805 806 if (cg_read_long(memcg, "memory.swap.current")) { 807 ret = KSFT_SKIP; 808 goto cleanup; 809 } 810 811 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 812 goto cleanup; 813 814 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 815 goto cleanup; 816 817 if (cg_write(memcg, "memory.swap.max", "30M")) 818 goto cleanup; 819 820 if (cg_write(memcg, "memory.max", "30M")) 821 goto cleanup; 822 823 /* Should be killed by OOM killer */ 824 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 825 goto cleanup; 826 827 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 828 goto cleanup; 829 830 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 831 goto cleanup; 832 833 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 834 goto cleanup; 835 836 max = cg_read_key_long(memcg, "memory.events", "max "); 837 if (max <= 0) 838 goto cleanup; 839 840 ret = KSFT_PASS; 841 842 cleanup: 843 cg_destroy(memcg); 844 free(memcg); 845 846 return ret; 847 } 848 849 /* 850 * This test disables swapping and tries to allocate anonymous memory 851 * up to OOM. Then it checks for oom and oom_kill events in 852 * memory.events. 
*/
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	/* The 100M allocation is required to fail under the 30M limit */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* No process may be left behind in the cgroup */
	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * Arguments handed to tcp_server():
 * @port: TCP port to bind to (host byte order)
 * @ctl:  pipe used by the server to report its startup status;
 *        the server closes ctl[0] and writes to ctl[1]
 */
struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

/*
 * Bind an IPv6 TCP socket to the requested port, report the outcome on the
 * control pipe (errno if bind() fails, 0 once listening), accept a single
 * client and keep writing to it until a write fails.
 *
 * @cgroup: unused
 * @arg:    pointer to a struct tcp_server_args
 *
 * Returns 0 when the final write failed with ECONNRESET, -1 in every
 * other case.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Let the parent see why bind() failed (e.g. EADDRINUSE) */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Tell the parent that the server is ready to accept */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

/*
 * Connect to the server on localhost:@port and, for a bounded number of
 * reads, compare memory.current of @cgroup with the "sock" counter of
 * memory.stat.
 *
 * Returns KSFT_PASS as soon as the two values are close, the non-zero
 * getaddrinfo() error code if name resolution fails, and KSFT_FAIL in
 * every other case (socket or connect failure, read failure, unreadable
 * counters, memory.current below sock, or retries exhausted).
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/*
	 * The port is unsigned: print it with %hu. With %hd every port
	 * above 32767 came out negative and did not fit the buffer.
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	/*
	 * From here on, failure is the default. Previously a failing
	 * socket() returned the stale 0 left over from getaddrinfo().
	 */
	ret = KSFT_FAIL;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	if (connect(sk, ai->ai_addr, ai->ai_addrlen) < 0)
		goto close_sk;

	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (current < sock)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}

/*
 * This test checks socket memory accounting.
 * The test forks a TCP server that listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with a check whether
 * memory.current and memory.stat.sock are similar.
1013 */ 1014 static int test_memcg_sock(const char *root) 1015 { 1016 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1017 unsigned short port; 1018 char *memcg; 1019 1020 memcg = cg_name(root, "memcg_test"); 1021 if (!memcg) 1022 goto cleanup; 1023 1024 if (cg_create(memcg)) 1025 goto cleanup; 1026 1027 while (bind_retries--) { 1028 struct tcp_server_args args; 1029 1030 if (pipe(args.ctl)) 1031 goto cleanup; 1032 1033 port = args.port = 1000 + rand() % 60000; 1034 1035 pid = cg_run_nowait(memcg, tcp_server, &args); 1036 if (pid < 0) 1037 goto cleanup; 1038 1039 close(args.ctl[1]); 1040 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1041 goto cleanup; 1042 close(args.ctl[0]); 1043 1044 if (!err) 1045 break; 1046 if (err != EADDRINUSE) 1047 goto cleanup; 1048 1049 waitpid(pid, NULL, 0); 1050 } 1051 1052 if (err == EADDRINUSE) { 1053 ret = KSFT_SKIP; 1054 goto cleanup; 1055 } 1056 1057 if (tcp_client(memcg, port) != KSFT_PASS) 1058 goto cleanup; 1059 1060 waitpid(pid, &err, 0); 1061 if (WEXITSTATUS(err)) 1062 goto cleanup; 1063 1064 if (cg_read_long(memcg, "memory.current") < 0) 1065 goto cleanup; 1066 1067 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1068 goto cleanup; 1069 1070 ret = KSFT_PASS; 1071 1072 cleanup: 1073 cg_destroy(memcg); 1074 free(memcg); 1075 1076 return ret; 1077 } 1078 1079 /* 1080 * This test disables swapping and tries to allocate anonymous memory 1081 * up to OOM with memory.group.oom set. Then it checks that all 1082 * processes in the leaf (but not the parent) were killed. 
1083 */ 1084 static int test_memcg_oom_group_leaf_events(const char *root) 1085 { 1086 int ret = KSFT_FAIL; 1087 char *parent, *child; 1088 1089 parent = cg_name(root, "memcg_test_0"); 1090 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1091 1092 if (!parent || !child) 1093 goto cleanup; 1094 1095 if (cg_create(parent)) 1096 goto cleanup; 1097 1098 if (cg_create(child)) 1099 goto cleanup; 1100 1101 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1102 goto cleanup; 1103 1104 if (cg_write(child, "memory.max", "50M")) 1105 goto cleanup; 1106 1107 if (cg_write(child, "memory.swap.max", "0")) 1108 goto cleanup; 1109 1110 if (cg_write(child, "memory.oom.group", "1")) 1111 goto cleanup; 1112 1113 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1114 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1115 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1116 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1117 goto cleanup; 1118 1119 if (cg_test_proc_killed(child)) 1120 goto cleanup; 1121 1122 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1123 goto cleanup; 1124 1125 if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0) 1126 goto cleanup; 1127 1128 ret = KSFT_PASS; 1129 1130 cleanup: 1131 if (child) 1132 cg_destroy(child); 1133 if (parent) 1134 cg_destroy(parent); 1135 free(child); 1136 free(parent); 1137 1138 return ret; 1139 } 1140 1141 /* 1142 * This test disables swapping and tries to allocate anonymous memory 1143 * up to OOM with memory.group.oom set. Then it checks that all 1144 * processes in the parent and leaf were killed. 
1145 */ 1146 static int test_memcg_oom_group_parent_events(const char *root) 1147 { 1148 int ret = KSFT_FAIL; 1149 char *parent, *child; 1150 1151 parent = cg_name(root, "memcg_test_0"); 1152 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1153 1154 if (!parent || !child) 1155 goto cleanup; 1156 1157 if (cg_create(parent)) 1158 goto cleanup; 1159 1160 if (cg_create(child)) 1161 goto cleanup; 1162 1163 if (cg_write(parent, "memory.max", "80M")) 1164 goto cleanup; 1165 1166 if (cg_write(parent, "memory.swap.max", "0")) 1167 goto cleanup; 1168 1169 if (cg_write(parent, "memory.oom.group", "1")) 1170 goto cleanup; 1171 1172 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1173 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1174 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1175 1176 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1177 goto cleanup; 1178 1179 if (cg_test_proc_killed(child)) 1180 goto cleanup; 1181 if (cg_test_proc_killed(parent)) 1182 goto cleanup; 1183 1184 ret = KSFT_PASS; 1185 1186 cleanup: 1187 if (child) 1188 cg_destroy(child); 1189 if (parent) 1190 cg_destroy(parent); 1191 free(child); 1192 free(parent); 1193 1194 return ret; 1195 } 1196 1197 /* 1198 * This test disables swapping and tries to allocate anonymous memory 1199 * up to OOM with memory.group.oom set. 
Then it checks that all 1200 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1201 */ 1202 static int test_memcg_oom_group_score_events(const char *root) 1203 { 1204 int ret = KSFT_FAIL; 1205 char *memcg; 1206 int safe_pid; 1207 1208 memcg = cg_name(root, "memcg_test_0"); 1209 1210 if (!memcg) 1211 goto cleanup; 1212 1213 if (cg_create(memcg)) 1214 goto cleanup; 1215 1216 if (cg_write(memcg, "memory.max", "50M")) 1217 goto cleanup; 1218 1219 if (cg_write(memcg, "memory.swap.max", "0")) 1220 goto cleanup; 1221 1222 if (cg_write(memcg, "memory.oom.group", "1")) 1223 goto cleanup; 1224 1225 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1226 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1227 goto cleanup; 1228 1229 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1230 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1231 goto cleanup; 1232 1233 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1234 goto cleanup; 1235 1236 if (kill(safe_pid, SIGKILL)) 1237 goto cleanup; 1238 1239 ret = KSFT_PASS; 1240 1241 cleanup: 1242 if (memcg) 1243 cg_destroy(memcg); 1244 free(memcg); 1245 1246 return ret; 1247 } 1248 1249 1250 #define T(x) { x, #x } 1251 struct memcg_test { 1252 int (*fn)(const char *root); 1253 const char *name; 1254 } tests[] = { 1255 T(test_memcg_subtree_control), 1256 T(test_memcg_current), 1257 T(test_memcg_min), 1258 T(test_memcg_low), 1259 T(test_memcg_high), 1260 T(test_memcg_high_sync), 1261 T(test_memcg_max), 1262 T(test_memcg_oom_events), 1263 T(test_memcg_swap_max), 1264 T(test_memcg_sock), 1265 T(test_memcg_oom_group_leaf_events), 1266 T(test_memcg_oom_group_parent_events), 1267 T(test_memcg_oom_group_score_events), 1268 }; 1269 #undef T 1270 1271 int main(int argc, char **argv) 1272 { 1273 char root[PATH_MAX]; 1274 int i, ret = EXIT_SUCCESS; 1275 1276 if (cg_find_unified_root(root, sizeof(root))) 1277 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1278 1279 /* 1280 * Check that memory 
controller is available: 1281 * memory is listed in cgroup.controllers 1282 */ 1283 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1284 ksft_exit_skip("memory controller isn't available\n"); 1285 1286 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1287 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1288 ksft_exit_skip("Failed to set memory controller\n"); 1289 1290 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1291 switch (tests[i].fn(root)) { 1292 case KSFT_PASS: 1293 ksft_test_result_pass("%s\n", tests[i].name); 1294 break; 1295 case KSFT_SKIP: 1296 ksft_test_result_skip("%s\n", tests[i].name); 1297 break; 1298 default: 1299 ret = EXIT_FAILURE; 1300 ksft_test_result_fail("%s\n", tests[i].name); 1301 break; 1302 } 1303 } 1304 1305 return ret; 1306 } 1307