1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 /* 28 * This test creates two nested cgroups with and without enabling 29 * the memory controller. 30 */ 31 static int test_memcg_subtree_control(const char *root) 32 { 33 char *parent, *child, *parent2 = NULL, *child2 = NULL; 34 int ret = KSFT_FAIL; 35 char buf[PAGE_SIZE]; 36 37 /* Create two nested cgroups with the memory controller enabled */ 38 parent = cg_name(root, "memcg_test_0"); 39 child = cg_name(root, "memcg_test_0/memcg_test_1"); 40 if (!parent || !child) 41 goto cleanup_free; 42 43 if (cg_create(parent)) 44 goto cleanup_free; 45 46 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 47 goto cleanup_parent; 48 49 if (cg_create(child)) 50 goto cleanup_parent; 51 52 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 53 goto cleanup_child; 54 55 /* Create two nested cgroups without enabling memory controller */ 56 parent2 = cg_name(root, "memcg_test_1"); 57 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 58 if (!parent2 || !child2) 59 goto cleanup_free2; 60 61 if (cg_create(parent2)) 62 goto cleanup_free2; 63 64 if (cg_create(child2)) 65 goto cleanup_parent2; 66 67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 68 goto cleanup_all; 69 70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 71 goto cleanup_all; 72 73 ret = KSFT_PASS; 74 75 cleanup_all: 76 cg_destroy(child2); 77 cleanup_parent2: 78 cg_destroy(parent2); 79 cleanup_free2: 80 free(parent2); 81 free(child2); 82 cleanup_child: 83 cg_destroy(child); 84 cleanup_parent: 85 cg_destroy(parent); 86 cleanup_free: 87 free(parent); 88 free(child); 89 90 return ret; 91 } 92 93 static int alloc_anon_50M_check(const char *cgroup, void *arg) 94 { 95 size_t size = MB(50); 96 char *buf, *ptr; 97 long anon, current; 98 int ret = -1; 99 100 buf = malloc(size); 101 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 102 *ptr = 0; 103 104 current = cg_read_long(cgroup, "memory.current"); 105 if (current < size) 106 goto cleanup; 107 108 if (!values_close(size, current, 3)) 109 goto cleanup; 110 111 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 112 if (anon < 0) 113 goto cleanup; 114 115 if (!values_close(anon, current, 3)) 116 goto cleanup; 117 118 ret = 0; 119 cleanup: 120 free(buf); 121 return ret; 122 } 123 124 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 125 { 126 size_t size = MB(50); 127 int ret = -1; 128 long current, file; 129 int fd; 130 131 fd = get_temp_fd(); 132 if (fd < 0) 133 return -1; 134 135 if (alloc_pagecache(fd, size)) 136 goto cleanup; 137 138 current = cg_read_long(cgroup, "memory.current"); 139 if (current < size) 140 goto cleanup; 141 142 file = cg_read_key_long(cgroup, "memory.stat", "file "); 143 if (file < 0) 144 goto cleanup; 145 146 if (!values_close(file, current, 10)) 147 goto cleanup; 148 149 ret = 0; 150 151 cleanup: 152 close(fd); 153 return ret; 154 } 155 156 /* 157 * This test create a memory cgroup, allocates 158 * some anonymous memory and some pagecache 159 * and check memory.current and some memory.stat values. 160 */ 161 static int test_memcg_current(const char *root) 162 { 163 int ret = KSFT_FAIL; 164 long current; 165 char *memcg; 166 167 memcg = cg_name(root, "memcg_test"); 168 if (!memcg) 169 goto cleanup; 170 171 if (cg_create(memcg)) 172 goto cleanup; 173 174 current = cg_read_long(memcg, "memory.current"); 175 if (current != 0) 176 goto cleanup; 177 178 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 179 goto cleanup; 180 181 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 182 goto cleanup; 183 184 ret = KSFT_PASS; 185 186 cleanup: 187 cg_destroy(memcg); 188 free(memcg); 189 190 return ret; 191 } 192 193 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 194 { 195 int fd = (long)arg; 196 int ppid = getppid(); 197 198 if (alloc_pagecache(fd, MB(50))) 199 return -1; 200 201 while (getppid() == ppid) 202 sleep(1); 203 204 return 0; 205 } 206 207 static int alloc_anon_noexit(const char *cgroup, void *arg) 208 { 209 int ppid = getppid(); 210 size_t size = (unsigned long)arg; 211 char *buf, *ptr; 212 213 buf = malloc(size); 214 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 215 *ptr = 0; 216 217 while (getppid() == ppid) 218 sleep(1); 219 220 free(buf); 221 return 0; 222 } 223 224 /* 225 * Wait until processes are killed asynchronously by the OOM killer 226 * If we exceed a timeout, fail. 227 */ 228 static int cg_test_proc_killed(const char *cgroup) 229 { 230 int limit; 231 232 for (limit = 10; limit > 0; limit--) { 233 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 234 return 0; 235 236 usleep(100000); 237 } 238 return -1; 239 } 240 241 /* 242 * First, this test creates the following hierarchy: 243 * A memory.min = 0, memory.max = 200M 244 * A/B memory.min = 50M 245 * A/B/C memory.min = 75M, memory.current = 50M 246 * A/B/D memory.min = 25M, memory.current = 50M 247 * A/B/E memory.min = 0, memory.current = 50M 248 * A/B/F memory.min = 500M, memory.current = 0 249 * 250 * (or memory.low if we test soft protection) 251 * 252 * Usages are pagecache and the test keeps a running 253 * process in every leaf cgroup. 254 * Then it creates A/G and creates a significant 255 * memory pressure in A. 256 * 257 * Then it checks actual memory usages and expects that: 258 * A/B memory.current ~= 50M 259 * A/B/C memory.current ~= 29M 260 * A/B/D memory.current ~= 21M 261 * A/B/E memory.current ~= 0 262 * A/B/F memory.current = 0 263 * (for origin of the numbers, see model in memcg_protection.m.) 264 * 265 * After that it tries to allocate more than there is 266 * unprotected memory in A available, and checks that: 267 * a) memory.min protects pagecache even in this case, 268 * b) memory.low allows reclaiming page cache with low events. 269 */ 270 static int test_memcg_protection(const char *root, bool min) 271 { 272 int ret = KSFT_FAIL, rc; 273 char *parent[3] = {NULL}; 274 char *children[4] = {NULL}; 275 const char *attribute = min ? "memory.min" : "memory.low"; 276 long c[4]; 277 int i, attempts; 278 int fd; 279 280 fd = get_temp_fd(); 281 if (fd < 0) 282 goto cleanup; 283 284 parent[0] = cg_name(root, "memcg_test_0"); 285 if (!parent[0]) 286 goto cleanup; 287 288 parent[1] = cg_name(parent[0], "memcg_test_1"); 289 if (!parent[1]) 290 goto cleanup; 291 292 parent[2] = cg_name(parent[0], "memcg_test_2"); 293 if (!parent[2]) 294 goto cleanup; 295 296 if (cg_create(parent[0])) 297 goto cleanup; 298 299 if (cg_read_long(parent[0], attribute)) { 300 /* No memory.min on older kernels is fine */ 301 if (min) 302 ret = KSFT_SKIP; 303 goto cleanup; 304 } 305 306 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 307 goto cleanup; 308 309 if (cg_write(parent[0], "memory.max", "200M")) 310 goto cleanup; 311 312 if (cg_write(parent[0], "memory.swap.max", "0")) 313 goto cleanup; 314 315 if (cg_create(parent[1])) 316 goto cleanup; 317 318 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 319 goto cleanup; 320 321 if (cg_create(parent[2])) 322 goto cleanup; 323 324 for (i = 0; i < ARRAY_SIZE(children); i++) { 325 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 326 if (!children[i]) 327 goto cleanup; 328 329 if (cg_create(children[i])) 330 goto cleanup; 331 332 if (i > 2) 333 continue; 334 335 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 336 (void *)(long)fd); 337 } 338 339 if (cg_write(parent[1], attribute, "50M")) 340 goto cleanup; 341 if (cg_write(children[0], attribute, "75M")) 342 goto cleanup; 343 if (cg_write(children[1], attribute, "25M")) 344 goto cleanup; 345 if (cg_write(children[2], attribute, "0")) 346 goto cleanup; 347 if (cg_write(children[3], attribute, "500M")) 348 goto cleanup; 349 350 attempts = 0; 351 while (!values_close(cg_read_long(parent[1], "memory.current"), 352 MB(150), 3)) { 353 if (attempts++ > 5) 354 break; 355 sleep(1); 356 } 357 358 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 359 goto cleanup; 360 361 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 362 goto cleanup; 363 364 for (i = 0; i < ARRAY_SIZE(children); i++) 365 c[i] = cg_read_long(children[i], "memory.current"); 366 367 if (!values_close(c[0], MB(29), 10)) 368 goto cleanup; 369 370 if (!values_close(c[1], MB(21), 10)) 371 goto cleanup; 372 373 if (c[3] != 0) 374 goto cleanup; 375 376 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 377 if (min && !rc) 378 goto cleanup; 379 else if (!min && rc) { 380 fprintf(stderr, 381 "memory.low prevents from allocating anon memory\n"); 382 goto cleanup; 383 } 384 385 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 386 goto cleanup; 387 388 if (min) { 389 ret = KSFT_PASS; 390 goto cleanup; 391 } 392 393 for (i = 0; i < ARRAY_SIZE(children); i++) { 394 int no_low_events_index = 1; 395 long low, oom; 396 397 oom = cg_read_key_long(children[i], "memory.events", "oom "); 398 low = cg_read_key_long(children[i], "memory.events", "low "); 399 400 if (oom) 401 goto cleanup; 402 if (i <= no_low_events_index && low <= 0) 403 goto cleanup; 404 if (i > no_low_events_index && low) 405 goto cleanup; 406 407 } 408 409 ret = KSFT_PASS; 410 411 cleanup: 412 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 413 if (!children[i]) 414 continue; 415 416 cg_destroy(children[i]); 417 free(children[i]); 418 } 419 420 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 421 if (!parent[i]) 422 continue; 423 424 cg_destroy(parent[i]); 425 free(parent[i]); 426 } 427 close(fd); 428 return ret; 429 } 430 431 static int test_memcg_min(const char *root) 432 { 433 return test_memcg_protection(root, true); 434 } 435 436 static int test_memcg_low(const char *root) 437 { 438 return test_memcg_protection(root, false); 439 } 440 441 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 442 { 443 size_t size = MB(50); 444 int ret = -1; 445 long current, high, max; 446 int fd; 447 448 high = cg_read_long(cgroup, "memory.high"); 449 max = cg_read_long(cgroup, "memory.max"); 450 if (high != MB(30) && max != MB(30)) 451 return -1; 452 453 fd = get_temp_fd(); 454 if (fd < 0) 455 return -1; 456 457 if (alloc_pagecache(fd, size)) 458 goto cleanup; 459 460 current = cg_read_long(cgroup, "memory.current"); 461 if (!values_close(current, MB(30), 5)) 462 goto cleanup; 463 464 ret = 0; 465 466 cleanup: 467 close(fd); 468 return ret; 469 470 } 471 472 /* 473 * This test checks that memory.high limits the amount of 474 * memory which can be consumed by either anonymous memory 475 * or pagecache. 476 */ 477 static int test_memcg_high(const char *root) 478 { 479 int ret = KSFT_FAIL; 480 char *memcg; 481 long high; 482 483 memcg = cg_name(root, "memcg_test"); 484 if (!memcg) 485 goto cleanup; 486 487 if (cg_create(memcg)) 488 goto cleanup; 489 490 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 491 goto cleanup; 492 493 if (cg_write(memcg, "memory.swap.max", "0")) 494 goto cleanup; 495 496 if (cg_write(memcg, "memory.high", "30M")) 497 goto cleanup; 498 499 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 500 goto cleanup; 501 502 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 503 goto cleanup; 504 505 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 506 goto cleanup; 507 508 high = cg_read_key_long(memcg, "memory.events", "high "); 509 if (high <= 0) 510 goto cleanup; 511 512 ret = KSFT_PASS; 513 514 cleanup: 515 cg_destroy(memcg); 516 free(memcg); 517 518 return ret; 519 } 520 521 static int alloc_anon_mlock(const char *cgroup, void *arg) 522 { 523 size_t size = (size_t)arg; 524 void *buf; 525 526 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 527 0, 0); 528 if (buf == MAP_FAILED) 529 return -1; 530 531 mlock(buf, size); 532 munmap(buf, size); 533 return 0; 534 } 535 536 /* 537 * This test checks that memory.high is able to throttle big single shot 538 * allocation i.e. large allocation within one kernel entry. 539 */ 540 static int test_memcg_high_sync(const char *root) 541 { 542 int ret = KSFT_FAIL, pid, fd = -1; 543 char *memcg; 544 long pre_high, pre_max; 545 long post_high, post_max; 546 547 memcg = cg_name(root, "memcg_test"); 548 if (!memcg) 549 goto cleanup; 550 551 if (cg_create(memcg)) 552 goto cleanup; 553 554 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 555 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 556 if (pre_high < 0 || pre_max < 0) 557 goto cleanup; 558 559 if (cg_write(memcg, "memory.swap.max", "0")) 560 goto cleanup; 561 562 if (cg_write(memcg, "memory.high", "30M")) 563 goto cleanup; 564 565 if (cg_write(memcg, "memory.max", "140M")) 566 goto cleanup; 567 568 fd = memcg_prepare_for_wait(memcg); 569 if (fd < 0) 570 goto cleanup; 571 572 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 573 if (pid < 0) 574 goto cleanup; 575 576 cg_wait_for(fd); 577 578 post_high = cg_read_key_long(memcg, "memory.events", "high "); 579 post_max = cg_read_key_long(memcg, "memory.events", "max "); 580 if (post_high < 0 || post_max < 0) 581 goto cleanup; 582 583 if (pre_high == post_high || pre_max != post_max) 584 goto cleanup; 585 586 ret = KSFT_PASS; 587 588 cleanup: 589 if (fd >= 0) 590 close(fd); 591 cg_destroy(memcg); 592 free(memcg); 593 594 return ret; 595 } 596 597 /* 598 * This test checks that memory.max limits the amount of 599 * memory which can be consumed by either anonymous memory 600 * or pagecache. 601 */ 602 static int test_memcg_max(const char *root) 603 { 604 int ret = KSFT_FAIL; 605 char *memcg; 606 long current, max; 607 608 memcg = cg_name(root, "memcg_test"); 609 if (!memcg) 610 goto cleanup; 611 612 if (cg_create(memcg)) 613 goto cleanup; 614 615 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 616 goto cleanup; 617 618 if (cg_write(memcg, "memory.swap.max", "0")) 619 goto cleanup; 620 621 if (cg_write(memcg, "memory.max", "30M")) 622 goto cleanup; 623 624 /* Should be killed by OOM killer */ 625 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 626 goto cleanup; 627 628 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 629 goto cleanup; 630 631 current = cg_read_long(memcg, "memory.current"); 632 if (current > MB(30) || !current) 633 goto cleanup; 634 635 max = cg_read_key_long(memcg, "memory.events", "max "); 636 if (max <= 0) 637 goto cleanup; 638 639 ret = KSFT_PASS; 640 641 cleanup: 642 cg_destroy(memcg); 643 free(memcg); 644 645 return ret; 646 } 647 648 /* 649 * This test checks that memory.reclaim reclaims the given 650 * amount of memory (from both anon and file, if possible). 651 */ 652 static int test_memcg_reclaim(const char *root) 653 { 654 int ret = KSFT_FAIL, fd, retries; 655 char *memcg; 656 long current, expected_usage, to_reclaim; 657 char buf[64]; 658 659 memcg = cg_name(root, "memcg_test"); 660 if (!memcg) 661 goto cleanup; 662 663 if (cg_create(memcg)) 664 goto cleanup; 665 666 current = cg_read_long(memcg, "memory.current"); 667 if (current != 0) 668 goto cleanup; 669 670 fd = get_temp_fd(); 671 if (fd < 0) 672 goto cleanup; 673 674 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 675 676 /* 677 * If swap is enabled, try to reclaim from both anon and file, else try 678 * to reclaim from file only. 679 */ 680 if (is_swap_enabled()) { 681 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 682 expected_usage = MB(100); 683 } else 684 expected_usage = MB(50); 685 686 /* 687 * Wait until current usage reaches the expected usage (or we run out of 688 * retries). 689 */ 690 retries = 5; 691 while (!values_close(cg_read_long(memcg, "memory.current"), 692 expected_usage, 10)) { 693 if (retries--) { 694 sleep(1); 695 continue; 696 } else { 697 fprintf(stderr, 698 "failed to allocate %ld for memcg reclaim test\n", 699 expected_usage); 700 goto cleanup; 701 } 702 } 703 704 /* 705 * Reclaim until current reaches 30M, this makes sure we hit both anon 706 * and file if swap is enabled. 707 */ 708 retries = 5; 709 while (true) { 710 int err; 711 712 current = cg_read_long(memcg, "memory.current"); 713 to_reclaim = current - MB(30); 714 715 /* 716 * We only keep looping if we get EAGAIN, which means we could 717 * not reclaim the full amount. 718 */ 719 if (to_reclaim <= 0) 720 goto cleanup; 721 722 723 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 724 err = cg_write(memcg, "memory.reclaim", buf); 725 if (!err) { 726 /* 727 * If writing succeeds, then the written amount should have been 728 * fully reclaimed (and maybe more). 729 */ 730 current = cg_read_long(memcg, "memory.current"); 731 if (!values_close(current, MB(30), 3) && current > MB(30)) 732 goto cleanup; 733 break; 734 } 735 736 /* The kernel could not reclaim the full amount, try again. */ 737 if (err == -EAGAIN && retries--) 738 continue; 739 740 /* We got an unexpected error or ran out of retries. */ 741 goto cleanup; 742 } 743 744 ret = KSFT_PASS; 745 cleanup: 746 cg_destroy(memcg); 747 free(memcg); 748 close(fd); 749 750 return ret; 751 } 752 753 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 754 { 755 long mem_max = (long)arg; 756 size_t size = MB(50); 757 char *buf, *ptr; 758 long mem_current, swap_current; 759 int ret = -1; 760 761 buf = malloc(size); 762 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 763 *ptr = 0; 764 765 mem_current = cg_read_long(cgroup, "memory.current"); 766 if (!mem_current || !values_close(mem_current, mem_max, 3)) 767 goto cleanup; 768 769 swap_current = cg_read_long(cgroup, "memory.swap.current"); 770 if (!swap_current || 771 !values_close(mem_current + swap_current, size, 3)) 772 goto cleanup; 773 774 ret = 0; 775 cleanup: 776 free(buf); 777 return ret; 778 } 779 780 /* 781 * This test checks that memory.swap.max limits the amount of 782 * anonymous memory which can be swapped out. 783 */ 784 static int test_memcg_swap_max(const char *root) 785 { 786 int ret = KSFT_FAIL; 787 char *memcg; 788 long max; 789 790 if (!is_swap_enabled()) 791 return KSFT_SKIP; 792 793 memcg = cg_name(root, "memcg_test"); 794 if (!memcg) 795 goto cleanup; 796 797 if (cg_create(memcg)) 798 goto cleanup; 799 800 if (cg_read_long(memcg, "memory.swap.current")) { 801 ret = KSFT_SKIP; 802 goto cleanup; 803 } 804 805 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 806 goto cleanup; 807 808 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 809 goto cleanup; 810 811 if (cg_write(memcg, "memory.swap.max", "30M")) 812 goto cleanup; 813 814 if (cg_write(memcg, "memory.max", "30M")) 815 goto cleanup; 816 817 /* Should be killed by OOM killer */ 818 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 819 goto cleanup; 820 821 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 822 goto cleanup; 823 824 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 825 goto cleanup; 826 827 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 828 goto cleanup; 829 830 max = cg_read_key_long(memcg, "memory.events", "max "); 831 if (max <= 0) 832 goto cleanup; 833 834 ret = KSFT_PASS; 835 836 cleanup: 837 cg_destroy(memcg); 838 free(memcg); 839 840 return ret; 841 } 842 843 /* 844 * This test disables swapping and tries to allocate anonymous memory 845 * up to OOM. Then it checks for oom and oom_kill events in 846 * memory.events. 847 */ 848 static int test_memcg_oom_events(const char *root) 849 { 850 int ret = KSFT_FAIL; 851 char *memcg; 852 853 memcg = cg_name(root, "memcg_test"); 854 if (!memcg) 855 goto cleanup; 856 857 if (cg_create(memcg)) 858 goto cleanup; 859 860 if (cg_write(memcg, "memory.max", "30M")) 861 goto cleanup; 862 863 if (cg_write(memcg, "memory.swap.max", "0")) 864 goto cleanup; 865 866 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 867 goto cleanup; 868 869 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 870 goto cleanup; 871 872 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 873 goto cleanup; 874 875 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 876 goto cleanup; 877 878 ret = KSFT_PASS; 879 880 cleanup: 881 cg_destroy(memcg); 882 free(memcg); 883 884 return ret; 885 } 886 887 struct tcp_server_args { 888 unsigned short port; 889 int ctl[2]; 890 }; 891 892 static int tcp_server(const char *cgroup, void *arg) 893 { 894 struct tcp_server_args *srv_args = arg; 895 struct sockaddr_in6 saddr = { 0 }; 896 socklen_t slen = sizeof(saddr); 897 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 898 899 close(srv_args->ctl[0]); 900 ctl_fd = srv_args->ctl[1]; 901 902 saddr.sin6_family = AF_INET6; 903 saddr.sin6_addr = in6addr_any; 904 saddr.sin6_port = htons(srv_args->port); 905 906 sk = socket(AF_INET6, SOCK_STREAM, 0); 907 if (sk < 0) 908 return ret; 909 910 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 911 goto cleanup; 912 913 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 914 write(ctl_fd, &errno, sizeof(errno)); 915 goto cleanup; 916 } 917 918 if (listen(sk, 1)) 919 goto cleanup; 920 921 ret = 0; 922 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 923 ret = -1; 924 goto cleanup; 925 } 926 927 client_sk = accept(sk, NULL, NULL); 928 if (client_sk < 0) 929 goto cleanup; 930 931 ret = -1; 932 for (;;) { 933 uint8_t buf[0x100000]; 934 935 if (write(client_sk, buf, sizeof(buf)) <= 0) { 936 if (errno == ECONNRESET) 937 ret = 0; 938 break; 939 } 940 } 941 942 close(client_sk); 943 944 cleanup: 945 close(sk); 946 return ret; 947 } 948 949 static int tcp_client(const char *cgroup, unsigned short port) 950 { 951 const char server[] = "localhost"; 952 struct addrinfo *ai; 953 char servport[6]; 954 int retries = 0x10; /* nice round number */ 955 int sk, ret; 956 957 snprintf(servport, sizeof(servport), "%hd", port); 958 ret = getaddrinfo(server, servport, NULL, &ai); 959 if (ret) 960 return ret; 961 962 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 963 if (sk < 0) 964 goto free_ainfo; 965 966 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 967 if (ret < 0) 968 goto close_sk; 969 970 ret = KSFT_FAIL; 971 while (retries--) { 972 uint8_t buf[0x100000]; 973 long current, sock; 974 975 if (read(sk, buf, sizeof(buf)) <= 0) 976 goto close_sk; 977 978 current = cg_read_long(cgroup, "memory.current"); 979 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 980 981 if (current < 0 || sock < 0) 982 goto close_sk; 983 984 if (values_close(current, sock, 10)) { 985 ret = KSFT_PASS; 986 break; 987 } 988 } 989 990 close_sk: 991 close(sk); 992 free_ainfo: 993 freeaddrinfo(ai); 994 return ret; 995 } 996 997 /* 998 * This test checks socket memory accounting. 999 * The test forks a TCP server listens on a random port between 1000 1000 * and 61000. Once it gets a client connection, it starts writing to 1001 * its socket. 1002 * The TCP client interleaves reads from the socket with check whether 1003 * memory.current and memory.stat.sock are similar. 1004 */ 1005 static int test_memcg_sock(const char *root) 1006 { 1007 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1008 unsigned short port; 1009 char *memcg; 1010 1011 memcg = cg_name(root, "memcg_test"); 1012 if (!memcg) 1013 goto cleanup; 1014 1015 if (cg_create(memcg)) 1016 goto cleanup; 1017 1018 while (bind_retries--) { 1019 struct tcp_server_args args; 1020 1021 if (pipe(args.ctl)) 1022 goto cleanup; 1023 1024 port = args.port = 1000 + rand() % 60000; 1025 1026 pid = cg_run_nowait(memcg, tcp_server, &args); 1027 if (pid < 0) 1028 goto cleanup; 1029 1030 close(args.ctl[1]); 1031 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1032 goto cleanup; 1033 close(args.ctl[0]); 1034 1035 if (!err) 1036 break; 1037 if (err != EADDRINUSE) 1038 goto cleanup; 1039 1040 waitpid(pid, NULL, 0); 1041 } 1042 1043 if (err == EADDRINUSE) { 1044 ret = KSFT_SKIP; 1045 goto cleanup; 1046 } 1047 1048 if (tcp_client(memcg, port) != KSFT_PASS) 1049 goto cleanup; 1050 1051 waitpid(pid, &err, 0); 1052 if (WEXITSTATUS(err)) 1053 goto cleanup; 1054 1055 if (cg_read_long(memcg, "memory.current") < 0) 1056 goto cleanup; 1057 1058 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1059 goto cleanup; 1060 1061 ret = KSFT_PASS; 1062 1063 cleanup: 1064 cg_destroy(memcg); 1065 free(memcg); 1066 1067 return ret; 1068 } 1069 1070 /* 1071 * This test disables swapping and tries to allocate anonymous memory 1072 * up to OOM with memory.group.oom set. Then it checks that all 1073 * processes in the leaf were killed. It also checks that oom_events 1074 * were propagated to the parent level. 1075 */ 1076 static int test_memcg_oom_group_leaf_events(const char *root) 1077 { 1078 int ret = KSFT_FAIL; 1079 char *parent, *child; 1080 long parent_oom_events; 1081 1082 parent = cg_name(root, "memcg_test_0"); 1083 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1084 1085 if (!parent || !child) 1086 goto cleanup; 1087 1088 if (cg_create(parent)) 1089 goto cleanup; 1090 1091 if (cg_create(child)) 1092 goto cleanup; 1093 1094 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1095 goto cleanup; 1096 1097 if (cg_write(child, "memory.max", "50M")) 1098 goto cleanup; 1099 1100 if (cg_write(child, "memory.swap.max", "0")) 1101 goto cleanup; 1102 1103 if (cg_write(child, "memory.oom.group", "1")) 1104 goto cleanup; 1105 1106 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1107 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1108 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1109 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1110 goto cleanup; 1111 1112 if (cg_test_proc_killed(child)) 1113 goto cleanup; 1114 1115 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1116 goto cleanup; 1117 1118 parent_oom_events = cg_read_key_long( 1119 parent, "memory.events", "oom_kill "); 1120 /* 1121 * If memory_localevents is not enabled (the default), the parent should 1122 * count OOM events in its children groups. Otherwise, it should not 1123 * have observed any events. 1124 */ 1125 if (has_localevents && parent_oom_events != 0) 1126 goto cleanup; 1127 else if (!has_localevents && parent_oom_events <= 0) 1128 goto cleanup; 1129 1130 ret = KSFT_PASS; 1131 1132 cleanup: 1133 if (child) 1134 cg_destroy(child); 1135 if (parent) 1136 cg_destroy(parent); 1137 free(child); 1138 free(parent); 1139 1140 return ret; 1141 } 1142 1143 /* 1144 * This test disables swapping and tries to allocate anonymous memory 1145 * up to OOM with memory.group.oom set. Then it checks that all 1146 * processes in the parent and leaf were killed. 1147 */ 1148 static int test_memcg_oom_group_parent_events(const char *root) 1149 { 1150 int ret = KSFT_FAIL; 1151 char *parent, *child; 1152 1153 parent = cg_name(root, "memcg_test_0"); 1154 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1155 1156 if (!parent || !child) 1157 goto cleanup; 1158 1159 if (cg_create(parent)) 1160 goto cleanup; 1161 1162 if (cg_create(child)) 1163 goto cleanup; 1164 1165 if (cg_write(parent, "memory.max", "80M")) 1166 goto cleanup; 1167 1168 if (cg_write(parent, "memory.swap.max", "0")) 1169 goto cleanup; 1170 1171 if (cg_write(parent, "memory.oom.group", "1")) 1172 goto cleanup; 1173 1174 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1175 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1176 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1177 1178 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1179 goto cleanup; 1180 1181 if (cg_test_proc_killed(child)) 1182 goto cleanup; 1183 if (cg_test_proc_killed(parent)) 1184 goto cleanup; 1185 1186 ret = KSFT_PASS; 1187 1188 cleanup: 1189 if (child) 1190 cg_destroy(child); 1191 if (parent) 1192 cg_destroy(parent); 1193 free(child); 1194 free(parent); 1195 1196 return ret; 1197 } 1198 1199 /* 1200 * This test disables swapping and tries to allocate anonymous memory 1201 * up to OOM with memory.group.oom set. Then it checks that all 1202 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1203 */ 1204 static int test_memcg_oom_group_score_events(const char *root) 1205 { 1206 int ret = KSFT_FAIL; 1207 char *memcg; 1208 int safe_pid; 1209 1210 memcg = cg_name(root, "memcg_test_0"); 1211 1212 if (!memcg) 1213 goto cleanup; 1214 1215 if (cg_create(memcg)) 1216 goto cleanup; 1217 1218 if (cg_write(memcg, "memory.max", "50M")) 1219 goto cleanup; 1220 1221 if (cg_write(memcg, "memory.swap.max", "0")) 1222 goto cleanup; 1223 1224 if (cg_write(memcg, "memory.oom.group", "1")) 1225 goto cleanup; 1226 1227 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1228 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1229 goto cleanup; 1230 1231 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1232 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1233 goto cleanup; 1234 1235 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1236 goto cleanup; 1237 1238 if (kill(safe_pid, SIGKILL)) 1239 goto cleanup; 1240 1241 ret = KSFT_PASS; 1242 1243 cleanup: 1244 if (memcg) 1245 cg_destroy(memcg); 1246 free(memcg); 1247 1248 return ret; 1249 } 1250 1251 #define T(x) { x, #x } 1252 struct memcg_test { 1253 int (*fn)(const char *root); 1254 const char *name; 1255 } tests[] = { 1256 T(test_memcg_subtree_control), 1257 T(test_memcg_current), 1258 T(test_memcg_min), 1259 T(test_memcg_low), 1260 T(test_memcg_high), 1261 T(test_memcg_high_sync), 1262 T(test_memcg_max), 1263 T(test_memcg_reclaim), 1264 T(test_memcg_oom_events), 1265 T(test_memcg_swap_max), 1266 T(test_memcg_sock), 1267 T(test_memcg_oom_group_leaf_events), 1268 T(test_memcg_oom_group_parent_events), 1269 T(test_memcg_oom_group_score_events), 1270 }; 1271 #undef T 1272 1273 int main(int argc, char **argv) 1274 { 1275 char root[PATH_MAX]; 1276 int i, proc_status, ret = EXIT_SUCCESS; 1277 1278 if (cg_find_unified_root(root, sizeof(root))) 1279 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1280 1281 /* 1282 * Check that memory controller is available: 1283 * memory is listed in cgroup.controllers 1284 */ 1285 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1286 ksft_exit_skip("memory controller isn't available\n"); 1287 1288 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1289 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1290 ksft_exit_skip("Failed to set memory controller\n"); 1291 1292 proc_status = proc_mount_contains("memory_recursiveprot"); 1293 if (proc_status < 0) 1294 ksft_exit_skip("Failed to query cgroup mount option\n"); 1295 has_recursiveprot = proc_status; 1296 1297 proc_status = proc_mount_contains("memory_localevents"); 1298 if (proc_status < 0) 1299 ksft_exit_skip("Failed to query cgroup mount option\n"); 1300 has_localevents = proc_status; 1301 1302 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1303 switch (tests[i].fn(root)) { 1304 case KSFT_PASS: 1305 ksft_test_result_pass("%s\n", tests[i].name); 1306 break; 1307 case KSFT_SKIP: 1308 ksft_test_result_skip("%s\n", tests[i].name); 1309 break; 1310 default: 1311 ret = EXIT_FAILURE; 1312 ksft_test_result_fail("%s\n", tests[i].name); 1313 break; 1314 } 1315 } 1316 1317 return ret; 1318 } 1319