1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 /* 28 * This test creates two nested cgroups with and without enabling 29 * the memory controller. 30 */ 31 static int test_memcg_subtree_control(const char *root) 32 { 33 char *parent, *child, *parent2 = NULL, *child2 = NULL; 34 int ret = KSFT_FAIL; 35 char buf[PAGE_SIZE]; 36 37 /* Create two nested cgroups with the memory controller enabled */ 38 parent = cg_name(root, "memcg_test_0"); 39 child = cg_name(root, "memcg_test_0/memcg_test_1"); 40 if (!parent || !child) 41 goto cleanup_free; 42 43 if (cg_create(parent)) 44 goto cleanup_free; 45 46 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 47 goto cleanup_parent; 48 49 if (cg_create(child)) 50 goto cleanup_parent; 51 52 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 53 goto cleanup_child; 54 55 /* Create two nested cgroups without enabling memory controller */ 56 parent2 = cg_name(root, "memcg_test_1"); 57 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 58 if (!parent2 || !child2) 59 goto cleanup_free2; 60 61 if (cg_create(parent2)) 62 goto cleanup_free2; 63 64 if (cg_create(child2)) 65 goto cleanup_parent2; 66 67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 68 goto cleanup_all; 69 70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 71 goto cleanup_all; 72 73 ret = KSFT_PASS; 74 75 cleanup_all: 76 cg_destroy(child2); 77 cleanup_parent2: 78 cg_destroy(parent2); 79 cleanup_free2: 80 free(parent2); 81 free(child2); 82 cleanup_child: 83 cg_destroy(child); 84 cleanup_parent: 85 cg_destroy(parent); 86 cleanup_free: 87 free(parent); 88 free(child); 89 90 return ret; 91 } 92 93 static int alloc_anon_50M_check(const char *cgroup, void *arg) 94 { 95 size_t size = MB(50); 96 char *buf, *ptr; 97 long anon, current; 98 int ret = -1; 99 100 buf = malloc(size); 101 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 102 *ptr = 0; 103 104 current = cg_read_long(cgroup, "memory.current"); 105 if (current < size) 106 goto cleanup; 107 108 if (!values_close(size, current, 3)) 109 goto cleanup; 110 111 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 112 if (anon < 0) 113 goto cleanup; 114 115 if (!values_close(anon, current, 3)) 116 goto cleanup; 117 118 ret = 0; 119 cleanup: 120 free(buf); 121 return ret; 122 } 123 124 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 125 { 126 size_t size = MB(50); 127 int ret = -1; 128 long current, file; 129 int fd; 130 131 fd = get_temp_fd(); 132 if (fd < 0) 133 return -1; 134 135 if (alloc_pagecache(fd, size)) 136 goto cleanup; 137 138 current = cg_read_long(cgroup, "memory.current"); 139 if (current < size) 140 goto cleanup; 141 142 file = cg_read_key_long(cgroup, "memory.stat", "file "); 143 if (file < 0) 144 goto cleanup; 145 146 if (!values_close(file, current, 10)) 147 goto cleanup; 148 149 ret = 0; 150 151 cleanup: 152 close(fd); 153 return ret; 154 } 155 156 /* 157 * This test create a memory cgroup, allocates 158 * some anonymous memory and some pagecache 159 * and check memory.current and some memory.stat values. 160 */ 161 static int test_memcg_current(const char *root) 162 { 163 int ret = KSFT_FAIL; 164 long current; 165 char *memcg; 166 167 memcg = cg_name(root, "memcg_test"); 168 if (!memcg) 169 goto cleanup; 170 171 if (cg_create(memcg)) 172 goto cleanup; 173 174 current = cg_read_long(memcg, "memory.current"); 175 if (current != 0) 176 goto cleanup; 177 178 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 179 goto cleanup; 180 181 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 182 goto cleanup; 183 184 ret = KSFT_PASS; 185 186 cleanup: 187 cg_destroy(memcg); 188 free(memcg); 189 190 return ret; 191 } 192 193 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 194 { 195 int fd = (long)arg; 196 int ppid = getppid(); 197 198 if (alloc_pagecache(fd, MB(50))) 199 return -1; 200 201 while (getppid() == ppid) 202 sleep(1); 203 204 return 0; 205 } 206 207 static int alloc_anon_noexit(const char *cgroup, void *arg) 208 { 209 int ppid = getppid(); 210 size_t size = (unsigned long)arg; 211 char *buf, *ptr; 212 213 buf = malloc(size); 214 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 215 *ptr = 0; 216 217 while (getppid() == ppid) 218 sleep(1); 219 220 free(buf); 221 return 0; 222 } 223 224 /* 225 * Wait until processes are killed asynchronously by the OOM killer 226 * If we exceed a timeout, fail. 227 */ 228 static int cg_test_proc_killed(const char *cgroup) 229 { 230 int limit; 231 232 for (limit = 10; limit > 0; limit--) { 233 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 234 return 0; 235 236 usleep(100000); 237 } 238 return -1; 239 } 240 241 static bool reclaim_until(const char *memcg, long goal); 242 243 /* 244 * First, this test creates the following hierarchy: 245 * A memory.min = 0, memory.max = 200M 246 * A/B memory.min = 50M 247 * A/B/C memory.min = 75M, memory.current = 50M 248 * A/B/D memory.min = 25M, memory.current = 50M 249 * A/B/E memory.min = 0, memory.current = 50M 250 * A/B/F memory.min = 500M, memory.current = 0 251 * 252 * (or memory.low if we test soft protection) 253 * 254 * Usages are pagecache and the test keeps a running 255 * process in every leaf cgroup. 256 * Then it creates A/G and creates a significant 257 * memory pressure in A. 258 * 259 * Then it checks actual memory usages and expects that: 260 * A/B memory.current ~= 50M 261 * A/B/C memory.current ~= 29M 262 * A/B/D memory.current ~= 21M 263 * A/B/E memory.current ~= 0 264 * A/B/F memory.current = 0 265 * (for origin of the numbers, see model in memcg_protection.m.) 266 * 267 * After that it tries to allocate more than there is 268 * unprotected memory in A available, and checks that: 269 * a) memory.min protects pagecache even in this case, 270 * b) memory.low allows reclaiming page cache with low events. 271 * 272 * Then we try to reclaim from A/B/C using memory.reclaim until its 273 * usage reaches 10M. 274 * This makes sure that: 275 * (a) We ignore the protection of the reclaim target memcg. 276 * (b) The previously calculated emin value (~29M) should be dismissed. 277 */ 278 static int test_memcg_protection(const char *root, bool min) 279 { 280 int ret = KSFT_FAIL, rc; 281 char *parent[3] = {NULL}; 282 char *children[4] = {NULL}; 283 const char *attribute = min ? "memory.min" : "memory.low"; 284 long c[4]; 285 int i, attempts; 286 int fd; 287 288 fd = get_temp_fd(); 289 if (fd < 0) 290 goto cleanup; 291 292 parent[0] = cg_name(root, "memcg_test_0"); 293 if (!parent[0]) 294 goto cleanup; 295 296 parent[1] = cg_name(parent[0], "memcg_test_1"); 297 if (!parent[1]) 298 goto cleanup; 299 300 parent[2] = cg_name(parent[0], "memcg_test_2"); 301 if (!parent[2]) 302 goto cleanup; 303 304 if (cg_create(parent[0])) 305 goto cleanup; 306 307 if (cg_read_long(parent[0], attribute)) { 308 /* No memory.min on older kernels is fine */ 309 if (min) 310 ret = KSFT_SKIP; 311 goto cleanup; 312 } 313 314 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 315 goto cleanup; 316 317 if (cg_write(parent[0], "memory.max", "200M")) 318 goto cleanup; 319 320 if (cg_write(parent[0], "memory.swap.max", "0")) 321 goto cleanup; 322 323 if (cg_create(parent[1])) 324 goto cleanup; 325 326 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 327 goto cleanup; 328 329 if (cg_create(parent[2])) 330 goto cleanup; 331 332 for (i = 0; i < ARRAY_SIZE(children); i++) { 333 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 334 if (!children[i]) 335 goto cleanup; 336 337 if (cg_create(children[i])) 338 goto cleanup; 339 340 if (i > 2) 341 continue; 342 343 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 344 (void *)(long)fd); 345 } 346 347 if (cg_write(parent[1], attribute, "50M")) 348 goto cleanup; 349 if (cg_write(children[0], attribute, "75M")) 350 goto cleanup; 351 if (cg_write(children[1], attribute, "25M")) 352 goto cleanup; 353 if (cg_write(children[2], attribute, "0")) 354 goto cleanup; 355 if (cg_write(children[3], attribute, "500M")) 356 goto cleanup; 357 358 attempts = 0; 359 while (!values_close(cg_read_long(parent[1], "memory.current"), 360 MB(150), 3)) { 361 if (attempts++ > 5) 362 break; 363 sleep(1); 364 } 365 366 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 367 goto cleanup; 368 369 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 370 goto cleanup; 371 372 for (i = 0; i < ARRAY_SIZE(children); i++) 373 c[i] = cg_read_long(children[i], "memory.current"); 374 375 if (!values_close(c[0], MB(29), 10)) 376 goto cleanup; 377 378 if (!values_close(c[1], MB(21), 10)) 379 goto cleanup; 380 381 if (c[3] != 0) 382 goto cleanup; 383 384 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 385 if (min && !rc) 386 goto cleanup; 387 else if (!min && rc) { 388 fprintf(stderr, 389 "memory.low prevents from allocating anon memory\n"); 390 goto cleanup; 391 } 392 393 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 394 goto cleanup; 395 396 if (!reclaim_until(children[0], MB(10))) 397 goto cleanup; 398 399 if (min) { 400 ret = KSFT_PASS; 401 goto cleanup; 402 } 403 404 for (i = 0; i < ARRAY_SIZE(children); i++) { 405 int no_low_events_index = 1; 406 long low, oom; 407 408 oom = cg_read_key_long(children[i], "memory.events", "oom "); 409 low = cg_read_key_long(children[i], "memory.events", "low "); 410 411 if (oom) 412 goto cleanup; 413 if (i <= no_low_events_index && low <= 0) 414 goto cleanup; 415 if (i > no_low_events_index && low) 416 goto cleanup; 417 418 } 419 420 ret = KSFT_PASS; 421 422 cleanup: 423 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 424 if (!children[i]) 425 continue; 426 427 cg_destroy(children[i]); 428 free(children[i]); 429 } 430 431 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 432 if (!parent[i]) 433 continue; 434 435 cg_destroy(parent[i]); 436 free(parent[i]); 437 } 438 close(fd); 439 return ret; 440 } 441 442 static int test_memcg_min(const char *root) 443 { 444 return test_memcg_protection(root, true); 445 } 446 447 static int test_memcg_low(const char *root) 448 { 449 return test_memcg_protection(root, false); 450 } 451 452 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 453 { 454 size_t size = MB(50); 455 int ret = -1; 456 long current, high, max; 457 int fd; 458 459 high = cg_read_long(cgroup, "memory.high"); 460 max = cg_read_long(cgroup, "memory.max"); 461 if (high != MB(30) && max != MB(30)) 462 return -1; 463 464 fd = get_temp_fd(); 465 if (fd < 0) 466 return -1; 467 468 if (alloc_pagecache(fd, size)) 469 goto cleanup; 470 471 current = cg_read_long(cgroup, "memory.current"); 472 if (!values_close(current, MB(30), 5)) 473 goto cleanup; 474 475 ret = 0; 476 477 cleanup: 478 close(fd); 479 return ret; 480 481 } 482 483 /* 484 * This test checks that memory.high limits the amount of 485 * memory which can be consumed by either anonymous memory 486 * or pagecache. 487 */ 488 static int test_memcg_high(const char *root) 489 { 490 int ret = KSFT_FAIL; 491 char *memcg; 492 long high; 493 494 memcg = cg_name(root, "memcg_test"); 495 if (!memcg) 496 goto cleanup; 497 498 if (cg_create(memcg)) 499 goto cleanup; 500 501 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 502 goto cleanup; 503 504 if (cg_write(memcg, "memory.swap.max", "0")) 505 goto cleanup; 506 507 if (cg_write(memcg, "memory.high", "30M")) 508 goto cleanup; 509 510 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 511 goto cleanup; 512 513 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 514 goto cleanup; 515 516 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 517 goto cleanup; 518 519 high = cg_read_key_long(memcg, "memory.events", "high "); 520 if (high <= 0) 521 goto cleanup; 522 523 ret = KSFT_PASS; 524 525 cleanup: 526 cg_destroy(memcg); 527 free(memcg); 528 529 return ret; 530 } 531 532 static int alloc_anon_mlock(const char *cgroup, void *arg) 533 { 534 size_t size = (size_t)arg; 535 void *buf; 536 537 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 538 0, 0); 539 if (buf == MAP_FAILED) 540 return -1; 541 542 mlock(buf, size); 543 munmap(buf, size); 544 return 0; 545 } 546 547 /* 548 * This test checks that memory.high is able to throttle big single shot 549 * allocation i.e. large allocation within one kernel entry. 550 */ 551 static int test_memcg_high_sync(const char *root) 552 { 553 int ret = KSFT_FAIL, pid, fd = -1; 554 char *memcg; 555 long pre_high, pre_max; 556 long post_high, post_max; 557 558 memcg = cg_name(root, "memcg_test"); 559 if (!memcg) 560 goto cleanup; 561 562 if (cg_create(memcg)) 563 goto cleanup; 564 565 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 566 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 567 if (pre_high < 0 || pre_max < 0) 568 goto cleanup; 569 570 if (cg_write(memcg, "memory.swap.max", "0")) 571 goto cleanup; 572 573 if (cg_write(memcg, "memory.high", "30M")) 574 goto cleanup; 575 576 if (cg_write(memcg, "memory.max", "140M")) 577 goto cleanup; 578 579 fd = memcg_prepare_for_wait(memcg); 580 if (fd < 0) 581 goto cleanup; 582 583 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 584 if (pid < 0) 585 goto cleanup; 586 587 cg_wait_for(fd); 588 589 post_high = cg_read_key_long(memcg, "memory.events", "high "); 590 post_max = cg_read_key_long(memcg, "memory.events", "max "); 591 if (post_high < 0 || post_max < 0) 592 goto cleanup; 593 594 if (pre_high == post_high || pre_max != post_max) 595 goto cleanup; 596 597 ret = KSFT_PASS; 598 599 cleanup: 600 if (fd >= 0) 601 close(fd); 602 cg_destroy(memcg); 603 free(memcg); 604 605 return ret; 606 } 607 608 /* 609 * This test checks that memory.max limits the amount of 610 * memory which can be consumed by either anonymous memory 611 * or pagecache. 612 */ 613 static int test_memcg_max(const char *root) 614 { 615 int ret = KSFT_FAIL; 616 char *memcg; 617 long current, max; 618 619 memcg = cg_name(root, "memcg_test"); 620 if (!memcg) 621 goto cleanup; 622 623 if (cg_create(memcg)) 624 goto cleanup; 625 626 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 627 goto cleanup; 628 629 if (cg_write(memcg, "memory.swap.max", "0")) 630 goto cleanup; 631 632 if (cg_write(memcg, "memory.max", "30M")) 633 goto cleanup; 634 635 /* Should be killed by OOM killer */ 636 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 637 goto cleanup; 638 639 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 640 goto cleanup; 641 642 current = cg_read_long(memcg, "memory.current"); 643 if (current > MB(30) || !current) 644 goto cleanup; 645 646 max = cg_read_key_long(memcg, "memory.events", "max "); 647 if (max <= 0) 648 goto cleanup; 649 650 ret = KSFT_PASS; 651 652 cleanup: 653 cg_destroy(memcg); 654 free(memcg); 655 656 return ret; 657 } 658 659 /* 660 * Reclaim from @memcg until usage reaches @goal by writing to 661 * memory.reclaim. 662 * 663 * This function will return false if the usage is already below the 664 * goal. 665 * 666 * This function assumes that writing to memory.reclaim is the only 667 * source of change in memory.current (no concurrent allocations or 668 * reclaim). 669 * 670 * This function makes sure memory.reclaim is sane. It will return 671 * false if memory.reclaim's error codes do not make sense, even if 672 * the usage goal was satisfied. 673 */ 674 static bool reclaim_until(const char *memcg, long goal) 675 { 676 char buf[64]; 677 int retries, err; 678 long current, to_reclaim; 679 bool reclaimed = false; 680 681 for (retries = 5; retries > 0; retries--) { 682 current = cg_read_long(memcg, "memory.current"); 683 684 if (current < goal || values_close(current, goal, 3)) 685 break; 686 /* Did memory.reclaim return 0 incorrectly? */ 687 else if (reclaimed) 688 return false; 689 690 to_reclaim = current - goal; 691 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 692 err = cg_write(memcg, "memory.reclaim", buf); 693 if (!err) 694 reclaimed = true; 695 else if (err != -EAGAIN) 696 return false; 697 } 698 return reclaimed; 699 } 700 701 /* 702 * This test checks that memory.reclaim reclaims the given 703 * amount of memory (from both anon and file, if possible). 704 */ 705 static int test_memcg_reclaim(const char *root) 706 { 707 int ret = KSFT_FAIL, fd, retries; 708 char *memcg; 709 long current, expected_usage; 710 711 memcg = cg_name(root, "memcg_test"); 712 if (!memcg) 713 goto cleanup; 714 715 if (cg_create(memcg)) 716 goto cleanup; 717 718 current = cg_read_long(memcg, "memory.current"); 719 if (current != 0) 720 goto cleanup; 721 722 fd = get_temp_fd(); 723 if (fd < 0) 724 goto cleanup; 725 726 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 727 728 /* 729 * If swap is enabled, try to reclaim from both anon and file, else try 730 * to reclaim from file only. 731 */ 732 if (is_swap_enabled()) { 733 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 734 expected_usage = MB(100); 735 } else 736 expected_usage = MB(50); 737 738 /* 739 * Wait until current usage reaches the expected usage (or we run out of 740 * retries). 741 */ 742 retries = 5; 743 while (!values_close(cg_read_long(memcg, "memory.current"), 744 expected_usage, 10)) { 745 if (retries--) { 746 sleep(1); 747 continue; 748 } else { 749 fprintf(stderr, 750 "failed to allocate %ld for memcg reclaim test\n", 751 expected_usage); 752 goto cleanup; 753 } 754 } 755 756 /* 757 * Reclaim until current reaches 30M, this makes sure we hit both anon 758 * and file if swap is enabled. 759 */ 760 if (!reclaim_until(memcg, MB(30))) 761 goto cleanup; 762 763 ret = KSFT_PASS; 764 cleanup: 765 cg_destroy(memcg); 766 free(memcg); 767 close(fd); 768 769 return ret; 770 } 771 772 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 773 { 774 long mem_max = (long)arg; 775 size_t size = MB(50); 776 char *buf, *ptr; 777 long mem_current, swap_current; 778 int ret = -1; 779 780 buf = malloc(size); 781 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 782 *ptr = 0; 783 784 mem_current = cg_read_long(cgroup, "memory.current"); 785 if (!mem_current || !values_close(mem_current, mem_max, 3)) 786 goto cleanup; 787 788 swap_current = cg_read_long(cgroup, "memory.swap.current"); 789 if (!swap_current || 790 !values_close(mem_current + swap_current, size, 3)) 791 goto cleanup; 792 793 ret = 0; 794 cleanup: 795 free(buf); 796 return ret; 797 } 798 799 /* 800 * This test checks that memory.swap.max limits the amount of 801 * anonymous memory which can be swapped out. 802 */ 803 static int test_memcg_swap_max(const char *root) 804 { 805 int ret = KSFT_FAIL; 806 char *memcg; 807 long max; 808 809 if (!is_swap_enabled()) 810 return KSFT_SKIP; 811 812 memcg = cg_name(root, "memcg_test"); 813 if (!memcg) 814 goto cleanup; 815 816 if (cg_create(memcg)) 817 goto cleanup; 818 819 if (cg_read_long(memcg, "memory.swap.current")) { 820 ret = KSFT_SKIP; 821 goto cleanup; 822 } 823 824 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 825 goto cleanup; 826 827 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 828 goto cleanup; 829 830 if (cg_write(memcg, "memory.swap.max", "30M")) 831 goto cleanup; 832 833 if (cg_write(memcg, "memory.max", "30M")) 834 goto cleanup; 835 836 /* Should be killed by OOM killer */ 837 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 838 goto cleanup; 839 840 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 841 goto cleanup; 842 843 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 844 goto cleanup; 845 846 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 847 goto cleanup; 848 849 max = cg_read_key_long(memcg, "memory.events", "max "); 850 if (max <= 0) 851 goto cleanup; 852 853 ret = KSFT_PASS; 854 855 cleanup: 856 cg_destroy(memcg); 857 free(memcg); 858 859 return ret; 860 } 861 862 /* 863 * This test disables swapping and tries to allocate anonymous memory 864 * up to OOM. Then it checks for oom and oom_kill events in 865 * memory.events. 866 */ 867 static int test_memcg_oom_events(const char *root) 868 { 869 int ret = KSFT_FAIL; 870 char *memcg; 871 872 memcg = cg_name(root, "memcg_test"); 873 if (!memcg) 874 goto cleanup; 875 876 if (cg_create(memcg)) 877 goto cleanup; 878 879 if (cg_write(memcg, "memory.max", "30M")) 880 goto cleanup; 881 882 if (cg_write(memcg, "memory.swap.max", "0")) 883 goto cleanup; 884 885 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 886 goto cleanup; 887 888 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 889 goto cleanup; 890 891 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 892 goto cleanup; 893 894 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 895 goto cleanup; 896 897 ret = KSFT_PASS; 898 899 cleanup: 900 cg_destroy(memcg); 901 free(memcg); 902 903 return ret; 904 } 905 906 struct tcp_server_args { 907 unsigned short port; 908 int ctl[2]; 909 }; 910 911 static int tcp_server(const char *cgroup, void *arg) 912 { 913 struct tcp_server_args *srv_args = arg; 914 struct sockaddr_in6 saddr = { 0 }; 915 socklen_t slen = sizeof(saddr); 916 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 917 918 close(srv_args->ctl[0]); 919 ctl_fd = srv_args->ctl[1]; 920 921 saddr.sin6_family = AF_INET6; 922 saddr.sin6_addr = in6addr_any; 923 saddr.sin6_port = htons(srv_args->port); 924 925 sk = socket(AF_INET6, SOCK_STREAM, 0); 926 if (sk < 0) 927 return ret; 928 929 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 930 goto cleanup; 931 932 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 933 write(ctl_fd, &errno, sizeof(errno)); 934 goto cleanup; 935 } 936 937 if (listen(sk, 1)) 938 goto cleanup; 939 940 ret = 0; 941 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 942 ret = -1; 943 goto cleanup; 944 } 945 946 client_sk = accept(sk, NULL, NULL); 947 if (client_sk < 0) 948 goto cleanup; 949 950 ret = -1; 951 for (;;) { 952 uint8_t buf[0x100000]; 953 954 if (write(client_sk, buf, sizeof(buf)) <= 0) { 955 if (errno == ECONNRESET) 956 ret = 0; 957 break; 958 } 959 } 960 961 close(client_sk); 962 963 cleanup: 964 close(sk); 965 return ret; 966 } 967 968 static int tcp_client(const char *cgroup, unsigned short port) 969 { 970 const char server[] = "localhost"; 971 struct addrinfo *ai; 972 char servport[6]; 973 int retries = 0x10; /* nice round number */ 974 int sk, ret; 975 976 snprintf(servport, sizeof(servport), "%hd", port); 977 ret = getaddrinfo(server, servport, NULL, &ai); 978 if (ret) 979 return ret; 980 981 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 982 if (sk < 0) 983 goto free_ainfo; 984 985 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 986 if (ret < 0) 987 goto close_sk; 988 989 ret = KSFT_FAIL; 990 while (retries--) { 991 uint8_t buf[0x100000]; 992 long current, sock; 993 994 if (read(sk, buf, sizeof(buf)) <= 0) 995 goto close_sk; 996 997 current = cg_read_long(cgroup, "memory.current"); 998 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 999 1000 if (current < 0 || sock < 0) 1001 goto close_sk; 1002 1003 if (values_close(current, sock, 10)) { 1004 ret = KSFT_PASS; 1005 break; 1006 } 1007 } 1008 1009 close_sk: 1010 close(sk); 1011 free_ainfo: 1012 freeaddrinfo(ai); 1013 return ret; 1014 } 1015 1016 /* 1017 * This test checks socket memory accounting. 1018 * The test forks a TCP server listens on a random port between 1000 1019 * and 61000. Once it gets a client connection, it starts writing to 1020 * its socket. 1021 * The TCP client interleaves reads from the socket with check whether 1022 * memory.current and memory.stat.sock are similar. 1023 */ 1024 static int test_memcg_sock(const char *root) 1025 { 1026 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1027 unsigned short port; 1028 char *memcg; 1029 1030 memcg = cg_name(root, "memcg_test"); 1031 if (!memcg) 1032 goto cleanup; 1033 1034 if (cg_create(memcg)) 1035 goto cleanup; 1036 1037 while (bind_retries--) { 1038 struct tcp_server_args args; 1039 1040 if (pipe(args.ctl)) 1041 goto cleanup; 1042 1043 port = args.port = 1000 + rand() % 60000; 1044 1045 pid = cg_run_nowait(memcg, tcp_server, &args); 1046 if (pid < 0) 1047 goto cleanup; 1048 1049 close(args.ctl[1]); 1050 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1051 goto cleanup; 1052 close(args.ctl[0]); 1053 1054 if (!err) 1055 break; 1056 if (err != EADDRINUSE) 1057 goto cleanup; 1058 1059 waitpid(pid, NULL, 0); 1060 } 1061 1062 if (err == EADDRINUSE) { 1063 ret = KSFT_SKIP; 1064 goto cleanup; 1065 } 1066 1067 if (tcp_client(memcg, port) != KSFT_PASS) 1068 goto cleanup; 1069 1070 waitpid(pid, &err, 0); 1071 if (WEXITSTATUS(err)) 1072 goto cleanup; 1073 1074 if (cg_read_long(memcg, "memory.current") < 0) 1075 goto cleanup; 1076 1077 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1078 goto cleanup; 1079 1080 ret = KSFT_PASS; 1081 1082 cleanup: 1083 cg_destroy(memcg); 1084 free(memcg); 1085 1086 return ret; 1087 } 1088 1089 /* 1090 * This test disables swapping and tries to allocate anonymous memory 1091 * up to OOM with memory.group.oom set. Then it checks that all 1092 * processes in the leaf were killed. It also checks that oom_events 1093 * were propagated to the parent level. 1094 */ 1095 static int test_memcg_oom_group_leaf_events(const char *root) 1096 { 1097 int ret = KSFT_FAIL; 1098 char *parent, *child; 1099 long parent_oom_events; 1100 1101 parent = cg_name(root, "memcg_test_0"); 1102 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1103 1104 if (!parent || !child) 1105 goto cleanup; 1106 1107 if (cg_create(parent)) 1108 goto cleanup; 1109 1110 if (cg_create(child)) 1111 goto cleanup; 1112 1113 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1114 goto cleanup; 1115 1116 if (cg_write(child, "memory.max", "50M")) 1117 goto cleanup; 1118 1119 if (cg_write(child, "memory.swap.max", "0")) 1120 goto cleanup; 1121 1122 if (cg_write(child, "memory.oom.group", "1")) 1123 goto cleanup; 1124 1125 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1126 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1127 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1128 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1129 goto cleanup; 1130 1131 if (cg_test_proc_killed(child)) 1132 goto cleanup; 1133 1134 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1135 goto cleanup; 1136 1137 parent_oom_events = cg_read_key_long( 1138 parent, "memory.events", "oom_kill "); 1139 /* 1140 * If memory_localevents is not enabled (the default), the parent should 1141 * count OOM events in its children groups. Otherwise, it should not 1142 * have observed any events. 1143 */ 1144 if (has_localevents && parent_oom_events != 0) 1145 goto cleanup; 1146 else if (!has_localevents && parent_oom_events <= 0) 1147 goto cleanup; 1148 1149 ret = KSFT_PASS; 1150 1151 cleanup: 1152 if (child) 1153 cg_destroy(child); 1154 if (parent) 1155 cg_destroy(parent); 1156 free(child); 1157 free(parent); 1158 1159 return ret; 1160 } 1161 1162 /* 1163 * This test disables swapping and tries to allocate anonymous memory 1164 * up to OOM with memory.group.oom set. Then it checks that all 1165 * processes in the parent and leaf were killed. 1166 */ 1167 static int test_memcg_oom_group_parent_events(const char *root) 1168 { 1169 int ret = KSFT_FAIL; 1170 char *parent, *child; 1171 1172 parent = cg_name(root, "memcg_test_0"); 1173 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1174 1175 if (!parent || !child) 1176 goto cleanup; 1177 1178 if (cg_create(parent)) 1179 goto cleanup; 1180 1181 if (cg_create(child)) 1182 goto cleanup; 1183 1184 if (cg_write(parent, "memory.max", "80M")) 1185 goto cleanup; 1186 1187 if (cg_write(parent, "memory.swap.max", "0")) 1188 goto cleanup; 1189 1190 if (cg_write(parent, "memory.oom.group", "1")) 1191 goto cleanup; 1192 1193 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1194 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1195 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1196 1197 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1198 goto cleanup; 1199 1200 if (cg_test_proc_killed(child)) 1201 goto cleanup; 1202 if (cg_test_proc_killed(parent)) 1203 goto cleanup; 1204 1205 ret = KSFT_PASS; 1206 1207 cleanup: 1208 if (child) 1209 cg_destroy(child); 1210 if (parent) 1211 cg_destroy(parent); 1212 free(child); 1213 free(parent); 1214 1215 return ret; 1216 } 1217 1218 /* 1219 * This test disables swapping and tries to allocate anonymous memory 1220 * up to OOM with memory.group.oom set. Then it checks that all 1221 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1222 */ 1223 static int test_memcg_oom_group_score_events(const char *root) 1224 { 1225 int ret = KSFT_FAIL; 1226 char *memcg; 1227 int safe_pid; 1228 1229 memcg = cg_name(root, "memcg_test_0"); 1230 1231 if (!memcg) 1232 goto cleanup; 1233 1234 if (cg_create(memcg)) 1235 goto cleanup; 1236 1237 if (cg_write(memcg, "memory.max", "50M")) 1238 goto cleanup; 1239 1240 if (cg_write(memcg, "memory.swap.max", "0")) 1241 goto cleanup; 1242 1243 if (cg_write(memcg, "memory.oom.group", "1")) 1244 goto cleanup; 1245 1246 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1247 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1248 goto cleanup; 1249 1250 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1251 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1252 goto cleanup; 1253 1254 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1255 goto cleanup; 1256 1257 if (kill(safe_pid, SIGKILL)) 1258 goto cleanup; 1259 1260 ret = KSFT_PASS; 1261 1262 cleanup: 1263 if (memcg) 1264 cg_destroy(memcg); 1265 free(memcg); 1266 1267 return ret; 1268 } 1269 1270 #define T(x) { x, #x } 1271 struct memcg_test { 1272 int (*fn)(const char *root); 1273 const char *name; 1274 } tests[] = { 1275 T(test_memcg_subtree_control), 1276 T(test_memcg_current), 1277 T(test_memcg_min), 1278 T(test_memcg_low), 1279 T(test_memcg_high), 1280 T(test_memcg_high_sync), 1281 T(test_memcg_max), 1282 T(test_memcg_reclaim), 1283 T(test_memcg_oom_events), 1284 T(test_memcg_swap_max), 1285 T(test_memcg_sock), 1286 T(test_memcg_oom_group_leaf_events), 1287 T(test_memcg_oom_group_parent_events), 1288 T(test_memcg_oom_group_score_events), 1289 }; 1290 #undef T 1291 1292 int main(int argc, char **argv) 1293 { 1294 char root[PATH_MAX]; 1295 int i, proc_status, ret = EXIT_SUCCESS; 1296 1297 if (cg_find_unified_root(root, sizeof(root))) 1298 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1299 1300 /* 1301 * Check that memory controller is available: 1302 * memory is listed in cgroup.controllers 1303 */ 1304 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1305 ksft_exit_skip("memory controller isn't available\n"); 1306 1307 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1308 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1309 ksft_exit_skip("Failed to set memory controller\n"); 1310 1311 proc_status = proc_mount_contains("memory_recursiveprot"); 1312 if (proc_status < 0) 1313 ksft_exit_skip("Failed to query cgroup mount option\n"); 1314 has_recursiveprot = proc_status; 1315 1316 proc_status = proc_mount_contains("memory_localevents"); 1317 if (proc_status < 0) 1318 ksft_exit_skip("Failed to query cgroup mount option\n"); 1319 has_localevents = proc_status; 1320 1321 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1322 switch (tests[i].fn(root)) { 1323 case KSFT_PASS: 1324 ksft_test_result_pass("%s\n", tests[i].name); 1325 break; 1326 case KSFT_SKIP: 1327 ksft_test_result_skip("%s\n", tests[i].name); 1328 break; 1329 default: 1330 ret = EXIT_FAILURE; 1331 ksft_test_result_fail("%s\n", tests[i].name); 1332 break; 1333 } 1334 } 1335 1336 return ret; 1337 } 1338