1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 /* 28 * This test creates two nested cgroups with and without enabling 29 * the memory controller. 30 */ 31 static int test_memcg_subtree_control(const char *root) 32 { 33 char *parent, *child, *parent2 = NULL, *child2 = NULL; 34 int ret = KSFT_FAIL; 35 char buf[PAGE_SIZE]; 36 37 /* Create two nested cgroups with the memory controller enabled */ 38 parent = cg_name(root, "memcg_test_0"); 39 child = cg_name(root, "memcg_test_0/memcg_test_1"); 40 if (!parent || !child) 41 goto cleanup_free; 42 43 if (cg_create(parent)) 44 goto cleanup_free; 45 46 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 47 goto cleanup_parent; 48 49 if (cg_create(child)) 50 goto cleanup_parent; 51 52 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 53 goto cleanup_child; 54 55 /* Create two nested cgroups without enabling memory controller */ 56 parent2 = cg_name(root, "memcg_test_1"); 57 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 58 if (!parent2 || !child2) 59 goto cleanup_free2; 60 61 if (cg_create(parent2)) 62 goto cleanup_free2; 63 64 if (cg_create(child2)) 65 goto cleanup_parent2; 66 67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 68 goto cleanup_all; 69 70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 71 goto cleanup_all; 72 73 ret = KSFT_PASS; 74 75 cleanup_all: 76 cg_destroy(child2); 77 cleanup_parent2: 78 cg_destroy(parent2); 79 cleanup_free2: 80 free(parent2); 81 free(child2); 82 cleanup_child: 83 cg_destroy(child); 84 cleanup_parent: 85 cg_destroy(parent); 86 cleanup_free: 87 free(parent); 88 free(child); 89 90 return ret; 91 } 92 93 static int alloc_anon_50M_check(const char *cgroup, void *arg) 94 { 95 size_t size = MB(50); 96 char *buf, *ptr; 97 long anon, current; 98 int ret = -1; 99 100 buf = malloc(size); 101 if (buf == NULL) { 102 fprintf(stderr, "malloc() failed\n"); 103 return -1; 104 } 105 106 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 107 *ptr = 0; 108 109 current = cg_read_long(cgroup, "memory.current"); 110 if (current < size) 111 goto cleanup; 112 113 if (!values_close(size, current, 3)) 114 goto cleanup; 115 116 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 117 if (anon < 0) 118 goto cleanup; 119 120 if (!values_close(anon, current, 3)) 121 goto cleanup; 122 123 ret = 0; 124 cleanup: 125 free(buf); 126 return ret; 127 } 128 129 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 130 { 131 size_t size = MB(50); 132 int ret = -1; 133 long current, file; 134 int fd; 135 136 fd = get_temp_fd(); 137 if (fd < 0) 138 return -1; 139 140 if (alloc_pagecache(fd, size)) 141 goto cleanup; 142 143 current = cg_read_long(cgroup, "memory.current"); 144 if (current < size) 145 goto cleanup; 146 147 file = cg_read_key_long(cgroup, "memory.stat", "file "); 148 if (file < 0) 149 goto cleanup; 150 151 if (!values_close(file, current, 10)) 152 goto cleanup; 153 154 ret = 0; 155 156 cleanup: 157 close(fd); 158 return ret; 159 } 160 161 /* 162 * This test create a memory cgroup, allocates 163 * some anonymous memory and some pagecache 164 * and check memory.current and some memory.stat values. 165 */ 166 static int test_memcg_current(const char *root) 167 { 168 int ret = KSFT_FAIL; 169 long current; 170 char *memcg; 171 172 memcg = cg_name(root, "memcg_test"); 173 if (!memcg) 174 goto cleanup; 175 176 if (cg_create(memcg)) 177 goto cleanup; 178 179 current = cg_read_long(memcg, "memory.current"); 180 if (current != 0) 181 goto cleanup; 182 183 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 184 goto cleanup; 185 186 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 187 goto cleanup; 188 189 ret = KSFT_PASS; 190 191 cleanup: 192 cg_destroy(memcg); 193 free(memcg); 194 195 return ret; 196 } 197 198 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 199 { 200 int fd = (long)arg; 201 int ppid = getppid(); 202 203 if (alloc_pagecache(fd, MB(50))) 204 return -1; 205 206 while (getppid() == ppid) 207 sleep(1); 208 209 return 0; 210 } 211 212 static int alloc_anon_noexit(const char *cgroup, void *arg) 213 { 214 int ppid = getppid(); 215 size_t size = (unsigned long)arg; 216 char *buf, *ptr; 217 218 buf = malloc(size); 219 if (buf == NULL) { 220 fprintf(stderr, "malloc() failed\n"); 221 return -1; 222 } 223 224 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 225 *ptr = 0; 226 227 while (getppid() == ppid) 228 sleep(1); 229 230 free(buf); 231 return 0; 232 } 233 234 /* 235 * Wait until processes are killed asynchronously by the OOM killer 236 * If we exceed a timeout, fail. 237 */ 238 static int cg_test_proc_killed(const char *cgroup) 239 { 240 int limit; 241 242 for (limit = 10; limit > 0; limit--) { 243 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 244 return 0; 245 246 usleep(100000); 247 } 248 return -1; 249 } 250 251 static bool reclaim_until(const char *memcg, long goal); 252 253 /* 254 * First, this test creates the following hierarchy: 255 * A memory.min = 0, memory.max = 200M 256 * A/B memory.min = 50M 257 * A/B/C memory.min = 75M, memory.current = 50M 258 * A/B/D memory.min = 25M, memory.current = 50M 259 * A/B/E memory.min = 0, memory.current = 50M 260 * A/B/F memory.min = 500M, memory.current = 0 261 * 262 * (or memory.low if we test soft protection) 263 * 264 * Usages are pagecache and the test keeps a running 265 * process in every leaf cgroup. 266 * Then it creates A/G and creates a significant 267 * memory pressure in A. 268 * 269 * Then it checks actual memory usages and expects that: 270 * A/B memory.current ~= 50M 271 * A/B/C memory.current ~= 29M 272 * A/B/D memory.current ~= 21M 273 * A/B/E memory.current ~= 0 274 * A/B/F memory.current = 0 275 * (for origin of the numbers, see model in memcg_protection.m.) 276 * 277 * After that it tries to allocate more than there is 278 * unprotected memory in A available, and checks that: 279 * a) memory.min protects pagecache even in this case, 280 * b) memory.low allows reclaiming page cache with low events. 281 * 282 * Then we try to reclaim from A/B/C using memory.reclaim until its 283 * usage reaches 10M. 284 * This makes sure that: 285 * (a) We ignore the protection of the reclaim target memcg. 286 * (b) The previously calculated emin value (~29M) should be dismissed. 287 */ 288 static int test_memcg_protection(const char *root, bool min) 289 { 290 int ret = KSFT_FAIL, rc; 291 char *parent[3] = {NULL}; 292 char *children[4] = {NULL}; 293 const char *attribute = min ? "memory.min" : "memory.low"; 294 long c[4]; 295 long current; 296 int i, attempts; 297 int fd; 298 299 fd = get_temp_fd(); 300 if (fd < 0) 301 goto cleanup; 302 303 parent[0] = cg_name(root, "memcg_test_0"); 304 if (!parent[0]) 305 goto cleanup; 306 307 parent[1] = cg_name(parent[0], "memcg_test_1"); 308 if (!parent[1]) 309 goto cleanup; 310 311 parent[2] = cg_name(parent[0], "memcg_test_2"); 312 if (!parent[2]) 313 goto cleanup; 314 315 if (cg_create(parent[0])) 316 goto cleanup; 317 318 if (cg_read_long(parent[0], attribute)) { 319 /* No memory.min on older kernels is fine */ 320 if (min) 321 ret = KSFT_SKIP; 322 goto cleanup; 323 } 324 325 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 326 goto cleanup; 327 328 if (cg_write(parent[0], "memory.max", "200M")) 329 goto cleanup; 330 331 if (cg_write(parent[0], "memory.swap.max", "0")) 332 goto cleanup; 333 334 if (cg_create(parent[1])) 335 goto cleanup; 336 337 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 338 goto cleanup; 339 340 if (cg_create(parent[2])) 341 goto cleanup; 342 343 for (i = 0; i < ARRAY_SIZE(children); i++) { 344 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 345 if (!children[i]) 346 goto cleanup; 347 348 if (cg_create(children[i])) 349 goto cleanup; 350 351 if (i > 2) 352 continue; 353 354 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 355 (void *)(long)fd); 356 } 357 358 if (cg_write(parent[1], attribute, "50M")) 359 goto cleanup; 360 if (cg_write(children[0], attribute, "75M")) 361 goto cleanup; 362 if (cg_write(children[1], attribute, "25M")) 363 goto cleanup; 364 if (cg_write(children[2], attribute, "0")) 365 goto cleanup; 366 if (cg_write(children[3], attribute, "500M")) 367 goto cleanup; 368 369 attempts = 0; 370 while (!values_close(cg_read_long(parent[1], "memory.current"), 371 MB(150), 3)) { 372 if (attempts++ > 5) 373 break; 374 sleep(1); 375 } 376 377 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 378 goto cleanup; 379 380 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 381 goto cleanup; 382 383 for (i = 0; i < ARRAY_SIZE(children); i++) 384 c[i] = cg_read_long(children[i], "memory.current"); 385 386 if (!values_close(c[0], MB(29), 10)) 387 goto cleanup; 388 389 if (!values_close(c[1], MB(21), 10)) 390 goto cleanup; 391 392 if (c[3] != 0) 393 goto cleanup; 394 395 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 396 if (min && !rc) 397 goto cleanup; 398 else if (!min && rc) { 399 fprintf(stderr, 400 "memory.low prevents from allocating anon memory\n"); 401 goto cleanup; 402 } 403 404 current = min ? MB(50) : MB(30); 405 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) 406 goto cleanup; 407 408 if (!reclaim_until(children[0], MB(10))) 409 goto cleanup; 410 411 if (min) { 412 ret = KSFT_PASS; 413 goto cleanup; 414 } 415 416 for (i = 0; i < ARRAY_SIZE(children); i++) { 417 int no_low_events_index = 1; 418 long low, oom; 419 420 oom = cg_read_key_long(children[i], "memory.events", "oom "); 421 low = cg_read_key_long(children[i], "memory.events", "low "); 422 423 if (oom) 424 goto cleanup; 425 if (i <= no_low_events_index && low <= 0) 426 goto cleanup; 427 if (i > no_low_events_index && low) 428 goto cleanup; 429 430 } 431 432 ret = KSFT_PASS; 433 434 cleanup: 435 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 436 if (!children[i]) 437 continue; 438 439 cg_destroy(children[i]); 440 free(children[i]); 441 } 442 443 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 444 if (!parent[i]) 445 continue; 446 447 cg_destroy(parent[i]); 448 free(parent[i]); 449 } 450 close(fd); 451 return ret; 452 } 453 454 static int test_memcg_min(const char *root) 455 { 456 return test_memcg_protection(root, true); 457 } 458 459 static int test_memcg_low(const char *root) 460 { 461 return test_memcg_protection(root, false); 462 } 463 464 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 465 { 466 size_t size = MB(50); 467 int ret = -1; 468 long current, high, max; 469 int fd; 470 471 high = cg_read_long(cgroup, "memory.high"); 472 max = cg_read_long(cgroup, "memory.max"); 473 if (high != MB(30) && max != MB(30)) 474 return -1; 475 476 fd = get_temp_fd(); 477 if (fd < 0) 478 return -1; 479 480 if (alloc_pagecache(fd, size)) 481 goto cleanup; 482 483 current = cg_read_long(cgroup, "memory.current"); 484 if (!values_close(current, MB(30), 5)) 485 goto cleanup; 486 487 ret = 0; 488 489 cleanup: 490 close(fd); 491 return ret; 492 493 } 494 495 /* 496 * This test checks that memory.high limits the amount of 497 * memory which can be consumed by either anonymous memory 498 * or pagecache. 499 */ 500 static int test_memcg_high(const char *root) 501 { 502 int ret = KSFT_FAIL; 503 char *memcg; 504 long high; 505 506 memcg = cg_name(root, "memcg_test"); 507 if (!memcg) 508 goto cleanup; 509 510 if (cg_create(memcg)) 511 goto cleanup; 512 513 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 514 goto cleanup; 515 516 if (cg_write(memcg, "memory.swap.max", "0")) 517 goto cleanup; 518 519 if (cg_write(memcg, "memory.high", "30M")) 520 goto cleanup; 521 522 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 523 goto cleanup; 524 525 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 526 goto cleanup; 527 528 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 529 goto cleanup; 530 531 high = cg_read_key_long(memcg, "memory.events", "high "); 532 if (high <= 0) 533 goto cleanup; 534 535 ret = KSFT_PASS; 536 537 cleanup: 538 cg_destroy(memcg); 539 free(memcg); 540 541 return ret; 542 } 543 544 static int alloc_anon_mlock(const char *cgroup, void *arg) 545 { 546 size_t size = (size_t)arg; 547 void *buf; 548 549 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 550 0, 0); 551 if (buf == MAP_FAILED) 552 return -1; 553 554 mlock(buf, size); 555 munmap(buf, size); 556 return 0; 557 } 558 559 /* 560 * This test checks that memory.high is able to throttle big single shot 561 * allocation i.e. large allocation within one kernel entry. 562 */ 563 static int test_memcg_high_sync(const char *root) 564 { 565 int ret = KSFT_FAIL, pid, fd = -1; 566 char *memcg; 567 long pre_high, pre_max; 568 long post_high, post_max; 569 570 memcg = cg_name(root, "memcg_test"); 571 if (!memcg) 572 goto cleanup; 573 574 if (cg_create(memcg)) 575 goto cleanup; 576 577 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 578 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 579 if (pre_high < 0 || pre_max < 0) 580 goto cleanup; 581 582 if (cg_write(memcg, "memory.swap.max", "0")) 583 goto cleanup; 584 585 if (cg_write(memcg, "memory.high", "30M")) 586 goto cleanup; 587 588 if (cg_write(memcg, "memory.max", "140M")) 589 goto cleanup; 590 591 fd = memcg_prepare_for_wait(memcg); 592 if (fd < 0) 593 goto cleanup; 594 595 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 596 if (pid < 0) 597 goto cleanup; 598 599 cg_wait_for(fd); 600 601 post_high = cg_read_key_long(memcg, "memory.events", "high "); 602 post_max = cg_read_key_long(memcg, "memory.events", "max "); 603 if (post_high < 0 || post_max < 0) 604 goto cleanup; 605 606 if (pre_high == post_high || pre_max != post_max) 607 goto cleanup; 608 609 ret = KSFT_PASS; 610 611 cleanup: 612 if (fd >= 0) 613 close(fd); 614 cg_destroy(memcg); 615 free(memcg); 616 617 return ret; 618 } 619 620 /* 621 * This test checks that memory.max limits the amount of 622 * memory which can be consumed by either anonymous memory 623 * or pagecache. 624 */ 625 static int test_memcg_max(const char *root) 626 { 627 int ret = KSFT_FAIL; 628 char *memcg; 629 long current, max; 630 631 memcg = cg_name(root, "memcg_test"); 632 if (!memcg) 633 goto cleanup; 634 635 if (cg_create(memcg)) 636 goto cleanup; 637 638 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 639 goto cleanup; 640 641 if (cg_write(memcg, "memory.swap.max", "0")) 642 goto cleanup; 643 644 if (cg_write(memcg, "memory.max", "30M")) 645 goto cleanup; 646 647 /* Should be killed by OOM killer */ 648 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 649 goto cleanup; 650 651 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 652 goto cleanup; 653 654 current = cg_read_long(memcg, "memory.current"); 655 if (current > MB(30) || !current) 656 goto cleanup; 657 658 max = cg_read_key_long(memcg, "memory.events", "max "); 659 if (max <= 0) 660 goto cleanup; 661 662 ret = KSFT_PASS; 663 664 cleanup: 665 cg_destroy(memcg); 666 free(memcg); 667 668 return ret; 669 } 670 671 /* 672 * Reclaim from @memcg until usage reaches @goal by writing to 673 * memory.reclaim. 674 * 675 * This function will return false if the usage is already below the 676 * goal. 677 * 678 * This function assumes that writing to memory.reclaim is the only 679 * source of change in memory.current (no concurrent allocations or 680 * reclaim). 681 * 682 * This function makes sure memory.reclaim is sane. It will return 683 * false if memory.reclaim's error codes do not make sense, even if 684 * the usage goal was satisfied. 685 */ 686 static bool reclaim_until(const char *memcg, long goal) 687 { 688 char buf[64]; 689 int retries, err; 690 long current, to_reclaim; 691 bool reclaimed = false; 692 693 for (retries = 5; retries > 0; retries--) { 694 current = cg_read_long(memcg, "memory.current"); 695 696 if (current < goal || values_close(current, goal, 3)) 697 break; 698 /* Did memory.reclaim return 0 incorrectly? */ 699 else if (reclaimed) 700 return false; 701 702 to_reclaim = current - goal; 703 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 704 err = cg_write(memcg, "memory.reclaim", buf); 705 if (!err) 706 reclaimed = true; 707 else if (err != -EAGAIN) 708 return false; 709 } 710 return reclaimed; 711 } 712 713 /* 714 * This test checks that memory.reclaim reclaims the given 715 * amount of memory (from both anon and file, if possible). 716 */ 717 static int test_memcg_reclaim(const char *root) 718 { 719 int ret = KSFT_FAIL, fd, retries; 720 char *memcg; 721 long current, expected_usage; 722 723 memcg = cg_name(root, "memcg_test"); 724 if (!memcg) 725 goto cleanup; 726 727 if (cg_create(memcg)) 728 goto cleanup; 729 730 current = cg_read_long(memcg, "memory.current"); 731 if (current != 0) 732 goto cleanup; 733 734 fd = get_temp_fd(); 735 if (fd < 0) 736 goto cleanup; 737 738 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 739 740 /* 741 * If swap is enabled, try to reclaim from both anon and file, else try 742 * to reclaim from file only. 743 */ 744 if (is_swap_enabled()) { 745 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 746 expected_usage = MB(100); 747 } else 748 expected_usage = MB(50); 749 750 /* 751 * Wait until current usage reaches the expected usage (or we run out of 752 * retries). 753 */ 754 retries = 5; 755 while (!values_close(cg_read_long(memcg, "memory.current"), 756 expected_usage, 10)) { 757 if (retries--) { 758 sleep(1); 759 continue; 760 } else { 761 fprintf(stderr, 762 "failed to allocate %ld for memcg reclaim test\n", 763 expected_usage); 764 goto cleanup; 765 } 766 } 767 768 /* 769 * Reclaim until current reaches 30M, this makes sure we hit both anon 770 * and file if swap is enabled. 771 */ 772 if (!reclaim_until(memcg, MB(30))) 773 goto cleanup; 774 775 ret = KSFT_PASS; 776 cleanup: 777 cg_destroy(memcg); 778 free(memcg); 779 close(fd); 780 781 return ret; 782 } 783 784 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 785 { 786 long mem_max = (long)arg; 787 size_t size = MB(50); 788 char *buf, *ptr; 789 long mem_current, swap_current; 790 int ret = -1; 791 792 buf = malloc(size); 793 if (buf == NULL) { 794 fprintf(stderr, "malloc() failed\n"); 795 return -1; 796 } 797 798 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 799 *ptr = 0; 800 801 mem_current = cg_read_long(cgroup, "memory.current"); 802 if (!mem_current || !values_close(mem_current, mem_max, 3)) 803 goto cleanup; 804 805 swap_current = cg_read_long(cgroup, "memory.swap.current"); 806 if (!swap_current || 807 !values_close(mem_current + swap_current, size, 3)) 808 goto cleanup; 809 810 ret = 0; 811 cleanup: 812 free(buf); 813 return ret; 814 } 815 816 /* 817 * This test checks that memory.swap.max limits the amount of 818 * anonymous memory which can be swapped out. 819 */ 820 static int test_memcg_swap_max(const char *root) 821 { 822 int ret = KSFT_FAIL; 823 char *memcg; 824 long max; 825 826 if (!is_swap_enabled()) 827 return KSFT_SKIP; 828 829 memcg = cg_name(root, "memcg_test"); 830 if (!memcg) 831 goto cleanup; 832 833 if (cg_create(memcg)) 834 goto cleanup; 835 836 if (cg_read_long(memcg, "memory.swap.current")) { 837 ret = KSFT_SKIP; 838 goto cleanup; 839 } 840 841 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 842 goto cleanup; 843 844 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 845 goto cleanup; 846 847 if (cg_write(memcg, "memory.swap.max", "30M")) 848 goto cleanup; 849 850 if (cg_write(memcg, "memory.max", "30M")) 851 goto cleanup; 852 853 /* Should be killed by OOM killer */ 854 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 855 goto cleanup; 856 857 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 858 goto cleanup; 859 860 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 861 goto cleanup; 862 863 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 864 goto cleanup; 865 866 max = cg_read_key_long(memcg, "memory.events", "max "); 867 if (max <= 0) 868 goto cleanup; 869 870 ret = KSFT_PASS; 871 872 cleanup: 873 cg_destroy(memcg); 874 free(memcg); 875 876 return ret; 877 } 878 879 /* 880 * This test disables swapping and tries to allocate anonymous memory 881 * up to OOM. Then it checks for oom and oom_kill events in 882 * memory.events. 883 */ 884 static int test_memcg_oom_events(const char *root) 885 { 886 int ret = KSFT_FAIL; 887 char *memcg; 888 889 memcg = cg_name(root, "memcg_test"); 890 if (!memcg) 891 goto cleanup; 892 893 if (cg_create(memcg)) 894 goto cleanup; 895 896 if (cg_write(memcg, "memory.max", "30M")) 897 goto cleanup; 898 899 if (cg_write(memcg, "memory.swap.max", "0")) 900 goto cleanup; 901 902 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 903 goto cleanup; 904 905 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 906 goto cleanup; 907 908 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 909 goto cleanup; 910 911 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 912 goto cleanup; 913 914 ret = KSFT_PASS; 915 916 cleanup: 917 cg_destroy(memcg); 918 free(memcg); 919 920 return ret; 921 } 922 923 struct tcp_server_args { 924 unsigned short port; 925 int ctl[2]; 926 }; 927 928 static int tcp_server(const char *cgroup, void *arg) 929 { 930 struct tcp_server_args *srv_args = arg; 931 struct sockaddr_in6 saddr = { 0 }; 932 socklen_t slen = sizeof(saddr); 933 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 934 935 close(srv_args->ctl[0]); 936 ctl_fd = srv_args->ctl[1]; 937 938 saddr.sin6_family = AF_INET6; 939 saddr.sin6_addr = in6addr_any; 940 saddr.sin6_port = htons(srv_args->port); 941 942 sk = socket(AF_INET6, SOCK_STREAM, 0); 943 if (sk < 0) 944 return ret; 945 946 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 947 goto cleanup; 948 949 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 950 write(ctl_fd, &errno, sizeof(errno)); 951 goto cleanup; 952 } 953 954 if (listen(sk, 1)) 955 goto cleanup; 956 957 ret = 0; 958 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 959 ret = -1; 960 goto cleanup; 961 } 962 963 client_sk = accept(sk, NULL, NULL); 964 if (client_sk < 0) 965 goto cleanup; 966 967 ret = -1; 968 for (;;) { 969 uint8_t buf[0x100000]; 970 971 if (write(client_sk, buf, sizeof(buf)) <= 0) { 972 if (errno == ECONNRESET) 973 ret = 0; 974 break; 975 } 976 } 977 978 close(client_sk); 979 980 cleanup: 981 close(sk); 982 return ret; 983 } 984 985 static int tcp_client(const char *cgroup, unsigned short port) 986 { 987 const char server[] = "localhost"; 988 struct addrinfo *ai; 989 char servport[6]; 990 int retries = 0x10; /* nice round number */ 991 int sk, ret; 992 993 snprintf(servport, sizeof(servport), "%hd", port); 994 ret = getaddrinfo(server, servport, NULL, &ai); 995 if (ret) 996 return ret; 997 998 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 999 if (sk < 0) 1000 goto free_ainfo; 1001 1002 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1003 if (ret < 0) 1004 goto close_sk; 1005 1006 ret = KSFT_FAIL; 1007 while (retries--) { 1008 uint8_t buf[0x100000]; 1009 long current, sock; 1010 1011 if (read(sk, buf, sizeof(buf)) <= 0) 1012 goto close_sk; 1013 1014 current = cg_read_long(cgroup, "memory.current"); 1015 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1016 1017 if (current < 0 || sock < 0) 1018 goto close_sk; 1019 1020 if (values_close(current, sock, 10)) { 1021 ret = KSFT_PASS; 1022 break; 1023 } 1024 } 1025 1026 close_sk: 1027 close(sk); 1028 free_ainfo: 1029 freeaddrinfo(ai); 1030 return ret; 1031 } 1032 1033 /* 1034 * This test checks socket memory accounting. 1035 * The test forks a TCP server listens on a random port between 1000 1036 * and 61000. Once it gets a client connection, it starts writing to 1037 * its socket. 1038 * The TCP client interleaves reads from the socket with check whether 1039 * memory.current and memory.stat.sock are similar. 1040 */ 1041 static int test_memcg_sock(const char *root) 1042 { 1043 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1044 unsigned short port; 1045 char *memcg; 1046 1047 memcg = cg_name(root, "memcg_test"); 1048 if (!memcg) 1049 goto cleanup; 1050 1051 if (cg_create(memcg)) 1052 goto cleanup; 1053 1054 while (bind_retries--) { 1055 struct tcp_server_args args; 1056 1057 if (pipe(args.ctl)) 1058 goto cleanup; 1059 1060 port = args.port = 1000 + rand() % 60000; 1061 1062 pid = cg_run_nowait(memcg, tcp_server, &args); 1063 if (pid < 0) 1064 goto cleanup; 1065 1066 close(args.ctl[1]); 1067 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1068 goto cleanup; 1069 close(args.ctl[0]); 1070 1071 if (!err) 1072 break; 1073 if (err != EADDRINUSE) 1074 goto cleanup; 1075 1076 waitpid(pid, NULL, 0); 1077 } 1078 1079 if (err == EADDRINUSE) { 1080 ret = KSFT_SKIP; 1081 goto cleanup; 1082 } 1083 1084 if (tcp_client(memcg, port) != KSFT_PASS) 1085 goto cleanup; 1086 1087 waitpid(pid, &err, 0); 1088 if (WEXITSTATUS(err)) 1089 goto cleanup; 1090 1091 if (cg_read_long(memcg, "memory.current") < 0) 1092 goto cleanup; 1093 1094 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1095 goto cleanup; 1096 1097 ret = KSFT_PASS; 1098 1099 cleanup: 1100 cg_destroy(memcg); 1101 free(memcg); 1102 1103 return ret; 1104 } 1105 1106 /* 1107 * This test disables swapping and tries to allocate anonymous memory 1108 * up to OOM with memory.group.oom set. Then it checks that all 1109 * processes in the leaf were killed. It also checks that oom_events 1110 * were propagated to the parent level. 1111 */ 1112 static int test_memcg_oom_group_leaf_events(const char *root) 1113 { 1114 int ret = KSFT_FAIL; 1115 char *parent, *child; 1116 long parent_oom_events; 1117 1118 parent = cg_name(root, "memcg_test_0"); 1119 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1120 1121 if (!parent || !child) 1122 goto cleanup; 1123 1124 if (cg_create(parent)) 1125 goto cleanup; 1126 1127 if (cg_create(child)) 1128 goto cleanup; 1129 1130 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1131 goto cleanup; 1132 1133 if (cg_write(child, "memory.max", "50M")) 1134 goto cleanup; 1135 1136 if (cg_write(child, "memory.swap.max", "0")) 1137 goto cleanup; 1138 1139 if (cg_write(child, "memory.oom.group", "1")) 1140 goto cleanup; 1141 1142 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1143 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1144 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1145 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1146 goto cleanup; 1147 1148 if (cg_test_proc_killed(child)) 1149 goto cleanup; 1150 1151 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1152 goto cleanup; 1153 1154 parent_oom_events = cg_read_key_long( 1155 parent, "memory.events", "oom_kill "); 1156 /* 1157 * If memory_localevents is not enabled (the default), the parent should 1158 * count OOM events in its children groups. Otherwise, it should not 1159 * have observed any events. 1160 */ 1161 if (has_localevents && parent_oom_events != 0) 1162 goto cleanup; 1163 else if (!has_localevents && parent_oom_events <= 0) 1164 goto cleanup; 1165 1166 ret = KSFT_PASS; 1167 1168 cleanup: 1169 if (child) 1170 cg_destroy(child); 1171 if (parent) 1172 cg_destroy(parent); 1173 free(child); 1174 free(parent); 1175 1176 return ret; 1177 } 1178 1179 /* 1180 * This test disables swapping and tries to allocate anonymous memory 1181 * up to OOM with memory.group.oom set. Then it checks that all 1182 * processes in the parent and leaf were killed. 1183 */ 1184 static int test_memcg_oom_group_parent_events(const char *root) 1185 { 1186 int ret = KSFT_FAIL; 1187 char *parent, *child; 1188 1189 parent = cg_name(root, "memcg_test_0"); 1190 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1191 1192 if (!parent || !child) 1193 goto cleanup; 1194 1195 if (cg_create(parent)) 1196 goto cleanup; 1197 1198 if (cg_create(child)) 1199 goto cleanup; 1200 1201 if (cg_write(parent, "memory.max", "80M")) 1202 goto cleanup; 1203 1204 if (cg_write(parent, "memory.swap.max", "0")) 1205 goto cleanup; 1206 1207 if (cg_write(parent, "memory.oom.group", "1")) 1208 goto cleanup; 1209 1210 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1211 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1212 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1213 1214 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1215 goto cleanup; 1216 1217 if (cg_test_proc_killed(child)) 1218 goto cleanup; 1219 if (cg_test_proc_killed(parent)) 1220 goto cleanup; 1221 1222 ret = KSFT_PASS; 1223 1224 cleanup: 1225 if (child) 1226 cg_destroy(child); 1227 if (parent) 1228 cg_destroy(parent); 1229 free(child); 1230 free(parent); 1231 1232 return ret; 1233 } 1234 1235 /* 1236 * This test disables swapping and tries to allocate anonymous memory 1237 * up to OOM with memory.group.oom set. Then it checks that all 1238 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1239 */ 1240 static int test_memcg_oom_group_score_events(const char *root) 1241 { 1242 int ret = KSFT_FAIL; 1243 char *memcg; 1244 int safe_pid; 1245 1246 memcg = cg_name(root, "memcg_test_0"); 1247 1248 if (!memcg) 1249 goto cleanup; 1250 1251 if (cg_create(memcg)) 1252 goto cleanup; 1253 1254 if (cg_write(memcg, "memory.max", "50M")) 1255 goto cleanup; 1256 1257 if (cg_write(memcg, "memory.swap.max", "0")) 1258 goto cleanup; 1259 1260 if (cg_write(memcg, "memory.oom.group", "1")) 1261 goto cleanup; 1262 1263 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1264 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1265 goto cleanup; 1266 1267 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1268 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1269 goto cleanup; 1270 1271 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1272 goto cleanup; 1273 1274 if (kill(safe_pid, SIGKILL)) 1275 goto cleanup; 1276 1277 ret = KSFT_PASS; 1278 1279 cleanup: 1280 if (memcg) 1281 cg_destroy(memcg); 1282 free(memcg); 1283 1284 return ret; 1285 } 1286 1287 #define T(x) { x, #x } 1288 struct memcg_test { 1289 int (*fn)(const char *root); 1290 const char *name; 1291 } tests[] = { 1292 T(test_memcg_subtree_control), 1293 T(test_memcg_current), 1294 T(test_memcg_min), 1295 T(test_memcg_low), 1296 T(test_memcg_high), 1297 T(test_memcg_high_sync), 1298 T(test_memcg_max), 1299 T(test_memcg_reclaim), 1300 T(test_memcg_oom_events), 1301 T(test_memcg_swap_max), 1302 T(test_memcg_sock), 1303 T(test_memcg_oom_group_leaf_events), 1304 T(test_memcg_oom_group_parent_events), 1305 T(test_memcg_oom_group_score_events), 1306 }; 1307 #undef T 1308 1309 int main(int argc, char **argv) 1310 { 1311 char root[PATH_MAX]; 1312 int i, proc_status, ret = EXIT_SUCCESS; 1313 1314 if (cg_find_unified_root(root, sizeof(root))) 1315 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1316 1317 /* 1318 * Check that memory controller is available: 1319 * memory is listed in cgroup.controllers 1320 */ 1321 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1322 ksft_exit_skip("memory controller isn't available\n"); 1323 1324 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1325 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1326 ksft_exit_skip("Failed to set memory controller\n"); 1327 1328 proc_status = proc_mount_contains("memory_recursiveprot"); 1329 if (proc_status < 0) 1330 ksft_exit_skip("Failed to query cgroup mount option\n"); 1331 has_recursiveprot = proc_status; 1332 1333 proc_status = proc_mount_contains("memory_localevents"); 1334 if (proc_status < 0) 1335 ksft_exit_skip("Failed to query cgroup mount option\n"); 1336 has_localevents = proc_status; 1337 1338 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1339 switch (tests[i].fn(root)) { 1340 case KSFT_PASS: 1341 ksft_test_result_pass("%s\n", tests[i].name); 1342 break; 1343 case KSFT_SKIP: 1344 ksft_test_result_skip("%s\n", tests[i].name); 1345 break; 1346 default: 1347 ret = EXIT_FAILURE; 1348 ksft_test_result_fail("%s\n", tests[i].name); 1349 break; 1350 } 1351 } 1352 1353 return ret; 1354 } 1355