1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 /* 28 * This test creates two nested cgroups with and without enabling 29 * the memory controller. 30 */ 31 static int test_memcg_subtree_control(const char *root) 32 { 33 char *parent, *child, *parent2 = NULL, *child2 = NULL; 34 int ret = KSFT_FAIL; 35 char buf[PAGE_SIZE]; 36 37 /* Create two nested cgroups with the memory controller enabled */ 38 parent = cg_name(root, "memcg_test_0"); 39 child = cg_name(root, "memcg_test_0/memcg_test_1"); 40 if (!parent || !child) 41 goto cleanup_free; 42 43 if (cg_create(parent)) 44 goto cleanup_free; 45 46 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 47 goto cleanup_parent; 48 49 if (cg_create(child)) 50 goto cleanup_parent; 51 52 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 53 goto cleanup_child; 54 55 /* Create two nested cgroups without enabling memory controller */ 56 parent2 = cg_name(root, "memcg_test_1"); 57 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 58 if (!parent2 || !child2) 59 goto cleanup_free2; 60 61 if (cg_create(parent2)) 62 goto cleanup_free2; 63 64 if (cg_create(child2)) 65 goto cleanup_parent2; 66 67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 68 goto cleanup_all; 69 70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 71 goto cleanup_all; 72 73 ret = KSFT_PASS; 74 75 cleanup_all: 76 cg_destroy(child2); 77 cleanup_parent2: 78 cg_destroy(parent2); 79 cleanup_free2: 80 free(parent2); 81 free(child2); 82 cleanup_child: 83 cg_destroy(child); 84 cleanup_parent: 85 cg_destroy(parent); 86 cleanup_free: 87 free(parent); 88 free(child); 89 90 return ret; 91 } 92 93 static int alloc_anon_50M_check(const char *cgroup, void *arg) 94 { 95 size_t size = MB(50); 96 char *buf, *ptr; 97 long anon, current; 98 int ret = -1; 99 100 buf = malloc(size); 101 if (buf == NULL) { 102 fprintf(stderr, "malloc() failed\n"); 103 return -1; 104 } 105 106 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 107 *ptr = 0; 108 109 current = cg_read_long(cgroup, "memory.current"); 110 if (current < size) 111 goto cleanup; 112 113 if (!values_close(size, current, 3)) 114 goto cleanup; 115 116 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 117 if (anon < 0) 118 goto cleanup; 119 120 if (!values_close(anon, current, 3)) 121 goto cleanup; 122 123 ret = 0; 124 cleanup: 125 free(buf); 126 return ret; 127 } 128 129 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 130 { 131 size_t size = MB(50); 132 int ret = -1; 133 long current, file; 134 int fd; 135 136 fd = get_temp_fd(); 137 if (fd < 0) 138 return -1; 139 140 if (alloc_pagecache(fd, size)) 141 goto cleanup; 142 143 current = cg_read_long(cgroup, "memory.current"); 144 if (current < size) 145 goto cleanup; 146 147 file = cg_read_key_long(cgroup, "memory.stat", "file "); 148 if (file < 0) 149 goto cleanup; 150 151 if (!values_close(file, current, 10)) 152 goto cleanup; 153 154 ret = 0; 155 156 cleanup: 157 close(fd); 158 return ret; 159 } 160 161 /* 162 * This test create a memory cgroup, allocates 163 * some anonymous memory and some pagecache 164 * and check memory.current and some memory.stat values. 165 */ 166 static int test_memcg_current(const char *root) 167 { 168 int ret = KSFT_FAIL; 169 long current; 170 char *memcg; 171 172 memcg = cg_name(root, "memcg_test"); 173 if (!memcg) 174 goto cleanup; 175 176 if (cg_create(memcg)) 177 goto cleanup; 178 179 current = cg_read_long(memcg, "memory.current"); 180 if (current != 0) 181 goto cleanup; 182 183 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 184 goto cleanup; 185 186 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 187 goto cleanup; 188 189 ret = KSFT_PASS; 190 191 cleanup: 192 cg_destroy(memcg); 193 free(memcg); 194 195 return ret; 196 } 197 198 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 199 { 200 int fd = (long)arg; 201 int ppid = getppid(); 202 203 if (alloc_pagecache(fd, MB(50))) 204 return -1; 205 206 while (getppid() == ppid) 207 sleep(1); 208 209 return 0; 210 } 211 212 static int alloc_anon_noexit(const char *cgroup, void *arg) 213 { 214 int ppid = getppid(); 215 size_t size = (unsigned long)arg; 216 char *buf, *ptr; 217 218 buf = malloc(size); 219 if (buf == NULL) { 220 fprintf(stderr, "malloc() failed\n"); 221 return -1; 222 } 223 224 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 225 *ptr = 0; 226 227 while (getppid() == ppid) 228 sleep(1); 229 230 free(buf); 231 return 0; 232 } 233 234 /* 235 * Wait until processes are killed asynchronously by the OOM killer 236 * If we exceed a timeout, fail. 237 */ 238 static int cg_test_proc_killed(const char *cgroup) 239 { 240 int limit; 241 242 for (limit = 10; limit > 0; limit--) { 243 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 244 return 0; 245 246 usleep(100000); 247 } 248 return -1; 249 } 250 251 static bool reclaim_until(const char *memcg, long goal); 252 253 /* 254 * First, this test creates the following hierarchy: 255 * A memory.min = 0, memory.max = 200M 256 * A/B memory.min = 50M 257 * A/B/C memory.min = 75M, memory.current = 50M 258 * A/B/D memory.min = 25M, memory.current = 50M 259 * A/B/E memory.min = 0, memory.current = 50M 260 * A/B/F memory.min = 500M, memory.current = 0 261 * 262 * (or memory.low if we test soft protection) 263 * 264 * Usages are pagecache and the test keeps a running 265 * process in every leaf cgroup. 266 * Then it creates A/G and creates a significant 267 * memory pressure in A. 268 * 269 * Then it checks actual memory usages and expects that: 270 * A/B memory.current ~= 50M 271 * A/B/C memory.current ~= 29M 272 * A/B/D memory.current ~= 21M 273 * A/B/E memory.current ~= 0 274 * A/B/F memory.current = 0 275 * (for origin of the numbers, see model in memcg_protection.m.) 276 * 277 * After that it tries to allocate more than there is 278 * unprotected memory in A available, and checks that: 279 * a) memory.min protects pagecache even in this case, 280 * b) memory.low allows reclaiming page cache with low events. 281 * 282 * Then we try to reclaim from A/B/C using memory.reclaim until its 283 * usage reaches 10M. 284 * This makes sure that: 285 * (a) We ignore the protection of the reclaim target memcg. 286 * (b) The previously calculated emin value (~29M) should be dismissed. 287 */ 288 static int test_memcg_protection(const char *root, bool min) 289 { 290 int ret = KSFT_FAIL, rc; 291 char *parent[3] = {NULL}; 292 char *children[4] = {NULL}; 293 const char *attribute = min ? "memory.min" : "memory.low"; 294 long c[4]; 295 int i, attempts; 296 int fd; 297 298 fd = get_temp_fd(); 299 if (fd < 0) 300 goto cleanup; 301 302 parent[0] = cg_name(root, "memcg_test_0"); 303 if (!parent[0]) 304 goto cleanup; 305 306 parent[1] = cg_name(parent[0], "memcg_test_1"); 307 if (!parent[1]) 308 goto cleanup; 309 310 parent[2] = cg_name(parent[0], "memcg_test_2"); 311 if (!parent[2]) 312 goto cleanup; 313 314 if (cg_create(parent[0])) 315 goto cleanup; 316 317 if (cg_read_long(parent[0], attribute)) { 318 /* No memory.min on older kernels is fine */ 319 if (min) 320 ret = KSFT_SKIP; 321 goto cleanup; 322 } 323 324 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 325 goto cleanup; 326 327 if (cg_write(parent[0], "memory.max", "200M")) 328 goto cleanup; 329 330 if (cg_write(parent[0], "memory.swap.max", "0")) 331 goto cleanup; 332 333 if (cg_create(parent[1])) 334 goto cleanup; 335 336 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 337 goto cleanup; 338 339 if (cg_create(parent[2])) 340 goto cleanup; 341 342 for (i = 0; i < ARRAY_SIZE(children); i++) { 343 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 344 if (!children[i]) 345 goto cleanup; 346 347 if (cg_create(children[i])) 348 goto cleanup; 349 350 if (i > 2) 351 continue; 352 353 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 354 (void *)(long)fd); 355 } 356 357 if (cg_write(parent[1], attribute, "50M")) 358 goto cleanup; 359 if (cg_write(children[0], attribute, "75M")) 360 goto cleanup; 361 if (cg_write(children[1], attribute, "25M")) 362 goto cleanup; 363 if (cg_write(children[2], attribute, "0")) 364 goto cleanup; 365 if (cg_write(children[3], attribute, "500M")) 366 goto cleanup; 367 368 attempts = 0; 369 while (!values_close(cg_read_long(parent[1], "memory.current"), 370 MB(150), 3)) { 371 if (attempts++ > 5) 372 break; 373 sleep(1); 374 } 375 376 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 377 goto cleanup; 378 379 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 380 goto cleanup; 381 382 for (i = 0; i < ARRAY_SIZE(children); i++) 383 c[i] = cg_read_long(children[i], "memory.current"); 384 385 if (!values_close(c[0], MB(29), 10)) 386 goto cleanup; 387 388 if (!values_close(c[1], MB(21), 10)) 389 goto cleanup; 390 391 if (c[3] != 0) 392 goto cleanup; 393 394 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 395 if (min && !rc) 396 goto cleanup; 397 else if (!min && rc) { 398 fprintf(stderr, 399 "memory.low prevents from allocating anon memory\n"); 400 goto cleanup; 401 } 402 403 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 404 goto cleanup; 405 406 if (!reclaim_until(children[0], MB(10))) 407 goto cleanup; 408 409 if (min) { 410 ret = KSFT_PASS; 411 goto cleanup; 412 } 413 414 for (i = 0; i < ARRAY_SIZE(children); i++) { 415 int no_low_events_index = 1; 416 long low, oom; 417 418 oom = cg_read_key_long(children[i], "memory.events", "oom "); 419 low = cg_read_key_long(children[i], "memory.events", "low "); 420 421 if (oom) 422 goto cleanup; 423 if (i <= no_low_events_index && low <= 0) 424 goto cleanup; 425 if (i > no_low_events_index && low) 426 goto cleanup; 427 428 } 429 430 ret = KSFT_PASS; 431 432 cleanup: 433 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 434 if (!children[i]) 435 continue; 436 437 cg_destroy(children[i]); 438 free(children[i]); 439 } 440 441 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 442 if (!parent[i]) 443 continue; 444 445 cg_destroy(parent[i]); 446 free(parent[i]); 447 } 448 close(fd); 449 return ret; 450 } 451 452 static int test_memcg_min(const char *root) 453 { 454 return test_memcg_protection(root, true); 455 } 456 457 static int test_memcg_low(const char *root) 458 { 459 return test_memcg_protection(root, false); 460 } 461 462 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 463 { 464 size_t size = MB(50); 465 int ret = -1; 466 long current, high, max; 467 int fd; 468 469 high = cg_read_long(cgroup, "memory.high"); 470 max = cg_read_long(cgroup, "memory.max"); 471 if (high != MB(30) && max != MB(30)) 472 return -1; 473 474 fd = get_temp_fd(); 475 if (fd < 0) 476 return -1; 477 478 if (alloc_pagecache(fd, size)) 479 goto cleanup; 480 481 current = cg_read_long(cgroup, "memory.current"); 482 if (!values_close(current, MB(30), 5)) 483 goto cleanup; 484 485 ret = 0; 486 487 cleanup: 488 close(fd); 489 return ret; 490 491 } 492 493 /* 494 * This test checks that memory.high limits the amount of 495 * memory which can be consumed by either anonymous memory 496 * or pagecache. 497 */ 498 static int test_memcg_high(const char *root) 499 { 500 int ret = KSFT_FAIL; 501 char *memcg; 502 long high; 503 504 memcg = cg_name(root, "memcg_test"); 505 if (!memcg) 506 goto cleanup; 507 508 if (cg_create(memcg)) 509 goto cleanup; 510 511 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 512 goto cleanup; 513 514 if (cg_write(memcg, "memory.swap.max", "0")) 515 goto cleanup; 516 517 if (cg_write(memcg, "memory.high", "30M")) 518 goto cleanup; 519 520 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 521 goto cleanup; 522 523 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 524 goto cleanup; 525 526 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 527 goto cleanup; 528 529 high = cg_read_key_long(memcg, "memory.events", "high "); 530 if (high <= 0) 531 goto cleanup; 532 533 ret = KSFT_PASS; 534 535 cleanup: 536 cg_destroy(memcg); 537 free(memcg); 538 539 return ret; 540 } 541 542 static int alloc_anon_mlock(const char *cgroup, void *arg) 543 { 544 size_t size = (size_t)arg; 545 void *buf; 546 547 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 548 0, 0); 549 if (buf == MAP_FAILED) 550 return -1; 551 552 mlock(buf, size); 553 munmap(buf, size); 554 return 0; 555 } 556 557 /* 558 * This test checks that memory.high is able to throttle big single shot 559 * allocation i.e. large allocation within one kernel entry. 560 */ 561 static int test_memcg_high_sync(const char *root) 562 { 563 int ret = KSFT_FAIL, pid, fd = -1; 564 char *memcg; 565 long pre_high, pre_max; 566 long post_high, post_max; 567 568 memcg = cg_name(root, "memcg_test"); 569 if (!memcg) 570 goto cleanup; 571 572 if (cg_create(memcg)) 573 goto cleanup; 574 575 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 576 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 577 if (pre_high < 0 || pre_max < 0) 578 goto cleanup; 579 580 if (cg_write(memcg, "memory.swap.max", "0")) 581 goto cleanup; 582 583 if (cg_write(memcg, "memory.high", "30M")) 584 goto cleanup; 585 586 if (cg_write(memcg, "memory.max", "140M")) 587 goto cleanup; 588 589 fd = memcg_prepare_for_wait(memcg); 590 if (fd < 0) 591 goto cleanup; 592 593 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 594 if (pid < 0) 595 goto cleanup; 596 597 cg_wait_for(fd); 598 599 post_high = cg_read_key_long(memcg, "memory.events", "high "); 600 post_max = cg_read_key_long(memcg, "memory.events", "max "); 601 if (post_high < 0 || post_max < 0) 602 goto cleanup; 603 604 if (pre_high == post_high || pre_max != post_max) 605 goto cleanup; 606 607 ret = KSFT_PASS; 608 609 cleanup: 610 if (fd >= 0) 611 close(fd); 612 cg_destroy(memcg); 613 free(memcg); 614 615 return ret; 616 } 617 618 /* 619 * This test checks that memory.max limits the amount of 620 * memory which can be consumed by either anonymous memory 621 * or pagecache. 622 */ 623 static int test_memcg_max(const char *root) 624 { 625 int ret = KSFT_FAIL; 626 char *memcg; 627 long current, max; 628 629 memcg = cg_name(root, "memcg_test"); 630 if (!memcg) 631 goto cleanup; 632 633 if (cg_create(memcg)) 634 goto cleanup; 635 636 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 637 goto cleanup; 638 639 if (cg_write(memcg, "memory.swap.max", "0")) 640 goto cleanup; 641 642 if (cg_write(memcg, "memory.max", "30M")) 643 goto cleanup; 644 645 /* Should be killed by OOM killer */ 646 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 647 goto cleanup; 648 649 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 650 goto cleanup; 651 652 current = cg_read_long(memcg, "memory.current"); 653 if (current > MB(30) || !current) 654 goto cleanup; 655 656 max = cg_read_key_long(memcg, "memory.events", "max "); 657 if (max <= 0) 658 goto cleanup; 659 660 ret = KSFT_PASS; 661 662 cleanup: 663 cg_destroy(memcg); 664 free(memcg); 665 666 return ret; 667 } 668 669 /* 670 * Reclaim from @memcg until usage reaches @goal by writing to 671 * memory.reclaim. 672 * 673 * This function will return false if the usage is already below the 674 * goal. 675 * 676 * This function assumes that writing to memory.reclaim is the only 677 * source of change in memory.current (no concurrent allocations or 678 * reclaim). 679 * 680 * This function makes sure memory.reclaim is sane. It will return 681 * false if memory.reclaim's error codes do not make sense, even if 682 * the usage goal was satisfied. 683 */ 684 static bool reclaim_until(const char *memcg, long goal) 685 { 686 char buf[64]; 687 int retries, err; 688 long current, to_reclaim; 689 bool reclaimed = false; 690 691 for (retries = 5; retries > 0; retries--) { 692 current = cg_read_long(memcg, "memory.current"); 693 694 if (current < goal || values_close(current, goal, 3)) 695 break; 696 /* Did memory.reclaim return 0 incorrectly? */ 697 else if (reclaimed) 698 return false; 699 700 to_reclaim = current - goal; 701 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 702 err = cg_write(memcg, "memory.reclaim", buf); 703 if (!err) 704 reclaimed = true; 705 else if (err != -EAGAIN) 706 return false; 707 } 708 return reclaimed; 709 } 710 711 /* 712 * This test checks that memory.reclaim reclaims the given 713 * amount of memory (from both anon and file, if possible). 714 */ 715 static int test_memcg_reclaim(const char *root) 716 { 717 int ret = KSFT_FAIL, fd, retries; 718 char *memcg; 719 long current, expected_usage; 720 721 memcg = cg_name(root, "memcg_test"); 722 if (!memcg) 723 goto cleanup; 724 725 if (cg_create(memcg)) 726 goto cleanup; 727 728 current = cg_read_long(memcg, "memory.current"); 729 if (current != 0) 730 goto cleanup; 731 732 fd = get_temp_fd(); 733 if (fd < 0) 734 goto cleanup; 735 736 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 737 738 /* 739 * If swap is enabled, try to reclaim from both anon and file, else try 740 * to reclaim from file only. 741 */ 742 if (is_swap_enabled()) { 743 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 744 expected_usage = MB(100); 745 } else 746 expected_usage = MB(50); 747 748 /* 749 * Wait until current usage reaches the expected usage (or we run out of 750 * retries). 751 */ 752 retries = 5; 753 while (!values_close(cg_read_long(memcg, "memory.current"), 754 expected_usage, 10)) { 755 if (retries--) { 756 sleep(1); 757 continue; 758 } else { 759 fprintf(stderr, 760 "failed to allocate %ld for memcg reclaim test\n", 761 expected_usage); 762 goto cleanup; 763 } 764 } 765 766 /* 767 * Reclaim until current reaches 30M, this makes sure we hit both anon 768 * and file if swap is enabled. 769 */ 770 if (!reclaim_until(memcg, MB(30))) 771 goto cleanup; 772 773 ret = KSFT_PASS; 774 cleanup: 775 cg_destroy(memcg); 776 free(memcg); 777 close(fd); 778 779 return ret; 780 } 781 782 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 783 { 784 long mem_max = (long)arg; 785 size_t size = MB(50); 786 char *buf, *ptr; 787 long mem_current, swap_current; 788 int ret = -1; 789 790 buf = malloc(size); 791 if (buf == NULL) { 792 fprintf(stderr, "malloc() failed\n"); 793 return -1; 794 } 795 796 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 797 *ptr = 0; 798 799 mem_current = cg_read_long(cgroup, "memory.current"); 800 if (!mem_current || !values_close(mem_current, mem_max, 3)) 801 goto cleanup; 802 803 swap_current = cg_read_long(cgroup, "memory.swap.current"); 804 if (!swap_current || 805 !values_close(mem_current + swap_current, size, 3)) 806 goto cleanup; 807 808 ret = 0; 809 cleanup: 810 free(buf); 811 return ret; 812 } 813 814 /* 815 * This test checks that memory.swap.max limits the amount of 816 * anonymous memory which can be swapped out. 817 */ 818 static int test_memcg_swap_max(const char *root) 819 { 820 int ret = KSFT_FAIL; 821 char *memcg; 822 long max; 823 824 if (!is_swap_enabled()) 825 return KSFT_SKIP; 826 827 memcg = cg_name(root, "memcg_test"); 828 if (!memcg) 829 goto cleanup; 830 831 if (cg_create(memcg)) 832 goto cleanup; 833 834 if (cg_read_long(memcg, "memory.swap.current")) { 835 ret = KSFT_SKIP; 836 goto cleanup; 837 } 838 839 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 840 goto cleanup; 841 842 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 843 goto cleanup; 844 845 if (cg_write(memcg, "memory.swap.max", "30M")) 846 goto cleanup; 847 848 if (cg_write(memcg, "memory.max", "30M")) 849 goto cleanup; 850 851 /* Should be killed by OOM killer */ 852 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 853 goto cleanup; 854 855 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 856 goto cleanup; 857 858 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 859 goto cleanup; 860 861 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 862 goto cleanup; 863 864 max = cg_read_key_long(memcg, "memory.events", "max "); 865 if (max <= 0) 866 goto cleanup; 867 868 ret = KSFT_PASS; 869 870 cleanup: 871 cg_destroy(memcg); 872 free(memcg); 873 874 return ret; 875 } 876 877 /* 878 * This test disables swapping and tries to allocate anonymous memory 879 * up to OOM. Then it checks for oom and oom_kill events in 880 * memory.events. 881 */ 882 static int test_memcg_oom_events(const char *root) 883 { 884 int ret = KSFT_FAIL; 885 char *memcg; 886 887 memcg = cg_name(root, "memcg_test"); 888 if (!memcg) 889 goto cleanup; 890 891 if (cg_create(memcg)) 892 goto cleanup; 893 894 if (cg_write(memcg, "memory.max", "30M")) 895 goto cleanup; 896 897 if (cg_write(memcg, "memory.swap.max", "0")) 898 goto cleanup; 899 900 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 901 goto cleanup; 902 903 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 904 goto cleanup; 905 906 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 907 goto cleanup; 908 909 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 910 goto cleanup; 911 912 ret = KSFT_PASS; 913 914 cleanup: 915 cg_destroy(memcg); 916 free(memcg); 917 918 return ret; 919 } 920 921 struct tcp_server_args { 922 unsigned short port; 923 int ctl[2]; 924 }; 925 926 static int tcp_server(const char *cgroup, void *arg) 927 { 928 struct tcp_server_args *srv_args = arg; 929 struct sockaddr_in6 saddr = { 0 }; 930 socklen_t slen = sizeof(saddr); 931 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 932 933 close(srv_args->ctl[0]); 934 ctl_fd = srv_args->ctl[1]; 935 936 saddr.sin6_family = AF_INET6; 937 saddr.sin6_addr = in6addr_any; 938 saddr.sin6_port = htons(srv_args->port); 939 940 sk = socket(AF_INET6, SOCK_STREAM, 0); 941 if (sk < 0) 942 return ret; 943 944 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 945 goto cleanup; 946 947 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 948 write(ctl_fd, &errno, sizeof(errno)); 949 goto cleanup; 950 } 951 952 if (listen(sk, 1)) 953 goto cleanup; 954 955 ret = 0; 956 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 957 ret = -1; 958 goto cleanup; 959 } 960 961 client_sk = accept(sk, NULL, NULL); 962 if (client_sk < 0) 963 goto cleanup; 964 965 ret = -1; 966 for (;;) { 967 uint8_t buf[0x100000]; 968 969 if (write(client_sk, buf, sizeof(buf)) <= 0) { 970 if (errno == ECONNRESET) 971 ret = 0; 972 break; 973 } 974 } 975 976 close(client_sk); 977 978 cleanup: 979 close(sk); 980 return ret; 981 } 982 983 static int tcp_client(const char *cgroup, unsigned short port) 984 { 985 const char server[] = "localhost"; 986 struct addrinfo *ai; 987 char servport[6]; 988 int retries = 0x10; /* nice round number */ 989 int sk, ret; 990 991 snprintf(servport, sizeof(servport), "%hd", port); 992 ret = getaddrinfo(server, servport, NULL, &ai); 993 if (ret) 994 return ret; 995 996 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 997 if (sk < 0) 998 goto free_ainfo; 999 1000 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1001 if (ret < 0) 1002 goto close_sk; 1003 1004 ret = KSFT_FAIL; 1005 while (retries--) { 1006 uint8_t buf[0x100000]; 1007 long current, sock; 1008 1009 if (read(sk, buf, sizeof(buf)) <= 0) 1010 goto close_sk; 1011 1012 current = cg_read_long(cgroup, "memory.current"); 1013 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1014 1015 if (current < 0 || sock < 0) 1016 goto close_sk; 1017 1018 if (values_close(current, sock, 10)) { 1019 ret = KSFT_PASS; 1020 break; 1021 } 1022 } 1023 1024 close_sk: 1025 close(sk); 1026 free_ainfo: 1027 freeaddrinfo(ai); 1028 return ret; 1029 } 1030 1031 /* 1032 * This test checks socket memory accounting. 1033 * The test forks a TCP server listens on a random port between 1000 1034 * and 61000. Once it gets a client connection, it starts writing to 1035 * its socket. 1036 * The TCP client interleaves reads from the socket with check whether 1037 * memory.current and memory.stat.sock are similar. 1038 */ 1039 static int test_memcg_sock(const char *root) 1040 { 1041 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1042 unsigned short port; 1043 char *memcg; 1044 1045 memcg = cg_name(root, "memcg_test"); 1046 if (!memcg) 1047 goto cleanup; 1048 1049 if (cg_create(memcg)) 1050 goto cleanup; 1051 1052 while (bind_retries--) { 1053 struct tcp_server_args args; 1054 1055 if (pipe(args.ctl)) 1056 goto cleanup; 1057 1058 port = args.port = 1000 + rand() % 60000; 1059 1060 pid = cg_run_nowait(memcg, tcp_server, &args); 1061 if (pid < 0) 1062 goto cleanup; 1063 1064 close(args.ctl[1]); 1065 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1066 goto cleanup; 1067 close(args.ctl[0]); 1068 1069 if (!err) 1070 break; 1071 if (err != EADDRINUSE) 1072 goto cleanup; 1073 1074 waitpid(pid, NULL, 0); 1075 } 1076 1077 if (err == EADDRINUSE) { 1078 ret = KSFT_SKIP; 1079 goto cleanup; 1080 } 1081 1082 if (tcp_client(memcg, port) != KSFT_PASS) 1083 goto cleanup; 1084 1085 waitpid(pid, &err, 0); 1086 if (WEXITSTATUS(err)) 1087 goto cleanup; 1088 1089 if (cg_read_long(memcg, "memory.current") < 0) 1090 goto cleanup; 1091 1092 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1093 goto cleanup; 1094 1095 ret = KSFT_PASS; 1096 1097 cleanup: 1098 cg_destroy(memcg); 1099 free(memcg); 1100 1101 return ret; 1102 } 1103 1104 /* 1105 * This test disables swapping and tries to allocate anonymous memory 1106 * up to OOM with memory.group.oom set. Then it checks that all 1107 * processes in the leaf were killed. It also checks that oom_events 1108 * were propagated to the parent level. 1109 */ 1110 static int test_memcg_oom_group_leaf_events(const char *root) 1111 { 1112 int ret = KSFT_FAIL; 1113 char *parent, *child; 1114 long parent_oom_events; 1115 1116 parent = cg_name(root, "memcg_test_0"); 1117 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1118 1119 if (!parent || !child) 1120 goto cleanup; 1121 1122 if (cg_create(parent)) 1123 goto cleanup; 1124 1125 if (cg_create(child)) 1126 goto cleanup; 1127 1128 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1129 goto cleanup; 1130 1131 if (cg_write(child, "memory.max", "50M")) 1132 goto cleanup; 1133 1134 if (cg_write(child, "memory.swap.max", "0")) 1135 goto cleanup; 1136 1137 if (cg_write(child, "memory.oom.group", "1")) 1138 goto cleanup; 1139 1140 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1141 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1142 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1143 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1144 goto cleanup; 1145 1146 if (cg_test_proc_killed(child)) 1147 goto cleanup; 1148 1149 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1150 goto cleanup; 1151 1152 parent_oom_events = cg_read_key_long( 1153 parent, "memory.events", "oom_kill "); 1154 /* 1155 * If memory_localevents is not enabled (the default), the parent should 1156 * count OOM events in its children groups. Otherwise, it should not 1157 * have observed any events. 1158 */ 1159 if (has_localevents && parent_oom_events != 0) 1160 goto cleanup; 1161 else if (!has_localevents && parent_oom_events <= 0) 1162 goto cleanup; 1163 1164 ret = KSFT_PASS; 1165 1166 cleanup: 1167 if (child) 1168 cg_destroy(child); 1169 if (parent) 1170 cg_destroy(parent); 1171 free(child); 1172 free(parent); 1173 1174 return ret; 1175 } 1176 1177 /* 1178 * This test disables swapping and tries to allocate anonymous memory 1179 * up to OOM with memory.group.oom set. Then it checks that all 1180 * processes in the parent and leaf were killed. 1181 */ 1182 static int test_memcg_oom_group_parent_events(const char *root) 1183 { 1184 int ret = KSFT_FAIL; 1185 char *parent, *child; 1186 1187 parent = cg_name(root, "memcg_test_0"); 1188 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1189 1190 if (!parent || !child) 1191 goto cleanup; 1192 1193 if (cg_create(parent)) 1194 goto cleanup; 1195 1196 if (cg_create(child)) 1197 goto cleanup; 1198 1199 if (cg_write(parent, "memory.max", "80M")) 1200 goto cleanup; 1201 1202 if (cg_write(parent, "memory.swap.max", "0")) 1203 goto cleanup; 1204 1205 if (cg_write(parent, "memory.oom.group", "1")) 1206 goto cleanup; 1207 1208 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1209 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1210 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1211 1212 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1213 goto cleanup; 1214 1215 if (cg_test_proc_killed(child)) 1216 goto cleanup; 1217 if (cg_test_proc_killed(parent)) 1218 goto cleanup; 1219 1220 ret = KSFT_PASS; 1221 1222 cleanup: 1223 if (child) 1224 cg_destroy(child); 1225 if (parent) 1226 cg_destroy(parent); 1227 free(child); 1228 free(parent); 1229 1230 return ret; 1231 } 1232 1233 /* 1234 * This test disables swapping and tries to allocate anonymous memory 1235 * up to OOM with memory.group.oom set. Then it checks that all 1236 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1237 */ 1238 static int test_memcg_oom_group_score_events(const char *root) 1239 { 1240 int ret = KSFT_FAIL; 1241 char *memcg; 1242 int safe_pid; 1243 1244 memcg = cg_name(root, "memcg_test_0"); 1245 1246 if (!memcg) 1247 goto cleanup; 1248 1249 if (cg_create(memcg)) 1250 goto cleanup; 1251 1252 if (cg_write(memcg, "memory.max", "50M")) 1253 goto cleanup; 1254 1255 if (cg_write(memcg, "memory.swap.max", "0")) 1256 goto cleanup; 1257 1258 if (cg_write(memcg, "memory.oom.group", "1")) 1259 goto cleanup; 1260 1261 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1262 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1263 goto cleanup; 1264 1265 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1266 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1267 goto cleanup; 1268 1269 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1270 goto cleanup; 1271 1272 if (kill(safe_pid, SIGKILL)) 1273 goto cleanup; 1274 1275 ret = KSFT_PASS; 1276 1277 cleanup: 1278 if (memcg) 1279 cg_destroy(memcg); 1280 free(memcg); 1281 1282 return ret; 1283 } 1284 1285 #define T(x) { x, #x } 1286 struct memcg_test { 1287 int (*fn)(const char *root); 1288 const char *name; 1289 } tests[] = { 1290 T(test_memcg_subtree_control), 1291 T(test_memcg_current), 1292 T(test_memcg_min), 1293 T(test_memcg_low), 1294 T(test_memcg_high), 1295 T(test_memcg_high_sync), 1296 T(test_memcg_max), 1297 T(test_memcg_reclaim), 1298 T(test_memcg_oom_events), 1299 T(test_memcg_swap_max), 1300 T(test_memcg_sock), 1301 T(test_memcg_oom_group_leaf_events), 1302 T(test_memcg_oom_group_parent_events), 1303 T(test_memcg_oom_group_score_events), 1304 }; 1305 #undef T 1306 1307 int main(int argc, char **argv) 1308 { 1309 char root[PATH_MAX]; 1310 int i, proc_status, ret = EXIT_SUCCESS; 1311 1312 if (cg_find_unified_root(root, sizeof(root))) 1313 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1314 1315 /* 1316 * Check that memory controller is available: 1317 * memory is listed in cgroup.controllers 1318 */ 1319 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1320 ksft_exit_skip("memory controller isn't available\n"); 1321 1322 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1323 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1324 ksft_exit_skip("Failed to set memory controller\n"); 1325 1326 proc_status = proc_mount_contains("memory_recursiveprot"); 1327 if (proc_status < 0) 1328 ksft_exit_skip("Failed to query cgroup mount option\n"); 1329 has_recursiveprot = proc_status; 1330 1331 proc_status = proc_mount_contains("memory_localevents"); 1332 if (proc_status < 0) 1333 ksft_exit_skip("Failed to query cgroup mount option\n"); 1334 has_localevents = proc_status; 1335 1336 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1337 switch (tests[i].fn(root)) { 1338 case KSFT_PASS: 1339 ksft_test_result_pass("%s\n", tests[i].name); 1340 break; 1341 case KSFT_SKIP: 1342 ksft_test_result_skip("%s\n", tests[i].name); 1343 break; 1344 default: 1345 ret = EXIT_FAILURE; 1346 ksft_test_result_fail("%s\n", tests[i].name); 1347 break; 1348 } 1349 } 1350 1351 return ret; 1352 } 1353