1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 /* 28 * This test creates two nested cgroups with and without enabling 29 * the memory controller. 30 */ 31 static int test_memcg_subtree_control(const char *root) 32 { 33 char *parent, *child, *parent2 = NULL, *child2 = NULL; 34 int ret = KSFT_FAIL; 35 char buf[PAGE_SIZE]; 36 37 /* Create two nested cgroups with the memory controller enabled */ 38 parent = cg_name(root, "memcg_test_0"); 39 child = cg_name(root, "memcg_test_0/memcg_test_1"); 40 if (!parent || !child) 41 goto cleanup_free; 42 43 if (cg_create(parent)) 44 goto cleanup_free; 45 46 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 47 goto cleanup_parent; 48 49 if (cg_create(child)) 50 goto cleanup_parent; 51 52 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 53 goto cleanup_child; 54 55 /* Create two nested cgroups without enabling memory controller */ 56 parent2 = cg_name(root, "memcg_test_1"); 57 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 58 if (!parent2 || !child2) 59 goto cleanup_free2; 60 61 if (cg_create(parent2)) 62 goto cleanup_free2; 63 64 if (cg_create(child2)) 65 goto cleanup_parent2; 66 67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 68 goto cleanup_all; 69 70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 71 goto cleanup_all; 72 73 ret = KSFT_PASS; 74 75 cleanup_all: 76 cg_destroy(child2); 77 cleanup_parent2: 78 cg_destroy(parent2); 79 cleanup_free2: 80 free(parent2); 81 free(child2); 82 cleanup_child: 83 cg_destroy(child); 84 cleanup_parent: 85 cg_destroy(parent); 86 cleanup_free: 87 free(parent); 88 free(child); 89 90 return ret; 91 } 92 93 static int alloc_anon_50M_check(const char *cgroup, void *arg) 94 { 95 size_t size = MB(50); 96 char *buf, *ptr; 97 long anon, current; 98 int ret = -1; 99 100 buf = malloc(size); 101 if (buf == NULL) { 102 fprintf(stderr, "malloc() failed\n"); 103 return -1; 104 } 105 106 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 107 *ptr = 0; 108 109 current = cg_read_long(cgroup, "memory.current"); 110 if (current < size) 111 goto cleanup; 112 113 if (!values_close(size, current, 3)) 114 goto cleanup; 115 116 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 117 if (anon < 0) 118 goto cleanup; 119 120 if (!values_close(anon, current, 3)) 121 goto cleanup; 122 123 ret = 0; 124 cleanup: 125 free(buf); 126 return ret; 127 } 128 129 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 130 { 131 size_t size = MB(50); 132 int ret = -1; 133 long current, file; 134 int fd; 135 136 fd = get_temp_fd(); 137 if (fd < 0) 138 return -1; 139 140 if (alloc_pagecache(fd, size)) 141 goto cleanup; 142 143 current = cg_read_long(cgroup, "memory.current"); 144 if (current < size) 145 goto cleanup; 146 147 file = cg_read_key_long(cgroup, "memory.stat", "file "); 148 if (file < 0) 149 goto cleanup; 150 151 if (!values_close(file, current, 10)) 152 goto cleanup; 153 154 ret = 0; 155 156 cleanup: 157 close(fd); 158 return ret; 159 } 160 161 /* 162 * This test create a memory cgroup, allocates 163 * some anonymous memory and some pagecache 164 * and check memory.current and some memory.stat values. 165 */ 166 static int test_memcg_current(const char *root) 167 { 168 int ret = KSFT_FAIL; 169 long current; 170 char *memcg; 171 172 memcg = cg_name(root, "memcg_test"); 173 if (!memcg) 174 goto cleanup; 175 176 if (cg_create(memcg)) 177 goto cleanup; 178 179 current = cg_read_long(memcg, "memory.current"); 180 if (current != 0) 181 goto cleanup; 182 183 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 184 goto cleanup; 185 186 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 187 goto cleanup; 188 189 ret = KSFT_PASS; 190 191 cleanup: 192 cg_destroy(memcg); 193 free(memcg); 194 195 return ret; 196 } 197 198 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 199 { 200 int fd = (long)arg; 201 int ppid = getppid(); 202 203 if (alloc_pagecache(fd, MB(50))) 204 return -1; 205 206 while (getppid() == ppid) 207 sleep(1); 208 209 return 0; 210 } 211 212 static int alloc_anon_noexit(const char *cgroup, void *arg) 213 { 214 int ppid = getppid(); 215 size_t size = (unsigned long)arg; 216 char *buf, *ptr; 217 218 buf = malloc(size); 219 if (buf == NULL) { 220 fprintf(stderr, "malloc() failed\n"); 221 return -1; 222 } 223 224 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 225 *ptr = 0; 226 227 while (getppid() == ppid) 228 sleep(1); 229 230 free(buf); 231 return 0; 232 } 233 234 /* 235 * Wait until processes are killed asynchronously by the OOM killer 236 * If we exceed a timeout, fail. 237 */ 238 static int cg_test_proc_killed(const char *cgroup) 239 { 240 int limit; 241 242 for (limit = 10; limit > 0; limit--) { 243 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 244 return 0; 245 246 usleep(100000); 247 } 248 return -1; 249 } 250 251 static bool reclaim_until(const char *memcg, long goal); 252 253 /* 254 * First, this test creates the following hierarchy: 255 * A memory.min = 0, memory.max = 200M 256 * A/B memory.min = 50M 257 * A/B/C memory.min = 75M, memory.current = 50M 258 * A/B/D memory.min = 25M, memory.current = 50M 259 * A/B/E memory.min = 0, memory.current = 50M 260 * A/B/F memory.min = 500M, memory.current = 0 261 * 262 * (or memory.low if we test soft protection) 263 * 264 * Usages are pagecache and the test keeps a running 265 * process in every leaf cgroup. 266 * Then it creates A/G and creates a significant 267 * memory pressure in A. 268 * 269 * Then it checks actual memory usages and expects that: 270 * A/B memory.current ~= 50M 271 * A/B/C memory.current ~= 29M 272 * A/B/D memory.current ~= 21M 273 * A/B/E memory.current ~= 0 274 * A/B/F memory.current = 0 275 * (for origin of the numbers, see model in memcg_protection.m.) 276 * 277 * After that it tries to allocate more than there is 278 * unprotected memory in A available, and checks that: 279 * a) memory.min protects pagecache even in this case, 280 * b) memory.low allows reclaiming page cache with low events. 281 * 282 * Then we try to reclaim from A/B/C using memory.reclaim until its 283 * usage reaches 10M. 284 * This makes sure that: 285 * (a) We ignore the protection of the reclaim target memcg. 286 * (b) The previously calculated emin value (~29M) should be dismissed. 287 */ 288 static int test_memcg_protection(const char *root, bool min) 289 { 290 int ret = KSFT_FAIL, rc; 291 char *parent[3] = {NULL}; 292 char *children[4] = {NULL}; 293 const char *attribute = min ? "memory.min" : "memory.low"; 294 long c[4]; 295 long current; 296 int i, attempts; 297 int fd; 298 299 fd = get_temp_fd(); 300 if (fd < 0) 301 goto cleanup; 302 303 parent[0] = cg_name(root, "memcg_test_0"); 304 if (!parent[0]) 305 goto cleanup; 306 307 parent[1] = cg_name(parent[0], "memcg_test_1"); 308 if (!parent[1]) 309 goto cleanup; 310 311 parent[2] = cg_name(parent[0], "memcg_test_2"); 312 if (!parent[2]) 313 goto cleanup; 314 315 if (cg_create(parent[0])) 316 goto cleanup; 317 318 if (cg_read_long(parent[0], attribute)) { 319 /* No memory.min on older kernels is fine */ 320 if (min) 321 ret = KSFT_SKIP; 322 goto cleanup; 323 } 324 325 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 326 goto cleanup; 327 328 if (cg_write(parent[0], "memory.max", "200M")) 329 goto cleanup; 330 331 if (cg_write(parent[0], "memory.swap.max", "0")) 332 goto cleanup; 333 334 if (cg_create(parent[1])) 335 goto cleanup; 336 337 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 338 goto cleanup; 339 340 if (cg_create(parent[2])) 341 goto cleanup; 342 343 for (i = 0; i < ARRAY_SIZE(children); i++) { 344 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 345 if (!children[i]) 346 goto cleanup; 347 348 if (cg_create(children[i])) 349 goto cleanup; 350 351 if (i > 2) 352 continue; 353 354 cg_run_nowait(children[i], alloc_pagecache_50M_noexit, 355 (void *)(long)fd); 356 } 357 358 if (cg_write(parent[1], attribute, "50M")) 359 goto cleanup; 360 if (cg_write(children[0], attribute, "75M")) 361 goto cleanup; 362 if (cg_write(children[1], attribute, "25M")) 363 goto cleanup; 364 if (cg_write(children[2], attribute, "0")) 365 goto cleanup; 366 if (cg_write(children[3], attribute, "500M")) 367 goto cleanup; 368 369 attempts = 0; 370 while (!values_close(cg_read_long(parent[1], "memory.current"), 371 MB(150), 3)) { 372 if (attempts++ > 5) 373 break; 374 sleep(1); 375 } 376 377 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 378 goto cleanup; 379 380 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) 381 goto cleanup; 382 383 for (i = 0; i < ARRAY_SIZE(children); i++) 384 c[i] = cg_read_long(children[i], "memory.current"); 385 386 if (!values_close(c[0], MB(29), 10)) 387 goto cleanup; 388 389 if (!values_close(c[1], MB(21), 10)) 390 goto cleanup; 391 392 if (c[3] != 0) 393 goto cleanup; 394 395 rc = cg_run(parent[2], alloc_anon, (void *)MB(170)); 396 if (min && !rc) 397 goto cleanup; 398 else if (!min && rc) { 399 fprintf(stderr, 400 "memory.low prevents from allocating anon memory\n"); 401 goto cleanup; 402 } 403 404 current = min ? MB(50) : MB(30); 405 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) 406 goto cleanup; 407 408 if (!reclaim_until(children[0], MB(10))) 409 goto cleanup; 410 411 if (min) { 412 ret = KSFT_PASS; 413 goto cleanup; 414 } 415 416 for (i = 0; i < ARRAY_SIZE(children); i++) { 417 int no_low_events_index = 1; 418 long low, oom; 419 420 oom = cg_read_key_long(children[i], "memory.events", "oom "); 421 low = cg_read_key_long(children[i], "memory.events", "low "); 422 423 if (oom) 424 goto cleanup; 425 if (i <= no_low_events_index && low <= 0) 426 goto cleanup; 427 if (i > no_low_events_index && low) 428 goto cleanup; 429 430 } 431 432 ret = KSFT_PASS; 433 434 cleanup: 435 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 436 if (!children[i]) 437 continue; 438 439 cg_destroy(children[i]); 440 free(children[i]); 441 } 442 443 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 444 if (!parent[i]) 445 continue; 446 447 cg_destroy(parent[i]); 448 free(parent[i]); 449 } 450 close(fd); 451 return ret; 452 } 453 454 static int test_memcg_min(const char *root) 455 { 456 return test_memcg_protection(root, true); 457 } 458 459 static int test_memcg_low(const char *root) 460 { 461 return test_memcg_protection(root, false); 462 } 463 464 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 465 { 466 size_t size = MB(50); 467 int ret = -1; 468 long current, high, max; 469 int fd; 470 471 high = cg_read_long(cgroup, "memory.high"); 472 max = cg_read_long(cgroup, "memory.max"); 473 if (high != MB(30) && max != MB(30)) 474 return -1; 475 476 fd = get_temp_fd(); 477 if (fd < 0) 478 return -1; 479 480 if (alloc_pagecache(fd, size)) 481 goto cleanup; 482 483 current = cg_read_long(cgroup, "memory.current"); 484 if (!values_close(current, MB(30), 5)) 485 goto cleanup; 486 487 ret = 0; 488 489 cleanup: 490 close(fd); 491 return ret; 492 493 } 494 495 /* 496 * This test checks that memory.high limits the amount of 497 * memory which can be consumed by either anonymous memory 498 * or pagecache. 499 */ 500 static int test_memcg_high(const char *root) 501 { 502 int ret = KSFT_FAIL; 503 char *memcg; 504 long high; 505 506 memcg = cg_name(root, "memcg_test"); 507 if (!memcg) 508 goto cleanup; 509 510 if (cg_create(memcg)) 511 goto cleanup; 512 513 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 514 goto cleanup; 515 516 if (cg_write(memcg, "memory.swap.max", "0")) 517 goto cleanup; 518 519 if (cg_write(memcg, "memory.high", "30M")) 520 goto cleanup; 521 522 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 523 goto cleanup; 524 525 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 526 goto cleanup; 527 528 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 529 goto cleanup; 530 531 high = cg_read_key_long(memcg, "memory.events", "high "); 532 if (high <= 0) 533 goto cleanup; 534 535 ret = KSFT_PASS; 536 537 cleanup: 538 cg_destroy(memcg); 539 free(memcg); 540 541 return ret; 542 } 543 544 static int alloc_anon_mlock(const char *cgroup, void *arg) 545 { 546 size_t size = (size_t)arg; 547 void *buf; 548 549 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 550 0, 0); 551 if (buf == MAP_FAILED) 552 return -1; 553 554 mlock(buf, size); 555 munmap(buf, size); 556 return 0; 557 } 558 559 /* 560 * This test checks that memory.high is able to throttle big single shot 561 * allocation i.e. large allocation within one kernel entry. 562 */ 563 static int test_memcg_high_sync(const char *root) 564 { 565 int ret = KSFT_FAIL, pid, fd = -1; 566 char *memcg; 567 long pre_high, pre_max; 568 long post_high, post_max; 569 570 memcg = cg_name(root, "memcg_test"); 571 if (!memcg) 572 goto cleanup; 573 574 if (cg_create(memcg)) 575 goto cleanup; 576 577 pre_high = cg_read_key_long(memcg, "memory.events", "high "); 578 pre_max = cg_read_key_long(memcg, "memory.events", "max "); 579 if (pre_high < 0 || pre_max < 0) 580 goto cleanup; 581 582 if (cg_write(memcg, "memory.swap.max", "0")) 583 goto cleanup; 584 585 if (cg_write(memcg, "memory.high", "30M")) 586 goto cleanup; 587 588 if (cg_write(memcg, "memory.max", "140M")) 589 goto cleanup; 590 591 fd = memcg_prepare_for_wait(memcg); 592 if (fd < 0) 593 goto cleanup; 594 595 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); 596 if (pid < 0) 597 goto cleanup; 598 599 cg_wait_for(fd); 600 601 post_high = cg_read_key_long(memcg, "memory.events", "high "); 602 post_max = cg_read_key_long(memcg, "memory.events", "max "); 603 if (post_high < 0 || post_max < 0) 604 goto cleanup; 605 606 if (pre_high == post_high || pre_max != post_max) 607 goto cleanup; 608 609 ret = KSFT_PASS; 610 611 cleanup: 612 if (fd >= 0) 613 close(fd); 614 cg_destroy(memcg); 615 free(memcg); 616 617 return ret; 618 } 619 620 /* 621 * This test checks that memory.max limits the amount of 622 * memory which can be consumed by either anonymous memory 623 * or pagecache. 624 */ 625 static int test_memcg_max(const char *root) 626 { 627 int ret = KSFT_FAIL; 628 char *memcg; 629 long current, max; 630 631 memcg = cg_name(root, "memcg_test"); 632 if (!memcg) 633 goto cleanup; 634 635 if (cg_create(memcg)) 636 goto cleanup; 637 638 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 639 goto cleanup; 640 641 if (cg_write(memcg, "memory.swap.max", "0")) 642 goto cleanup; 643 644 if (cg_write(memcg, "memory.max", "30M")) 645 goto cleanup; 646 647 /* Should be killed by OOM killer */ 648 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 649 goto cleanup; 650 651 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 652 goto cleanup; 653 654 current = cg_read_long(memcg, "memory.current"); 655 if (current > MB(30) || !current) 656 goto cleanup; 657 658 max = cg_read_key_long(memcg, "memory.events", "max "); 659 if (max <= 0) 660 goto cleanup; 661 662 ret = KSFT_PASS; 663 664 cleanup: 665 cg_destroy(memcg); 666 free(memcg); 667 668 return ret; 669 } 670 671 /* 672 * Reclaim from @memcg until usage reaches @goal by writing to 673 * memory.reclaim. 674 * 675 * This function will return false if the usage is already below the 676 * goal. 677 * 678 * This function assumes that writing to memory.reclaim is the only 679 * source of change in memory.current (no concurrent allocations or 680 * reclaim). 681 * 682 * This function makes sure memory.reclaim is sane. It will return 683 * false if memory.reclaim's error codes do not make sense, even if 684 * the usage goal was satisfied. 685 */ 686 static bool reclaim_until(const char *memcg, long goal) 687 { 688 char buf[64]; 689 int retries, err; 690 long current, to_reclaim; 691 bool reclaimed = false; 692 693 for (retries = 5; retries > 0; retries--) { 694 current = cg_read_long(memcg, "memory.current"); 695 696 if (current < goal || values_close(current, goal, 3)) 697 break; 698 /* Did memory.reclaim return 0 incorrectly? */ 699 else if (reclaimed) 700 return false; 701 702 to_reclaim = current - goal; 703 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 704 err = cg_write(memcg, "memory.reclaim", buf); 705 if (!err) 706 reclaimed = true; 707 else if (err != -EAGAIN) 708 return false; 709 } 710 return reclaimed; 711 } 712 713 /* 714 * This test checks that memory.reclaim reclaims the given 715 * amount of memory (from both anon and file, if possible). 716 */ 717 static int test_memcg_reclaim(const char *root) 718 { 719 int ret = KSFT_FAIL, fd, retries; 720 char *memcg; 721 long current, expected_usage; 722 723 memcg = cg_name(root, "memcg_test"); 724 if (!memcg) 725 goto cleanup; 726 727 if (cg_create(memcg)) 728 goto cleanup; 729 730 current = cg_read_long(memcg, "memory.current"); 731 if (current != 0) 732 goto cleanup; 733 734 fd = get_temp_fd(); 735 if (fd < 0) 736 goto cleanup; 737 738 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 739 740 /* 741 * If swap is enabled, try to reclaim from both anon and file, else try 742 * to reclaim from file only. 743 */ 744 if (is_swap_enabled()) { 745 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 746 expected_usage = MB(100); 747 } else 748 expected_usage = MB(50); 749 750 /* 751 * Wait until current usage reaches the expected usage (or we run out of 752 * retries). 753 */ 754 retries = 5; 755 while (!values_close(cg_read_long(memcg, "memory.current"), 756 expected_usage, 10)) { 757 if (retries--) { 758 sleep(1); 759 continue; 760 } else { 761 fprintf(stderr, 762 "failed to allocate %ld for memcg reclaim test\n", 763 expected_usage); 764 goto cleanup; 765 } 766 } 767 768 /* 769 * Reclaim until current reaches 30M, this makes sure we hit both anon 770 * and file if swap is enabled. 771 */ 772 if (!reclaim_until(memcg, MB(30))) 773 goto cleanup; 774 775 ret = KSFT_PASS; 776 cleanup: 777 cg_destroy(memcg); 778 free(memcg); 779 close(fd); 780 781 return ret; 782 } 783 784 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 785 { 786 long mem_max = (long)arg; 787 size_t size = MB(50); 788 char *buf, *ptr; 789 long mem_current, swap_current; 790 int ret = -1; 791 792 buf = malloc(size); 793 if (buf == NULL) { 794 fprintf(stderr, "malloc() failed\n"); 795 return -1; 796 } 797 798 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 799 *ptr = 0; 800 801 mem_current = cg_read_long(cgroup, "memory.current"); 802 if (!mem_current || !values_close(mem_current, mem_max, 3)) 803 goto cleanup; 804 805 swap_current = cg_read_long(cgroup, "memory.swap.current"); 806 if (!swap_current || 807 !values_close(mem_current + swap_current, size, 3)) 808 goto cleanup; 809 810 ret = 0; 811 cleanup: 812 free(buf); 813 return ret; 814 } 815 816 /* 817 * This test checks that memory.swap.max limits the amount of 818 * anonymous memory which can be swapped out. 819 */ 820 static int test_memcg_swap_max(const char *root) 821 { 822 int ret = KSFT_FAIL; 823 char *memcg; 824 long max; 825 826 if (!is_swap_enabled()) 827 return KSFT_SKIP; 828 829 memcg = cg_name(root, "memcg_test"); 830 if (!memcg) 831 goto cleanup; 832 833 if (cg_create(memcg)) 834 goto cleanup; 835 836 if (cg_read_long(memcg, "memory.swap.current")) { 837 ret = KSFT_SKIP; 838 goto cleanup; 839 } 840 841 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 842 goto cleanup; 843 844 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 845 goto cleanup; 846 847 if (cg_write(memcg, "memory.swap.max", "30M")) 848 goto cleanup; 849 850 if (cg_write(memcg, "memory.max", "30M")) 851 goto cleanup; 852 853 /* Should be killed by OOM killer */ 854 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 855 goto cleanup; 856 857 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 858 goto cleanup; 859 860 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 861 goto cleanup; 862 863 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) 864 goto cleanup; 865 866 max = cg_read_key_long(memcg, "memory.events", "max "); 867 if (max <= 0) 868 goto cleanup; 869 870 ret = KSFT_PASS; 871 872 cleanup: 873 cg_destroy(memcg); 874 free(memcg); 875 876 return ret; 877 } 878 879 /* 880 * This test disables swapping and tries to allocate anonymous memory 881 * up to OOM. Then it checks for oom and oom_kill events in 882 * memory.events. 883 */ 884 static int test_memcg_oom_events(const char *root) 885 { 886 int ret = KSFT_FAIL; 887 char *memcg; 888 889 memcg = cg_name(root, "memcg_test"); 890 if (!memcg) 891 goto cleanup; 892 893 if (cg_create(memcg)) 894 goto cleanup; 895 896 if (cg_write(memcg, "memory.max", "30M")) 897 goto cleanup; 898 899 if (cg_write(memcg, "memory.swap.max", "0")) 900 goto cleanup; 901 902 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 903 goto cleanup; 904 905 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 906 goto cleanup; 907 908 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 909 goto cleanup; 910 911 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 912 goto cleanup; 913 914 ret = KSFT_PASS; 915 916 cleanup: 917 cg_destroy(memcg); 918 free(memcg); 919 920 return ret; 921 } 922 923 struct tcp_server_args { 924 unsigned short port; 925 int ctl[2]; 926 }; 927 928 static int tcp_server(const char *cgroup, void *arg) 929 { 930 struct tcp_server_args *srv_args = arg; 931 struct sockaddr_in6 saddr = { 0 }; 932 socklen_t slen = sizeof(saddr); 933 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 934 935 close(srv_args->ctl[0]); 936 ctl_fd = srv_args->ctl[1]; 937 938 saddr.sin6_family = AF_INET6; 939 saddr.sin6_addr = in6addr_any; 940 saddr.sin6_port = htons(srv_args->port); 941 942 sk = socket(AF_INET6, SOCK_STREAM, 0); 943 if (sk < 0) 944 return ret; 945 946 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 947 goto cleanup; 948 949 if (bind(sk, (struct sockaddr *)&saddr, slen)) { 950 write(ctl_fd, &errno, sizeof(errno)); 951 goto cleanup; 952 } 953 954 if (listen(sk, 1)) 955 goto cleanup; 956 957 ret = 0; 958 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 959 ret = -1; 960 goto cleanup; 961 } 962 963 client_sk = accept(sk, NULL, NULL); 964 if (client_sk < 0) 965 goto cleanup; 966 967 ret = -1; 968 for (;;) { 969 uint8_t buf[0x100000]; 970 971 if (write(client_sk, buf, sizeof(buf)) <= 0) { 972 if (errno == ECONNRESET) 973 ret = 0; 974 break; 975 } 976 } 977 978 close(client_sk); 979 980 cleanup: 981 close(sk); 982 return ret; 983 } 984 985 static int tcp_client(const char *cgroup, unsigned short port) 986 { 987 const char server[] = "localhost"; 988 struct addrinfo *ai; 989 char servport[6]; 990 int retries = 0x10; /* nice round number */ 991 int sk, ret; 992 long allocated; 993 994 allocated = cg_read_long(cgroup, "memory.current"); 995 snprintf(servport, sizeof(servport), "%hd", port); 996 ret = getaddrinfo(server, servport, NULL, &ai); 997 if (ret) 998 return ret; 999 1000 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 1001 if (sk < 0) 1002 goto free_ainfo; 1003 1004 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1005 if (ret < 0) 1006 goto close_sk; 1007 1008 ret = KSFT_FAIL; 1009 while (retries--) { 1010 uint8_t buf[0x100000]; 1011 long current, sock; 1012 1013 if (read(sk, buf, sizeof(buf)) <= 0) 1014 goto close_sk; 1015 1016 current = cg_read_long(cgroup, "memory.current"); 1017 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1018 1019 if (current < 0 || sock < 0) 1020 goto close_sk; 1021 1022 /* exclude the memory not related to socket connection */ 1023 if (values_close(current - allocated, sock, 10)) { 1024 ret = KSFT_PASS; 1025 break; 1026 } 1027 } 1028 1029 close_sk: 1030 close(sk); 1031 free_ainfo: 1032 freeaddrinfo(ai); 1033 return ret; 1034 } 1035 1036 /* 1037 * This test checks socket memory accounting. 1038 * The test forks a TCP server listens on a random port between 1000 1039 * and 61000. Once it gets a client connection, it starts writing to 1040 * its socket. 1041 * The TCP client interleaves reads from the socket with check whether 1042 * memory.current and memory.stat.sock are similar. 1043 */ 1044 static int test_memcg_sock(const char *root) 1045 { 1046 int bind_retries = 5, ret = KSFT_FAIL, pid, err; 1047 unsigned short port; 1048 char *memcg; 1049 1050 memcg = cg_name(root, "memcg_test"); 1051 if (!memcg) 1052 goto cleanup; 1053 1054 if (cg_create(memcg)) 1055 goto cleanup; 1056 1057 while (bind_retries--) { 1058 struct tcp_server_args args; 1059 1060 if (pipe(args.ctl)) 1061 goto cleanup; 1062 1063 port = args.port = 1000 + rand() % 60000; 1064 1065 pid = cg_run_nowait(memcg, tcp_server, &args); 1066 if (pid < 0) 1067 goto cleanup; 1068 1069 close(args.ctl[1]); 1070 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) 1071 goto cleanup; 1072 close(args.ctl[0]); 1073 1074 if (!err) 1075 break; 1076 if (err != EADDRINUSE) 1077 goto cleanup; 1078 1079 waitpid(pid, NULL, 0); 1080 } 1081 1082 if (err == EADDRINUSE) { 1083 ret = KSFT_SKIP; 1084 goto cleanup; 1085 } 1086 1087 if (tcp_client(memcg, port) != KSFT_PASS) 1088 goto cleanup; 1089 1090 waitpid(pid, &err, 0); 1091 if (WEXITSTATUS(err)) 1092 goto cleanup; 1093 1094 if (cg_read_long(memcg, "memory.current") < 0) 1095 goto cleanup; 1096 1097 if (cg_read_key_long(memcg, "memory.stat", "sock ")) 1098 goto cleanup; 1099 1100 ret = KSFT_PASS; 1101 1102 cleanup: 1103 cg_destroy(memcg); 1104 free(memcg); 1105 1106 return ret; 1107 } 1108 1109 /* 1110 * This test disables swapping and tries to allocate anonymous memory 1111 * up to OOM with memory.group.oom set. Then it checks that all 1112 * processes in the leaf were killed. It also checks that oom_events 1113 * were propagated to the parent level. 1114 */ 1115 static int test_memcg_oom_group_leaf_events(const char *root) 1116 { 1117 int ret = KSFT_FAIL; 1118 char *parent, *child; 1119 long parent_oom_events; 1120 1121 parent = cg_name(root, "memcg_test_0"); 1122 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1123 1124 if (!parent || !child) 1125 goto cleanup; 1126 1127 if (cg_create(parent)) 1128 goto cleanup; 1129 1130 if (cg_create(child)) 1131 goto cleanup; 1132 1133 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1134 goto cleanup; 1135 1136 if (cg_write(child, "memory.max", "50M")) 1137 goto cleanup; 1138 1139 if (cg_write(child, "memory.swap.max", "0")) 1140 goto cleanup; 1141 1142 if (cg_write(child, "memory.oom.group", "1")) 1143 goto cleanup; 1144 1145 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1146 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1147 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1148 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1149 goto cleanup; 1150 1151 if (cg_test_proc_killed(child)) 1152 goto cleanup; 1153 1154 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1155 goto cleanup; 1156 1157 parent_oom_events = cg_read_key_long( 1158 parent, "memory.events", "oom_kill "); 1159 /* 1160 * If memory_localevents is not enabled (the default), the parent should 1161 * count OOM events in its children groups. Otherwise, it should not 1162 * have observed any events. 1163 */ 1164 if (has_localevents && parent_oom_events != 0) 1165 goto cleanup; 1166 else if (!has_localevents && parent_oom_events <= 0) 1167 goto cleanup; 1168 1169 ret = KSFT_PASS; 1170 1171 cleanup: 1172 if (child) 1173 cg_destroy(child); 1174 if (parent) 1175 cg_destroy(parent); 1176 free(child); 1177 free(parent); 1178 1179 return ret; 1180 } 1181 1182 /* 1183 * This test disables swapping and tries to allocate anonymous memory 1184 * up to OOM with memory.group.oom set. Then it checks that all 1185 * processes in the parent and leaf were killed. 1186 */ 1187 static int test_memcg_oom_group_parent_events(const char *root) 1188 { 1189 int ret = KSFT_FAIL; 1190 char *parent, *child; 1191 1192 parent = cg_name(root, "memcg_test_0"); 1193 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1194 1195 if (!parent || !child) 1196 goto cleanup; 1197 1198 if (cg_create(parent)) 1199 goto cleanup; 1200 1201 if (cg_create(child)) 1202 goto cleanup; 1203 1204 if (cg_write(parent, "memory.max", "80M")) 1205 goto cleanup; 1206 1207 if (cg_write(parent, "memory.swap.max", "0")) 1208 goto cleanup; 1209 1210 if (cg_write(parent, "memory.oom.group", "1")) 1211 goto cleanup; 1212 1213 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1214 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1215 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1216 1217 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1218 goto cleanup; 1219 1220 if (cg_test_proc_killed(child)) 1221 goto cleanup; 1222 if (cg_test_proc_killed(parent)) 1223 goto cleanup; 1224 1225 ret = KSFT_PASS; 1226 1227 cleanup: 1228 if (child) 1229 cg_destroy(child); 1230 if (parent) 1231 cg_destroy(parent); 1232 free(child); 1233 free(parent); 1234 1235 return ret; 1236 } 1237 1238 /* 1239 * This test disables swapping and tries to allocate anonymous memory 1240 * up to OOM with memory.group.oom set. Then it checks that all 1241 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1242 */ 1243 static int test_memcg_oom_group_score_events(const char *root) 1244 { 1245 int ret = KSFT_FAIL; 1246 char *memcg; 1247 int safe_pid; 1248 1249 memcg = cg_name(root, "memcg_test_0"); 1250 1251 if (!memcg) 1252 goto cleanup; 1253 1254 if (cg_create(memcg)) 1255 goto cleanup; 1256 1257 if (cg_write(memcg, "memory.max", "50M")) 1258 goto cleanup; 1259 1260 if (cg_write(memcg, "memory.swap.max", "0")) 1261 goto cleanup; 1262 1263 if (cg_write(memcg, "memory.oom.group", "1")) 1264 goto cleanup; 1265 1266 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1267 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1268 goto cleanup; 1269 1270 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1271 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1272 goto cleanup; 1273 1274 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) 1275 goto cleanup; 1276 1277 if (kill(safe_pid, SIGKILL)) 1278 goto cleanup; 1279 1280 ret = KSFT_PASS; 1281 1282 cleanup: 1283 if (memcg) 1284 cg_destroy(memcg); 1285 free(memcg); 1286 1287 return ret; 1288 } 1289 1290 #define T(x) { x, #x } 1291 struct memcg_test { 1292 int (*fn)(const char *root); 1293 const char *name; 1294 } tests[] = { 1295 T(test_memcg_subtree_control), 1296 T(test_memcg_current), 1297 T(test_memcg_min), 1298 T(test_memcg_low), 1299 T(test_memcg_high), 1300 T(test_memcg_high_sync), 1301 T(test_memcg_max), 1302 T(test_memcg_reclaim), 1303 T(test_memcg_oom_events), 1304 T(test_memcg_swap_max), 1305 T(test_memcg_sock), 1306 T(test_memcg_oom_group_leaf_events), 1307 T(test_memcg_oom_group_parent_events), 1308 T(test_memcg_oom_group_score_events), 1309 }; 1310 #undef T 1311 1312 int main(int argc, char **argv) 1313 { 1314 char root[PATH_MAX]; 1315 int i, proc_status, ret = EXIT_SUCCESS; 1316 1317 if (cg_find_unified_root(root, sizeof(root))) 1318 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1319 1320 /* 1321 * Check that memory controller is available: 1322 * memory is listed in cgroup.controllers 1323 */ 1324 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1325 ksft_exit_skip("memory controller isn't available\n"); 1326 1327 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1328 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1329 ksft_exit_skip("Failed to set memory controller\n"); 1330 1331 proc_status = proc_mount_contains("memory_recursiveprot"); 1332 if (proc_status < 0) 1333 ksft_exit_skip("Failed to query cgroup mount option\n"); 1334 has_recursiveprot = proc_status; 1335 1336 proc_status = proc_mount_contains("memory_localevents"); 1337 if (proc_status < 0) 1338 ksft_exit_skip("Failed to query cgroup mount option\n"); 1339 has_localevents = proc_status; 1340 1341 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1342 switch (tests[i].fn(root)) { 1343 case KSFT_PASS: 1344 ksft_test_result_pass("%s\n", tests[i].name); 1345 break; 1346 case KSFT_SKIP: 1347 ksft_test_result_skip("%s\n", tests[i].name); 1348 break; 1349 default: 1350 ret = EXIT_FAILURE; 1351 ksft_test_result_fail("%s\n", tests[i].name); 1352 break; 1353 } 1354 } 1355 1356 return ret; 1357 } 1358