1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 static bool has_localevents; 25 static bool has_recursiveprot; 26 27 /* 28 * This test creates two nested cgroups with and without enabling 29 * the memory controller. 30 */ 31 static int test_memcg_subtree_control(const char *root) 32 { 33 char *parent, *child, *parent2 = NULL, *child2 = NULL; 34 int ret = KSFT_FAIL; 35 char buf[PAGE_SIZE]; 36 37 /* Create two nested cgroups with the memory controller enabled */ 38 parent = cg_name(root, "memcg_test_0"); 39 child = cg_name(root, "memcg_test_0/memcg_test_1"); 40 if (!parent || !child) 41 goto cleanup_free; 42 43 if (cg_create(parent)) 44 goto cleanup_free; 45 46 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 47 goto cleanup_parent; 48 49 if (cg_create(child)) 50 goto cleanup_parent; 51 52 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 53 goto cleanup_child; 54 55 /* Create two nested cgroups without enabling memory controller */ 56 parent2 = cg_name(root, "memcg_test_1"); 57 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 58 if (!parent2 || !child2) 59 goto cleanup_free2; 60 61 if (cg_create(parent2)) 62 goto cleanup_free2; 63 64 if (cg_create(child2)) 65 goto cleanup_parent2; 66 67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 68 goto cleanup_all; 69 70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 71 goto cleanup_all; 72 73 ret = KSFT_PASS; 74 75 cleanup_all: 76 cg_destroy(child2); 77 cleanup_parent2: 78 cg_destroy(parent2); 79 cleanup_free2: 80 
free(parent2); 81 free(child2); 82 cleanup_child: 83 cg_destroy(child); 84 cleanup_parent: 85 cg_destroy(parent); 86 cleanup_free: 87 free(parent); 88 free(child); 89 90 return ret; 91 } 92 93 static int alloc_anon_50M_check(const char *cgroup, void *arg) 94 { 95 size_t size = MB(50); 96 char *buf, *ptr; 97 long anon, current; 98 int ret = -1; 99 100 buf = malloc(size); 101 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 102 *ptr = 0; 103 104 current = cg_read_long(cgroup, "memory.current"); 105 if (current < size) 106 goto cleanup; 107 108 if (!values_close(size, current, 3)) 109 goto cleanup; 110 111 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 112 if (anon < 0) 113 goto cleanup; 114 115 if (!values_close(anon, current, 3)) 116 goto cleanup; 117 118 ret = 0; 119 cleanup: 120 free(buf); 121 return ret; 122 } 123 124 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 125 { 126 size_t size = MB(50); 127 int ret = -1; 128 long current, file; 129 int fd; 130 131 fd = get_temp_fd(); 132 if (fd < 0) 133 return -1; 134 135 if (alloc_pagecache(fd, size)) 136 goto cleanup; 137 138 current = cg_read_long(cgroup, "memory.current"); 139 if (current < size) 140 goto cleanup; 141 142 file = cg_read_key_long(cgroup, "memory.stat", "file "); 143 if (file < 0) 144 goto cleanup; 145 146 if (!values_close(file, current, 10)) 147 goto cleanup; 148 149 ret = 0; 150 151 cleanup: 152 close(fd); 153 return ret; 154 } 155 156 /* 157 * This test create a memory cgroup, allocates 158 * some anonymous memory and some pagecache 159 * and check memory.current and some memory.stat values. 
160 */ 161 static int test_memcg_current(const char *root) 162 { 163 int ret = KSFT_FAIL; 164 long current; 165 char *memcg; 166 167 memcg = cg_name(root, "memcg_test"); 168 if (!memcg) 169 goto cleanup; 170 171 if (cg_create(memcg)) 172 goto cleanup; 173 174 current = cg_read_long(memcg, "memory.current"); 175 if (current != 0) 176 goto cleanup; 177 178 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 179 goto cleanup; 180 181 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 182 goto cleanup; 183 184 ret = KSFT_PASS; 185 186 cleanup: 187 cg_destroy(memcg); 188 free(memcg); 189 190 return ret; 191 } 192 193 static int alloc_pagecache_50M(const char *cgroup, void *arg) 194 { 195 int fd = (long)arg; 196 197 return alloc_pagecache(fd, MB(50)); 198 } 199 200 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 201 { 202 int fd = (long)arg; 203 int ppid = getppid(); 204 205 if (alloc_pagecache(fd, MB(50))) 206 return -1; 207 208 while (getppid() == ppid) 209 sleep(1); 210 211 return 0; 212 } 213 214 static int alloc_anon_noexit(const char *cgroup, void *arg) 215 { 216 int ppid = getppid(); 217 size_t size = (unsigned long)arg; 218 char *buf, *ptr; 219 220 buf = malloc(size); 221 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 222 *ptr = 0; 223 224 while (getppid() == ppid) 225 sleep(1); 226 227 free(buf); 228 return 0; 229 } 230 231 /* 232 * Wait until processes are killed asynchronously by the OOM killer 233 * If we exceed a timeout, fail. 
234 */ 235 static int cg_test_proc_killed(const char *cgroup) 236 { 237 int limit; 238 239 for (limit = 10; limit > 0; limit--) { 240 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 241 return 0; 242 243 usleep(100000); 244 } 245 return -1; 246 } 247 248 /* 249 * First, this test creates the following hierarchy: 250 * A memory.min = 50M, memory.max = 200M 251 * A/B memory.min = 50M, memory.current = 50M 252 * A/B/C memory.min = 75M, memory.current = 50M 253 * A/B/D memory.min = 25M, memory.current = 50M 254 * A/B/E memory.min = 0, memory.current = 50M 255 * A/B/F memory.min = 500M, memory.current = 0 256 * 257 * Usages are pagecache, but the test keeps a running 258 * process in every leaf cgroup. 259 * Then it creates A/G and creates a significant 260 * memory pressure in it. 261 * 262 * A/B memory.current ~= 50M 263 * A/B/C memory.current ~= 33M 264 * A/B/D memory.current ~= 17M 265 * A/B/F memory.current ~= 0 266 * 267 * After that it tries to allocate more than there is 268 * unprotected memory in A available, and checks 269 * checks that memory.min protects pagecache even 270 * in this case. 
/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 50M,  memory.max = 200M
 * A/B     memory.min = 50M,  memory.current = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
 * Usages are pagecache, but the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in it.
 *
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/F  memory.current ~= 0
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks that
 * memory.min protects pagecache even in this case.
 */
static int test_memcg_min(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent[3] = {NULL};	/* A, A/B, A/G (parent[2]) */
	char *children[4] = {NULL};	/* A/B/{C,D,E,F} */
	long c[4];
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	/* memory.min missing or unreadable -> kernel lacks support, skip */
	if (cg_read_long(parent[0], "memory.min")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* Disable swap so reclaim targets pagecache only */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F) stays empty: memory.current = 0 */
		if (i > 2)
			continue;

		/*
		 * memory.min only protects usage charged while a task is in
		 * the cgroup, so keep a resident process in each leaf.
		 */
		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[0], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(parent[1], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(children[0], "memory.min", "75M"))
		goto cleanup;
	if (cg_write(children[1], "memory.min", "25M"))
		goto cleanup;
	if (cg_write(children[2], "memory.min", "0"))
		goto cleanup;
	if (cg_write(children[3], "memory.min", "500M"))
		goto cleanup;

	/* Wait (bounded) for the background allocations to reach ~150M */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Create memory pressure in A/G; this should fit under memory.max */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	/* A/B should have been reclaimed down to its protected 50M */
	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	/* Protection is distributed proportionally: C ~= 33M, D ~= 17M */
	if (!values_close(c[0], MB(33), 10))
		goto cleanup;

	if (!values_close(c[1], MB(17), 10))
		goto cleanup;

	/* F never charged anything; over-protection cannot create usage */
	if (c[3] != 0)
		goto cleanup;

	/* More than the unprotected memory in A: the allocation must fail */
	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
		goto cleanup;

	/* ... and memory.min must still have protected A/B's 50M */
	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	/* Destroy children before parents; skip names never allocated */
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}
/*
 * First, this test creates the following hierarchy:
 * A       memory.low = 50M,  memory.max = 200M
 * A/B     memory.low = 50M,  memory.current = 50M
 * A/B/C   memory.low = 75M,  memory.current = 50M
 * A/B/D   memory.low = 25M,  memory.current = 50M
 * A/B/E   memory.low = 0,    memory.current = 50M
 * A/B/F   memory.low = 500M, memory.current = 0
 *
 * Usages are pagecache.
 * Then it creates A/G and creates a significant
 * memory pressure in it.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/F  memory.current ~= 0
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available,
 * and checks low and oom events in memory.events.
 */
static int test_memcg_low(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent[3] = {NULL};	/* A, A/B, A/G (parent[2]) */
	char *children[4] = {NULL};	/* A/B/{C,D,E,F} */
	long low, oom;
	long c[4];
	int i;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	/* memory.low must exist and start at 0 */
	if (cg_read_long(parent[0], "memory.low"))
		goto cleanup;

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* Disable swap so reclaim targets pagecache only */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F) stays empty: memory.current = 0 */
		if (i > 2)
			continue;

		/*
		 * Unlike the memory.min test, no process needs to stay
		 * resident: memory.low also applies to plain pagecache.
		 */
		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
			goto cleanup;
	}

	if (cg_write(parent[0], "memory.low", "50M"))
		goto cleanup;
	if (cg_write(parent[1], "memory.low", "50M"))
		goto cleanup;
	if (cg_write(children[0], "memory.low", "75M"))
		goto cleanup;
	if (cg_write(children[1], "memory.low", "25M"))
		goto cleanup;
	if (cg_write(children[2], "memory.low", "0"))
		goto cleanup;
	if (cg_write(children[3], "memory.low", "500M"))
		goto cleanup;

	/* Create memory pressure in A/G; this should fit under memory.max */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	/* A/B should have been reclaimed down to its protected 50M */
	if (!values_close(cg_read_long(parent[1], "memory.current"),
			  MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	/* Protection is distributed proportionally: C ~= 33M, D ~= 17M */
	if (!values_close(c[0], MB(33), 10))
		goto cleanup;

	if (!values_close(c[1], MB(17), 10))
		goto cleanup;

	/* F never charged anything; over-protection cannot create usage */
	if (c[3] != 0)
		goto cleanup;

	/* memory.low is best-effort: the allocation must still succeed */
	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		/*
		 * With memory_recursiveprot, E (index 2) inherits protection
		 * from its parent and also sees "low" events; without it,
		 * only C and D (indices 0-1) should.
		 */
		int no_low_events_index = has_recursiveprot ? 2 : 1;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	/* Destroy children before parents; skip names never allocated */
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

/*
 * cg_run() helper: try to create 50M of pagecache in a cgroup whose
 * memory.high or memory.max is 30M, and verify that usage is capped
 * at ~30M. Returns 0 on success, -1 on failure.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	/* Sanity check: the caller must have set one of the 30M limits */
	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;

}
/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* memory.high must default to "max" */
	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	/* Disable swap so high-limit reclaim cannot swap anon pages out */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	/* memory.high throttles but does not fail allocations */
	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	/*
	 * 50M of pagecache gets reclaimed back to ~30M, so the 50M
	 * accounting check is expected to fail here.
	 */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Breaching the high limit must have produced "high" events */
	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * cg_run_nowait() helper: mmap @arg bytes of anonymous memory and mlock
 * it, faulting the whole range in with a single mlock() syscall.
 * mlock() failure is deliberately ignored: getting throttled/killed on
 * the way is part of what the caller tests.
 */
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	void *buf;

	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
		   0, 0);
	if (buf == MAP_FAILED)
		return -1;

	mlock(buf, size);
	munmap(buf, size);
	return 0;
}
/*
 * This test checks that memory.high is able to throttle big single shot
 * allocation i.e. large allocation within one kernel entry.
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Snapshot event counters before the allocation */
	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	/* Arm a memory.events notification (inotify/poll fd) */
	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	/* 200M single-shot mlock allocation against a 30M high limit */
	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	/* Block until memory.events changes */
	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	/*
	 * The high limit must have fired ("high" grew), and throttling must
	 * have kept usage below memory.max ("max" unchanged).
	 */
	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* memory.max must default to "max" */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	/* Disable swap so the hard limit cannot be evaded by swapping */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	/* Hitting the hard limit must have produced "max" events */
	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.reclaim reclaims the given
 * amount of memory (from both anon and file, if possible).
 */
static int test_memcg_reclaim(const char *root)
{
	/*
	 * fd starts at -1 so the cleanup path never close()s an
	 * uninitialized descriptor when we bail out before get_temp_fd().
	 */
	int ret = KSFT_FAIL, fd = -1, retries;
	char *memcg;
	long current, expected_usage, to_reclaim;
	char buf[64];

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	current = cg_read_long(memcg, "memory.current");
	if (current != 0)
		goto cleanup;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);

	/*
	 * If swap is enabled, try to reclaim from both anon and file, else try
	 * to reclaim from file only.
	 */
	if (is_swap_enabled()) {
		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
		expected_usage = MB(100);
	} else
		expected_usage = MB(50);

	/*
	 * Wait until current usage reaches the expected usage (or we run out of
	 * retries).
	 */
	retries = 5;
	while (!values_close(cg_read_long(memcg, "memory.current"),
			     expected_usage, 10)) {
		if (retries--) {
			sleep(1);
			continue;
		} else {
			fprintf(stderr,
				"failed to allocate %ld for memcg reclaim test\n",
				expected_usage);
			goto cleanup;
		}
	}

	/*
	 * Reclaim until current reaches 30M, this makes sure we hit both anon
	 * and file if swap is enabled.
	 */
	retries = 5;
	while (true) {
		int err;

		current = cg_read_long(memcg, "memory.current");
		to_reclaim = current - MB(30);

		/*
		 * We only keep looping if we get EAGAIN, which means we could
		 * not reclaim the full amount.
		 */
		if (to_reclaim <= 0)
			goto cleanup;

		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
		err = cg_write(memcg, "memory.reclaim", buf);
		if (!err) {
			/*
			 * If writing succeeds, then the written amount should have been
			 * fully reclaimed (and maybe more).
			 */
			current = cg_read_long(memcg, "memory.current");
			if (!values_close(current, MB(30), 3) && current > MB(30))
				goto cleanup;
			break;
		}

		/* The kernel could not reclaim the full amount, try again. */
		if (err == -EAGAIN && retries--)
			continue;

		/* We got an unexpected error or ran out of retries. */
		goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	cg_destroy(memcg);
	free(memcg);
	if (fd >= 0)
		close(fd);

	return ret;
}

/*
 * cg_run() helper: allocate and touch 50M of anonymous memory in a cgroup
 * whose memory.max is @arg, then verify that resident memory is capped at
 * memory.max and that the overflow went to swap (resident + swap ~= 50M).
 */
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (!buf)
		return -1;
	/* Touch one byte per page so the pages are actually faulted in */
	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}

/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out.
 */
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Pre-existing swap usage would invalidate the checks below */
	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	/* 50M fits into 30M resident + 30M swap, so this must succeed */
	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM. Then it checks for oom and oom_kill events in
 * memory.events.
 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	/* The 100M allocation must be OOM-killed, i.e. cg_run() fails */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* No processes may be left in the cgroup */
	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/* Arguments passed to the forked tcp_server(): port and a control pipe */
struct tcp_server_args {
	unsigned short port;
	int ctl[2];
};

/*
 * cg_run_nowait() helper: listen on srv_args->port, report bind status to
 * the parent over the control pipe, then stream data to the first client
 * until it disconnects. Returns 0 on a clean ECONNRESET shutdown.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Tell the parent why bind failed (e.g. EADDRINUSE) */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Signal "bound and listening" (0) to the parent */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		/* Payload contents don't matter, only the socket memory */
		uint8_t buf[0x100000];

		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			/* Client closing the connection is the success path */
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}

/*
 * Connect to localhost:@port and read from the socket while checking that
 * memory.current tracks memory.stat's "sock" counter in @cgroup.
 * Returns KSFT_PASS once the two values converge, KSFT_FAIL otherwise.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/* %hu: port is unsigned short; %hd would print ports > 32767 as negative */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
/*
 * This test checks socket memory accounting.
 * The test forks a TCP server listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with check whether
 * memory.current and memory.stat.sock are similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Retry with fresh random ports until bind succeeds (or give up) */
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		/* The server reports its bind status over the pipe */
		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		/* Port taken: reap the failed server and try another one */
		waitpid(pid, NULL, 0);
	}

	/* All retries hit EADDRINUSE: environment problem, not a failure */
	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* After both ends exited, no socket memory may remain charged */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
1201 */ 1202 static int test_memcg_oom_group_leaf_events(const char *root) 1203 { 1204 int ret = KSFT_FAIL; 1205 char *parent, *child; 1206 long parent_oom_events; 1207 1208 parent = cg_name(root, "memcg_test_0"); 1209 child = cg_name(root, "memcg_test_0/memcg_test_1"); 1210 1211 if (!parent || !child) 1212 goto cleanup; 1213 1214 if (cg_create(parent)) 1215 goto cleanup; 1216 1217 if (cg_create(child)) 1218 goto cleanup; 1219 1220 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 1221 goto cleanup; 1222 1223 if (cg_write(child, "memory.max", "50M")) 1224 goto cleanup; 1225 1226 if (cg_write(child, "memory.swap.max", "0")) 1227 goto cleanup; 1228 1229 if (cg_write(child, "memory.oom.group", "1")) 1230 goto cleanup; 1231 1232 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); 1233 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1234 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); 1235 if (!cg_run(child, alloc_anon, (void *)MB(100))) 1236 goto cleanup; 1237 1238 if (cg_test_proc_killed(child)) 1239 goto cleanup; 1240 1241 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) 1242 goto cleanup; 1243 1244 if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0) 1245 goto cleanup; 1246 1247 ret = KSFT_PASS; 1248 1249 cleanup: 1250 if (child) 1251 cg_destroy(child); 1252 if (parent) 1253 cg_destroy(parent); 1254 free(child); 1255 free(parent); 1256 1257 return ret; 1258 } 1259 1260 /* 1261 * This test disables swapping and tries to allocate anonymous memory 1262 * up to OOM with memory.group.oom set. Then it checks that all 1263 * processes in the parent and leaf were killed. 
/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	/* Group OOM on the parent: the kill sweeps the whole subtree */
	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	/* The 100M allocation must be OOM-killed, i.e. cg_run() fails */
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Both levels of the hierarchy must be emptied by the group kill */
	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}
Then it checks that all 1319 * processes were killed except those set with OOM_SCORE_ADJ_MIN 1320 */ 1321 static int test_memcg_oom_group_score_events(const char *root) 1322 { 1323 int ret = KSFT_FAIL; 1324 char *memcg; 1325 int safe_pid; 1326 1327 memcg = cg_name(root, "memcg_test_0"); 1328 1329 if (!memcg) 1330 goto cleanup; 1331 1332 if (cg_create(memcg)) 1333 goto cleanup; 1334 1335 if (cg_write(memcg, "memory.max", "50M")) 1336 goto cleanup; 1337 1338 if (cg_write(memcg, "memory.swap.max", "0")) 1339 goto cleanup; 1340 1341 if (cg_write(memcg, "memory.oom.group", "1")) 1342 goto cleanup; 1343 1344 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1345 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) 1346 goto cleanup; 1347 1348 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); 1349 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 1350 goto cleanup; 1351 1352 parent_oom_events = cg_read_key_long( 1353 parent, "memory.events", "oom_kill "); 1354 /* 1355 * If memory_localevents is not enabled (the default), the parent should 1356 * count OOM events in its children groups. Otherwise, it should not 1357 * have observed any events. 
1358 */ 1359 if ((has_localevents && parent_oom_events == 0) || 1360 parent_oom_events > 0) 1361 ret = KSFT_PASS; 1362 1363 if (kill(safe_pid, SIGKILL)) 1364 goto cleanup; 1365 1366 cleanup: 1367 if (memcg) 1368 cg_destroy(memcg); 1369 free(memcg); 1370 1371 return ret; 1372 } 1373 1374 #define T(x) { x, #x } 1375 struct memcg_test { 1376 int (*fn)(const char *root); 1377 const char *name; 1378 } tests[] = { 1379 T(test_memcg_subtree_control), 1380 T(test_memcg_current), 1381 T(test_memcg_min), 1382 T(test_memcg_low), 1383 T(test_memcg_high), 1384 T(test_memcg_high_sync), 1385 T(test_memcg_max), 1386 T(test_memcg_reclaim), 1387 T(test_memcg_oom_events), 1388 T(test_memcg_swap_max), 1389 T(test_memcg_sock), 1390 T(test_memcg_oom_group_leaf_events), 1391 T(test_memcg_oom_group_parent_events), 1392 T(test_memcg_oom_group_score_events), 1393 }; 1394 #undef T 1395 1396 int main(int argc, char **argv) 1397 { 1398 char root[PATH_MAX]; 1399 int i, proc_status, ret = EXIT_SUCCESS; 1400 1401 if (cg_find_unified_root(root, sizeof(root))) 1402 ksft_exit_skip("cgroup v2 isn't mounted\n"); 1403 1404 /* 1405 * Check that memory controller is available: 1406 * memory is listed in cgroup.controllers 1407 */ 1408 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 1409 ksft_exit_skip("memory controller isn't available\n"); 1410 1411 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 1412 if (cg_write(root, "cgroup.subtree_control", "+memory")) 1413 ksft_exit_skip("Failed to set memory controller\n"); 1414 1415 proc_status = proc_mount_contains("memory_recursiveprot"); 1416 if (proc_status < 0) 1417 ksft_exit_skip("Failed to query cgroup mount option\n"); 1418 has_recursiveprot = proc_status; 1419 1420 proc_status = proc_mount_contains("memory_localevents"); 1421 if (proc_status < 0) 1422 ksft_exit_skip("Failed to query cgroup mount option\n"); 1423 has_localevents = proc_status; 1424 1425 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1426 switch (tests[i].fn(root)) { 
1427 case KSFT_PASS: 1428 ksft_test_result_pass("%s\n", tests[i].name); 1429 break; 1430 case KSFT_SKIP: 1431 ksft_test_result_skip("%s\n", tests[i].name); 1432 break; 1433 default: 1434 ret = EXIT_FAILURE; 1435 ksft_test_result_fail("%s\n", tests[i].name); 1436 break; 1437 } 1438 } 1439 1440 return ret; 1441 } 1442