1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <linux/oom.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/stat.h> 11 #include <sys/types.h> 12 #include <unistd.h> 13 #include <sys/socket.h> 14 #include <sys/wait.h> 15 #include <arpa/inet.h> 16 #include <netinet/in.h> 17 #include <netdb.h> 18 #include <errno.h> 19 #include <sys/mman.h> 20 21 #include "../kselftest.h" 22 #include "cgroup_util.h" 23 24 /* 25 * This test creates two nested cgroups with and without enabling 26 * the memory controller. 27 */ 28 static int test_memcg_subtree_control(const char *root) 29 { 30 char *parent, *child, *parent2 = NULL, *child2 = NULL; 31 int ret = KSFT_FAIL; 32 char buf[PAGE_SIZE]; 33 34 /* Create two nested cgroups with the memory controller enabled */ 35 parent = cg_name(root, "memcg_test_0"); 36 child = cg_name(root, "memcg_test_0/memcg_test_1"); 37 if (!parent || !child) 38 goto cleanup_free; 39 40 if (cg_create(parent)) 41 goto cleanup_free; 42 43 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 44 goto cleanup_parent; 45 46 if (cg_create(child)) 47 goto cleanup_parent; 48 49 if (cg_read_strstr(child, "cgroup.controllers", "memory")) 50 goto cleanup_child; 51 52 /* Create two nested cgroups without enabling memory controller */ 53 parent2 = cg_name(root, "memcg_test_1"); 54 child2 = cg_name(root, "memcg_test_1/memcg_test_1"); 55 if (!parent2 || !child2) 56 goto cleanup_free2; 57 58 if (cg_create(parent2)) 59 goto cleanup_free2; 60 61 if (cg_create(child2)) 62 goto cleanup_parent2; 63 64 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) 65 goto cleanup_all; 66 67 if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) 68 goto cleanup_all; 69 70 ret = KSFT_PASS; 71 72 cleanup_all: 73 cg_destroy(child2); 74 cleanup_parent2: 75 cg_destroy(parent2); 76 cleanup_free2: 77 free(parent2); 78 free(child2); 79 cleanup_child: 80 
cg_destroy(child); 81 cleanup_parent: 82 cg_destroy(parent); 83 cleanup_free: 84 free(parent); 85 free(child); 86 87 return ret; 88 } 89 90 static int alloc_anon_50M_check(const char *cgroup, void *arg) 91 { 92 size_t size = MB(50); 93 char *buf, *ptr; 94 long anon, current; 95 int ret = -1; 96 97 buf = malloc(size); 98 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 99 *ptr = 0; 100 101 current = cg_read_long(cgroup, "memory.current"); 102 if (current < size) 103 goto cleanup; 104 105 if (!values_close(size, current, 3)) 106 goto cleanup; 107 108 anon = cg_read_key_long(cgroup, "memory.stat", "anon "); 109 if (anon < 0) 110 goto cleanup; 111 112 if (!values_close(anon, current, 3)) 113 goto cleanup; 114 115 ret = 0; 116 cleanup: 117 free(buf); 118 return ret; 119 } 120 121 static int alloc_pagecache_50M_check(const char *cgroup, void *arg) 122 { 123 size_t size = MB(50); 124 int ret = -1; 125 long current, file; 126 int fd; 127 128 fd = get_temp_fd(); 129 if (fd < 0) 130 return -1; 131 132 if (alloc_pagecache(fd, size)) 133 goto cleanup; 134 135 current = cg_read_long(cgroup, "memory.current"); 136 if (current < size) 137 goto cleanup; 138 139 file = cg_read_key_long(cgroup, "memory.stat", "file "); 140 if (file < 0) 141 goto cleanup; 142 143 if (!values_close(file, current, 10)) 144 goto cleanup; 145 146 ret = 0; 147 148 cleanup: 149 close(fd); 150 return ret; 151 } 152 153 /* 154 * This test create a memory cgroup, allocates 155 * some anonymous memory and some pagecache 156 * and check memory.current and some memory.stat values. 
157 */ 158 static int test_memcg_current(const char *root) 159 { 160 int ret = KSFT_FAIL; 161 long current; 162 char *memcg; 163 164 memcg = cg_name(root, "memcg_test"); 165 if (!memcg) 166 goto cleanup; 167 168 if (cg_create(memcg)) 169 goto cleanup; 170 171 current = cg_read_long(memcg, "memory.current"); 172 if (current != 0) 173 goto cleanup; 174 175 if (cg_run(memcg, alloc_anon_50M_check, NULL)) 176 goto cleanup; 177 178 if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) 179 goto cleanup; 180 181 ret = KSFT_PASS; 182 183 cleanup: 184 cg_destroy(memcg); 185 free(memcg); 186 187 return ret; 188 } 189 190 static int alloc_pagecache_50M(const char *cgroup, void *arg) 191 { 192 int fd = (long)arg; 193 194 return alloc_pagecache(fd, MB(50)); 195 } 196 197 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) 198 { 199 int fd = (long)arg; 200 int ppid = getppid(); 201 202 if (alloc_pagecache(fd, MB(50))) 203 return -1; 204 205 while (getppid() == ppid) 206 sleep(1); 207 208 return 0; 209 } 210 211 static int alloc_anon_noexit(const char *cgroup, void *arg) 212 { 213 int ppid = getppid(); 214 size_t size = (unsigned long)arg; 215 char *buf, *ptr; 216 217 buf = malloc(size); 218 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 219 *ptr = 0; 220 221 while (getppid() == ppid) 222 sleep(1); 223 224 free(buf); 225 return 0; 226 } 227 228 /* 229 * Wait until processes are killed asynchronously by the OOM killer 230 * If we exceed a timeout, fail. 
231 */ 232 static int cg_test_proc_killed(const char *cgroup) 233 { 234 int limit; 235 236 for (limit = 10; limit > 0; limit--) { 237 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) 238 return 0; 239 240 usleep(100000); 241 } 242 return -1; 243 } 244 245 /* 246 * First, this test creates the following hierarchy: 247 * A memory.min = 50M, memory.max = 200M 248 * A/B memory.min = 50M, memory.current = 50M 249 * A/B/C memory.min = 75M, memory.current = 50M 250 * A/B/D memory.min = 25M, memory.current = 50M 251 * A/B/E memory.min = 500M, memory.current = 0 252 * A/B/F memory.min = 0, memory.current = 50M 253 * 254 * Usages are pagecache, but the test keeps a running 255 * process in every leaf cgroup. 256 * Then it creates A/G and creates a significant 257 * memory pressure in it. 258 * 259 * A/B memory.current ~= 50M 260 * A/B/C memory.current ~= 33M 261 * A/B/D memory.current ~= 17M 262 * A/B/E memory.current ~= 0 263 * 264 * After that it tries to allocate more than there is 265 * unprotected memory in A available, and checks 266 * checks that memory.min protects pagecache even 267 * in this case. 
 */
static int test_memcg_min(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent[3] = {NULL};	/* parent[0]=A, parent[1]=A/B, parent[2]=A/G */
	char *children[4] = {NULL};	/* A/B/{C,D,E,F} */
	long c[4];
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	/* Skip if memory.min can't be read or isn't zero by default */
	if (cg_read_long(parent[0], "memory.min")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* children[2] (E in the diagram above) stays empty */
		if (i == 2)
			continue;

		/* Background process keeps 50M of pagecache charged */
		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[0], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(parent[1], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(children[0], "memory.min", "75M"))
		goto cleanup;
	if (cg_write(children[1], "memory.min", "25M"))
		goto cleanup;
	if (cg_write(children[2], "memory.min", "500M"))
		goto cleanup;
	if (cg_write(children[3], "memory.min", "0"))
		goto cleanup;

	/* Wait (up to ~5s) for the background allocations to be charged */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Create significant memory pressure in A/G */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	/* A/B as a whole must have been reclaimed down to ~its protection */
	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(33), 10))
		goto cleanup;

	if (!values_close(c[1], MB(17), 10))
		goto cleanup;

	if (!values_close(c[2], 0, 1))
		goto cleanup;

	/* Overcommitting allocation in A/G is expected to fail (OOM) */
	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
		goto cleanup;

	/* memory.min must still protect A/B's pagecache */
	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A       memory.low = 50M,  memory.max = 200M
 * A/B     memory.low = 50M,  memory.current = 50M
 * A/B/C   memory.low = 75M,  memory.current = 50M
 * A/B/D   memory.low = 25M,  memory.current = 50M
 * A/B/E   memory.low = 500M, memory.current = 0
 * A/B/F   memory.low = 0,    memory.current = 50M
 *
 * Usages are pagecache.
 * Then it creates A/G and creates a significant
 * memory pressure in it.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/E  memory.current ~= 0
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available,
 * and checks low and oom events in memory.events.
424 */ 425 static int test_memcg_low(const char *root) 426 { 427 int ret = KSFT_FAIL; 428 char *parent[3] = {NULL}; 429 char *children[4] = {NULL}; 430 long low, oom; 431 long c[4]; 432 int i; 433 int fd; 434 435 fd = get_temp_fd(); 436 if (fd < 0) 437 goto cleanup; 438 439 parent[0] = cg_name(root, "memcg_test_0"); 440 if (!parent[0]) 441 goto cleanup; 442 443 parent[1] = cg_name(parent[0], "memcg_test_1"); 444 if (!parent[1]) 445 goto cleanup; 446 447 parent[2] = cg_name(parent[0], "memcg_test_2"); 448 if (!parent[2]) 449 goto cleanup; 450 451 if (cg_create(parent[0])) 452 goto cleanup; 453 454 if (cg_read_long(parent[0], "memory.low")) 455 goto cleanup; 456 457 if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) 458 goto cleanup; 459 460 if (cg_write(parent[0], "memory.max", "200M")) 461 goto cleanup; 462 463 if (cg_write(parent[0], "memory.swap.max", "0")) 464 goto cleanup; 465 466 if (cg_create(parent[1])) 467 goto cleanup; 468 469 if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) 470 goto cleanup; 471 472 if (cg_create(parent[2])) 473 goto cleanup; 474 475 for (i = 0; i < ARRAY_SIZE(children); i++) { 476 children[i] = cg_name_indexed(parent[1], "child_memcg", i); 477 if (!children[i]) 478 goto cleanup; 479 480 if (cg_create(children[i])) 481 goto cleanup; 482 483 if (i == 2) 484 continue; 485 486 if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd)) 487 goto cleanup; 488 } 489 490 if (cg_write(parent[0], "memory.low", "50M")) 491 goto cleanup; 492 if (cg_write(parent[1], "memory.low", "50M")) 493 goto cleanup; 494 if (cg_write(children[0], "memory.low", "75M")) 495 goto cleanup; 496 if (cg_write(children[1], "memory.low", "25M")) 497 goto cleanup; 498 if (cg_write(children[2], "memory.low", "500M")) 499 goto cleanup; 500 if (cg_write(children[3], "memory.low", "0")) 501 goto cleanup; 502 503 if (cg_run(parent[2], alloc_anon, (void *)MB(148))) 504 goto cleanup; 505 506 if (!values_close(cg_read_long(parent[1], "memory.current"), 
MB(50), 3)) 507 goto cleanup; 508 509 for (i = 0; i < ARRAY_SIZE(children); i++) 510 c[i] = cg_read_long(children[i], "memory.current"); 511 512 if (!values_close(c[0], MB(33), 10)) 513 goto cleanup; 514 515 if (!values_close(c[1], MB(17), 10)) 516 goto cleanup; 517 518 if (!values_close(c[2], 0, 1)) 519 goto cleanup; 520 521 if (cg_run(parent[2], alloc_anon, (void *)MB(166))) { 522 fprintf(stderr, 523 "memory.low prevents from allocating anon memory\n"); 524 goto cleanup; 525 } 526 527 for (i = 0; i < ARRAY_SIZE(children); i++) { 528 oom = cg_read_key_long(children[i], "memory.events", "oom "); 529 low = cg_read_key_long(children[i], "memory.events", "low "); 530 531 if (oom) 532 goto cleanup; 533 if (i < 2 && low <= 0) 534 goto cleanup; 535 if (i >= 2 && low) 536 goto cleanup; 537 } 538 539 ret = KSFT_PASS; 540 541 cleanup: 542 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { 543 if (!children[i]) 544 continue; 545 546 cg_destroy(children[i]); 547 free(children[i]); 548 } 549 550 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { 551 if (!parent[i]) 552 continue; 553 554 cg_destroy(parent[i]); 555 free(parent[i]); 556 } 557 close(fd); 558 return ret; 559 } 560 561 static int alloc_pagecache_max_30M(const char *cgroup, void *arg) 562 { 563 size_t size = MB(50); 564 int ret = -1; 565 long current; 566 int fd; 567 568 fd = get_temp_fd(); 569 if (fd < 0) 570 return -1; 571 572 if (alloc_pagecache(fd, size)) 573 goto cleanup; 574 575 current = cg_read_long(cgroup, "memory.current"); 576 if (current <= MB(29) || current > MB(30)) 577 goto cleanup; 578 579 ret = 0; 580 581 cleanup: 582 close(fd); 583 return ret; 584 585 } 586 587 /* 588 * This test checks that memory.high limits the amount of 589 * memory which can be consumed by either anonymous memory 590 * or pagecache. 
591 */ 592 static int test_memcg_high(const char *root) 593 { 594 int ret = KSFT_FAIL; 595 char *memcg; 596 long high; 597 598 memcg = cg_name(root, "memcg_test"); 599 if (!memcg) 600 goto cleanup; 601 602 if (cg_create(memcg)) 603 goto cleanup; 604 605 if (cg_read_strcmp(memcg, "memory.high", "max\n")) 606 goto cleanup; 607 608 if (cg_write(memcg, "memory.swap.max", "0")) 609 goto cleanup; 610 611 if (cg_write(memcg, "memory.high", "30M")) 612 goto cleanup; 613 614 if (cg_run(memcg, alloc_anon, (void *)MB(31))) 615 goto cleanup; 616 617 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) 618 goto cleanup; 619 620 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 621 goto cleanup; 622 623 high = cg_read_key_long(memcg, "memory.events", "high "); 624 if (high <= 0) 625 goto cleanup; 626 627 ret = KSFT_PASS; 628 629 cleanup: 630 cg_destroy(memcg); 631 free(memcg); 632 633 return ret; 634 } 635 636 static int alloc_anon_mlock(const char *cgroup, void *arg) 637 { 638 size_t size = (size_t)arg; 639 void *buf; 640 641 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, 642 0, 0); 643 if (buf == MAP_FAILED) 644 return -1; 645 646 mlock(buf, size); 647 munmap(buf, size); 648 return 0; 649 } 650 651 /* 652 * This test checks that memory.high is able to throttle big single shot 653 * allocation i.e. large allocation within one kernel entry. 
 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Snapshot the high/max event counters before applying limits */
	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	/* Arm a memory.events notification before triggering the allocation */
	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	/* One big mlocked allocation: 200M charged within one kernel entry */
	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	/* memory.high must have fired; memory.max must not have been hit */
	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
716 */ 717 static int test_memcg_max(const char *root) 718 { 719 int ret = KSFT_FAIL; 720 char *memcg; 721 long current, max; 722 723 memcg = cg_name(root, "memcg_test"); 724 if (!memcg) 725 goto cleanup; 726 727 if (cg_create(memcg)) 728 goto cleanup; 729 730 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 731 goto cleanup; 732 733 if (cg_write(memcg, "memory.swap.max", "0")) 734 goto cleanup; 735 736 if (cg_write(memcg, "memory.max", "30M")) 737 goto cleanup; 738 739 /* Should be killed by OOM killer */ 740 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 741 goto cleanup; 742 743 if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) 744 goto cleanup; 745 746 current = cg_read_long(memcg, "memory.current"); 747 if (current > MB(30) || !current) 748 goto cleanup; 749 750 max = cg_read_key_long(memcg, "memory.events", "max "); 751 if (max <= 0) 752 goto cleanup; 753 754 ret = KSFT_PASS; 755 756 cleanup: 757 cg_destroy(memcg); 758 free(memcg); 759 760 return ret; 761 } 762 763 /* 764 * This test checks that memory.reclaim reclaims the given 765 * amount of memory (from both anon and file, if possible). 766 */ 767 static int test_memcg_reclaim(const char *root) 768 { 769 int ret = KSFT_FAIL, fd, retries; 770 char *memcg; 771 long current, expected_usage, to_reclaim; 772 char buf[64]; 773 774 memcg = cg_name(root, "memcg_test"); 775 if (!memcg) 776 goto cleanup; 777 778 if (cg_create(memcg)) 779 goto cleanup; 780 781 current = cg_read_long(memcg, "memory.current"); 782 if (current != 0) 783 goto cleanup; 784 785 fd = get_temp_fd(); 786 if (fd < 0) 787 goto cleanup; 788 789 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); 790 791 /* 792 * If swap is enabled, try to reclaim from both anon and file, else try 793 * to reclaim from file only. 
794 */ 795 if (is_swap_enabled()) { 796 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); 797 expected_usage = MB(100); 798 } else 799 expected_usage = MB(50); 800 801 /* 802 * Wait until current usage reaches the expected usage (or we run out of 803 * retries). 804 */ 805 retries = 5; 806 while (!values_close(cg_read_long(memcg, "memory.current"), 807 expected_usage, 10)) { 808 if (retries--) { 809 sleep(1); 810 continue; 811 } else { 812 fprintf(stderr, 813 "failed to allocate %ld for memcg reclaim test\n", 814 expected_usage); 815 goto cleanup; 816 } 817 } 818 819 /* 820 * Reclaim until current reaches 30M, this makes sure we hit both anon 821 * and file if swap is enabled. 822 */ 823 retries = 5; 824 while (true) { 825 int err; 826 827 current = cg_read_long(memcg, "memory.current"); 828 to_reclaim = current - MB(30); 829 830 /* 831 * We only keep looping if we get EAGAIN, which means we could 832 * not reclaim the full amount. 833 */ 834 if (to_reclaim <= 0) 835 goto cleanup; 836 837 838 snprintf(buf, sizeof(buf), "%ld", to_reclaim); 839 err = cg_write(memcg, "memory.reclaim", buf); 840 if (!err) { 841 /* 842 * If writing succeeds, then the written amount should have been 843 * fully reclaimed (and maybe more). 844 */ 845 current = cg_read_long(memcg, "memory.current"); 846 if (!values_close(current, MB(30), 3) && current > MB(30)) 847 goto cleanup; 848 break; 849 } 850 851 /* The kernel could not reclaim the full amount, try again. */ 852 if (err == -EAGAIN && retries--) 853 continue; 854 855 /* We got an unexpected error or ran out of retries. 
*/ 856 goto cleanup; 857 } 858 859 ret = KSFT_PASS; 860 cleanup: 861 cg_destroy(memcg); 862 free(memcg); 863 close(fd); 864 865 return ret; 866 } 867 868 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) 869 { 870 long mem_max = (long)arg; 871 size_t size = MB(50); 872 char *buf, *ptr; 873 long mem_current, swap_current; 874 int ret = -1; 875 876 buf = malloc(size); 877 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 878 *ptr = 0; 879 880 mem_current = cg_read_long(cgroup, "memory.current"); 881 if (!mem_current || !values_close(mem_current, mem_max, 3)) 882 goto cleanup; 883 884 swap_current = cg_read_long(cgroup, "memory.swap.current"); 885 if (!swap_current || 886 !values_close(mem_current + swap_current, size, 3)) 887 goto cleanup; 888 889 ret = 0; 890 cleanup: 891 free(buf); 892 return ret; 893 } 894 895 /* 896 * This test checks that memory.swap.max limits the amount of 897 * anonymous memory which can be swapped out. 898 */ 899 static int test_memcg_swap_max(const char *root) 900 { 901 int ret = KSFT_FAIL; 902 char *memcg; 903 long max; 904 905 if (!is_swap_enabled()) 906 return KSFT_SKIP; 907 908 memcg = cg_name(root, "memcg_test"); 909 if (!memcg) 910 goto cleanup; 911 912 if (cg_create(memcg)) 913 goto cleanup; 914 915 if (cg_read_long(memcg, "memory.swap.current")) { 916 ret = KSFT_SKIP; 917 goto cleanup; 918 } 919 920 if (cg_read_strcmp(memcg, "memory.max", "max\n")) 921 goto cleanup; 922 923 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) 924 goto cleanup; 925 926 if (cg_write(memcg, "memory.swap.max", "30M")) 927 goto cleanup; 928 929 if (cg_write(memcg, "memory.max", "30M")) 930 goto cleanup; 931 932 /* Should be killed by OOM killer */ 933 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 934 goto cleanup; 935 936 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 937 goto cleanup; 938 939 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 940 goto cleanup; 941 942 if (cg_run(memcg, 
alloc_anon_50M_check_swap, (void *)MB(30))) 943 goto cleanup; 944 945 max = cg_read_key_long(memcg, "memory.events", "max "); 946 if (max <= 0) 947 goto cleanup; 948 949 ret = KSFT_PASS; 950 951 cleanup: 952 cg_destroy(memcg); 953 free(memcg); 954 955 return ret; 956 } 957 958 /* 959 * This test disables swapping and tries to allocate anonymous memory 960 * up to OOM. Then it checks for oom and oom_kill events in 961 * memory.events. 962 */ 963 static int test_memcg_oom_events(const char *root) 964 { 965 int ret = KSFT_FAIL; 966 char *memcg; 967 968 memcg = cg_name(root, "memcg_test"); 969 if (!memcg) 970 goto cleanup; 971 972 if (cg_create(memcg)) 973 goto cleanup; 974 975 if (cg_write(memcg, "memory.max", "30M")) 976 goto cleanup; 977 978 if (cg_write(memcg, "memory.swap.max", "0")) 979 goto cleanup; 980 981 if (!cg_run(memcg, alloc_anon, (void *)MB(100))) 982 goto cleanup; 983 984 if (cg_read_strcmp(memcg, "cgroup.procs", "")) 985 goto cleanup; 986 987 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) 988 goto cleanup; 989 990 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) 991 goto cleanup; 992 993 ret = KSFT_PASS; 994 995 cleanup: 996 cg_destroy(memcg); 997 free(memcg); 998 999 return ret; 1000 } 1001 1002 struct tcp_server_args { 1003 unsigned short port; 1004 int ctl[2]; 1005 }; 1006 1007 static int tcp_server(const char *cgroup, void *arg) 1008 { 1009 struct tcp_server_args *srv_args = arg; 1010 struct sockaddr_in6 saddr = { 0 }; 1011 socklen_t slen = sizeof(saddr); 1012 int sk, client_sk, ctl_fd, yes = 1, ret = -1; 1013 1014 close(srv_args->ctl[0]); 1015 ctl_fd = srv_args->ctl[1]; 1016 1017 saddr.sin6_family = AF_INET6; 1018 saddr.sin6_addr = in6addr_any; 1019 saddr.sin6_port = htons(srv_args->port); 1020 1021 sk = socket(AF_INET6, SOCK_STREAM, 0); 1022 if (sk < 0) 1023 return ret; 1024 1025 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) 1026 goto cleanup; 1027 1028 if (bind(sk, (struct sockaddr *)&saddr, 
slen)) { 1029 write(ctl_fd, &errno, sizeof(errno)); 1030 goto cleanup; 1031 } 1032 1033 if (listen(sk, 1)) 1034 goto cleanup; 1035 1036 ret = 0; 1037 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { 1038 ret = -1; 1039 goto cleanup; 1040 } 1041 1042 client_sk = accept(sk, NULL, NULL); 1043 if (client_sk < 0) 1044 goto cleanup; 1045 1046 ret = -1; 1047 for (;;) { 1048 uint8_t buf[0x100000]; 1049 1050 if (write(client_sk, buf, sizeof(buf)) <= 0) { 1051 if (errno == ECONNRESET) 1052 ret = 0; 1053 break; 1054 } 1055 } 1056 1057 close(client_sk); 1058 1059 cleanup: 1060 close(sk); 1061 return ret; 1062 } 1063 1064 static int tcp_client(const char *cgroup, unsigned short port) 1065 { 1066 const char server[] = "localhost"; 1067 struct addrinfo *ai; 1068 char servport[6]; 1069 int retries = 0x10; /* nice round number */ 1070 int sk, ret; 1071 1072 snprintf(servport, sizeof(servport), "%hd", port); 1073 ret = getaddrinfo(server, servport, NULL, &ai); 1074 if (ret) 1075 return ret; 1076 1077 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); 1078 if (sk < 0) 1079 goto free_ainfo; 1080 1081 ret = connect(sk, ai->ai_addr, ai->ai_addrlen); 1082 if (ret < 0) 1083 goto close_sk; 1084 1085 ret = KSFT_FAIL; 1086 while (retries--) { 1087 uint8_t buf[0x100000]; 1088 long current, sock; 1089 1090 if (read(sk, buf, sizeof(buf)) <= 0) 1091 goto close_sk; 1092 1093 current = cg_read_long(cgroup, "memory.current"); 1094 sock = cg_read_key_long(cgroup, "memory.stat", "sock "); 1095 1096 if (current < 0 || sock < 0) 1097 goto close_sk; 1098 1099 if (current < sock) 1100 goto close_sk; 1101 1102 if (values_close(current, sock, 10)) { 1103 ret = KSFT_PASS; 1104 break; 1105 } 1106 } 1107 1108 close_sk: 1109 close(sk); 1110 free_ainfo: 1111 freeaddrinfo(ai); 1112 return ret; 1113 } 1114 1115 /* 1116 * This test checks socket memory accounting. 1117 * The test forks a TCP server listens on a random port between 1000 1118 * and 61000. 
 * Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with checks whether
 * memory.current and memory.stat.sock are similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		/* Random port in [1000, 60999] */
		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		close(args.ctl[1]);
		/* The server reports its bind() status through the pipe */
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		/* Port collision: reap the failed server and retry */
		waitpid(pid, NULL, 0);
	}

	/* All retries collided with busy ports: inconclusive, skip */
	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* All sockets are closed: no memory should remain charged to sock */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the leaf were killed. It also checks that oom_events
 * were propagated to the parent level.
 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	/* On OOM, kill all processes of the child cgroup as a group */
	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	/* This allocation must exceed the child's 50M limit and trigger OOM */
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* All processes of the child must have been killed */
	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	/* oom_kill events must propagate to the parent's memory.events */
	if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set. Then it checks that all
 * processes in the parent and leaf were killed.
 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	/* Group kill is set on the parent: the whole subtree is one unit */
	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	/* This allocation must exceed the parent's 80M limit and trigger OOM */
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Processes in both the leaf and the parent must have been killed */
	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}

/*
 * This test disables swapping and tries to allocate anonymous memory
 * up to OOM with memory.group.oom set.
 * Then it checks that all
 * processes were killed except those set with OOM_SCORE_ADJ_MIN
 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	/* OOM_SCORE_ADJ_MIN exempts this process from the group kill */
	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	/* This allocation must exceed the 50M limit and trigger OOM */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Expect exactly 3 oom_kill events; the exempted task is not one */
	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	/* The exempted process must still be alive (kill() must succeed) */
	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}


#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	/* Enable the memory controller for children if not yet delegated */
	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}