// SPDX-License-Identifier: GPL-2.0

#define _GNU_SOURCE
#include <linux/limits.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#include "../kselftest.h"
#include "cgroup_util.h"

enum hog_clock_type {
	// Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
	CPU_HOG_CLOCK_PROCESS,
	// Count elapsed time using system wallclock time.
	CPU_HOG_CLOCK_WALL,
};

struct cpu_hogger {
	char *cgroup;
	pid_t pid;
	long usage;
};

struct cpu_hog_func_param {
	int nprocs;
	struct timespec ts;
	enum hog_clock_type clock_type;
};

/*
 * This test creates two nested cgroups with and without enabling
 * the cpu controller.
 */
static int test_cpucg_subtree_control(const char *root)
{
	char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;

	// Create two nested cgroups with the cpu controller enabled.
	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	child = cg_name(parent, "cpucg_test_child");
	if (!child)
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
		goto cleanup;

	// Create two nested cgroups without enabling the cpu controller.
	parent2 = cg_name(root, "cpucg_test_1");
	if (!parent2)
		goto cleanup;

	if (cg_create(parent2))
		goto cleanup;

	child2 = cg_name(parent2, "cpucg_test_child");
	if (!child2)
		goto cleanup;

	if (cg_create(child2))
		goto cleanup;

	if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(child2);
	free(child2);
	cg_destroy(parent);
	free(parent);
	cg_destroy(parent2);
	free(parent2);

	return ret;
}

static void *hog_cpu_thread_func(void *arg)
{
	while (1)
		;

	return NULL;
}

static struct timespec
timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
{
	struct timespec zero = {
		.tv_sec = 0,
		.tv_nsec = 0,
	};
	struct timespec ret;

	if (lhs->tv_sec < rhs->tv_sec)
		return zero;

	ret.tv_sec = lhs->tv_sec - rhs->tv_sec;

	if (lhs->tv_nsec < rhs->tv_nsec) {
		if (ret.tv_sec == 0)
			return zero;

		ret.tv_sec--;
		ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
	} else
		ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;

	return ret;
}

static int hog_cpus_timed(const char *cgroup, void *arg)
{
	const struct cpu_hog_func_param *param =
		(struct cpu_hog_func_param *)arg;
	struct timespec ts_run = param->ts;
	struct timespec ts_remaining = ts_run;
	struct timespec ts_start;
	int i, ret;

	ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
	if (ret != 0)
		return ret;

	for (i = 0; i < param->nprocs; i++) {
		pthread_t tid;

		ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
		if (ret != 0)
			return ret;
	}

	while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
		struct timespec ts_total;

		ret = nanosleep(&ts_remaining, NULL);
		if (ret && errno != EINTR)
			return ret;

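		/*
		 * CPU_HOG_CLOCK_PROCESS counts the CPU time actually consumed
		 * by this process (summed over its hogging threads), while
		 * CPU_HOG_CLOCK_WALL counts elapsed wallclock time since the
		 * hog was started.
		 */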
		if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
			ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID,
					&ts_total);
			if (ret != 0)
				return ret;
		} else {
			struct timespec ts_current;

			ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
			if (ret != 0)
				return ret;

			ts_total = timespec_sub(&ts_current, &ts_start);
		}

		ts_remaining = timespec_sub(&ts_run, &ts_total);
	}

	return 0;
}

/*
 * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
 * cpu.stat shows the expected output.
 */
static int test_cpucg_stats(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec, system_usec;
	long usage_seconds = 2;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
	if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_PROCESS,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (!values_close(usage_usec, expected_usage_usec, 1))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

static int
run_cpucg_weight_test(
		const char *root,
		pid_t (*spawn_child)(const struct cpu_hogger *child),
		int (*validate)(const struct cpu_hogger *children, int num_children))
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL;
	struct cpu_hogger children[3] = {NULL};

	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
		if (!children[i].cgroup)
			goto cleanup;

		if (cg_create(children[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(children[i].cgroup, "cpu.weight",
					50 * (i + 1)))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		pid_t pid = spawn_child(&children[i]);
		if (pid <= 0)
			goto cleanup;
		children[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int retcode;

		waitpid(children[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++)
		children[i].usage = cg_read_key_long(children[i].cgroup,
				"cpu.stat", "usage_usec");

	if (validate(children, ARRAY_SIZE(children)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		cg_destroy(children[i].cgroup);
		free(children[i].cgroup);
	}
	cg_destroy(parent);
	free(parent);

	return ret;
}

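/*
 * Spawns a process in the child's cgroup that hogs @ncpus CPUs for a fixed
 * wallclock interval, returning its pid without waiting for it to exit.
 */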
static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
{
	long usage_seconds = 10;
	struct cpu_hog_func_param param = {
		.nprocs = ncpus,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
}

static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, get_nprocs());
}

static int
overprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		long delta;

		if (children[i + 1].usage <= children[i].usage)
			goto cleanup;

		delta = children[i + 1].usage - children[i].usage;
		if (!values_close(delta, children[0].usage, 35))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 50
 * A/C     cpu.weight = 100
 * A/D     cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns as
 * many threads as there are cores, and hogs each CPU as much as possible
 * for some time interval.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * was given proportional runtime as informed by their cpu.weight.
 */
static int test_cpucg_weight_overprovisioned(const char *root)
{
	return run_cpucg_weight_test(root, weight_hog_all_cpus,
			overprovision_validate);
}

static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, 1);
}

static int
underprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		if (!values_close(children[i + 1].usage, children[0].usage, 15))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 50
 * A/C     cpu.weight = 100
 * A/D     cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns a
 * single thread that hogs a CPU. The testcase is only run on systems that
 * have at least one core per thread in the child processes.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * had roughly the same runtime despite having different cpu.weight.
 */
static int test_cpucg_weight_underprovisioned(const char *root)
{
	// Only run the test if there are enough cores to avoid overprovisioning
	// the system.
	if (get_nprocs() < 4)
		return KSFT_SKIP;

	return run_cpucg_weight_test(root, weight_hog_one_cpu,
			underprovision_validate);
}

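/*
 * Shared helper for the two nested weight tests below: it builds the
 * hierarchy described in their comments, runs a wallclock-timed CPU hog in
 * each leaf cgroup, and validates the usage_usec values reported in
 * cpu.stat. @overprovisioned selects whether the hogs saturate every core,
 * or leave the machine undercommitted by hogging only nprocs / 4 CPUs per
 * leaf.
 */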
static int
run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL, *child = NULL;
	struct cpu_hogger leaf[3] = {NULL};
	long nested_leaf_usage, child_usage;
	int nprocs = get_nprocs();

	if (!overprovisioned) {
		if (nprocs < 4)
			/*
			 * Only run the test if there are enough cores to avoid
			 * overprovisioning the system.
			 */
			return KSFT_SKIP;
		nprocs /= 4;
	}

	parent = cg_name(root, "cpucg_test");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;
	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;
	if (cg_write(child, "cgroup.subtree_control", "+cpu"))
		goto cleanup;
	if (cg_write(child, "cpu.weight", "1000"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		const char *ancestor;
		long weight;

		if (i == 0) {
			ancestor = parent;
			weight = 1000;
		} else {
			ancestor = child;
			weight = 5000;
		}
		leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
		if (!leaf[i].cgroup)
			goto cleanup;

		if (cg_create(leaf[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		pid_t pid;
		struct cpu_hog_func_param param = {
			.nprocs = nprocs,
			.ts = {
				.tv_sec = 10,
				.tv_nsec = 0,
			},
			.clock_type = CPU_HOG_CLOCK_WALL,
		};

		pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
				(void *)&param);
		if (pid <= 0)
			goto cleanup;
		leaf[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		int retcode;

		waitpid(leaf[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
				"cpu.stat", "usage_usec");
		if (leaf[i].usage <= 0)
			goto cleanup;
	}

	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
	if (overprovisioned) {
		if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
			goto cleanup;
	} else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
		goto cleanup;

	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
	if (child_usage <= 0)
		goto cleanup;
	if (!values_close(child_usage, nested_leaf_usage, 1))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		cg_destroy(leaf[i].cgroup);
		free(leaf[i].cgroup);
	}
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns nproc
 * threads that burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that each of the leaf
 * cgroups has roughly the same usage from cpu.stat.
 */
static int
test_cpucg_nested_weight_overprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, true);
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns
 * nproc / 4 threads that each burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that each of the leaf
 * cgroups has roughly the same usage from cpu.stat.
 */
static int
test_cpucg_nested_weight_underprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, false);
}

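/*
 * Note on cpu.max for the two tests below: it takes "$MAX $PERIOD" in
 * microseconds, and writing only "1000" keeps the default period of
 * 100000us, so the hogging process is capped at roughly 1ms of CPU time
 * per 100ms period.
 */
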
/*
 * This test creates a cgroup with some maximum value within a period, and
 * verifies that a process in the cgroup is not overscheduled.
 */
static int test_cpucg_max(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec;
	long usage_seconds = 1;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	if (cg_write(cpucg, "cpu.max", "1000"))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (user_usec >= expected_usage_usec)
		goto cleanup;

	if (values_close(usage_usec, expected_usage_usec, 95))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

/*
 * This test verifies that a process inside of a nested cgroup whose parent
 * cgroup has a cpu.max value set is properly throttled.
 */
static int test_cpucg_max_nested(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec;
	long usage_seconds = 1;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *parent, *child;

	parent = cg_name(root, "cpucg_parent");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cpu.max", "1000"))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(child, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (user_usec >= expected_usage_usec)
		goto cleanup;

	if (values_close(usage_usec, expected_usage_usec, 95))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

#define T(x) { x, #x }
struct cpucg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_cpucg_subtree_control),
	T(test_cpucg_stats),
	T(test_cpucg_weight_overprovisioned),
	T(test_cpucg_weight_underprovisioned),
	T(test_cpucg_nested_weight_overprovisioned),
	T(test_cpucg_nested_weight_underprovisioned),
	T(test_cpucg_max),
	T(test_cpucg_max_nested),
};
#undef T

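/*
 * Skips the whole suite if cgroup v2 isn't mounted; otherwise enables the
 * cpu controller in the root cgroup's subtree_control (if it isn't already
 * enabled) and reports a kselftest result for each test above.
 */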
int main(int argc, char *argv[])
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
		if (cg_write(root, "cgroup.subtree_control", "+cpu"))
			ksft_exit_skip("Failed to set cpu controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}