1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <poll.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/inotify.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 #include <sys/wait.h> 17 #include <unistd.h> 18 19 #include "cgroup_util.h" 20 #include "../clone3/clone3_selftests.h" 21 22 static ssize_t read_text(const char *path, char *buf, size_t max_len) 23 { 24 ssize_t len; 25 int fd; 26 27 fd = open(path, O_RDONLY); 28 if (fd < 0) 29 return fd; 30 31 len = read(fd, buf, max_len - 1); 32 if (len < 0) 33 goto out; 34 35 buf[len] = 0; 36 out: 37 close(fd); 38 return len; 39 } 40 41 static ssize_t write_text(const char *path, char *buf, ssize_t len) 42 { 43 int fd; 44 45 fd = open(path, O_WRONLY | O_APPEND); 46 if (fd < 0) 47 return fd; 48 49 len = write(fd, buf, len); 50 if (len < 0) { 51 close(fd); 52 return len; 53 } 54 55 close(fd); 56 57 return len; 58 } 59 60 char *cg_name(const char *root, const char *name) 61 { 62 size_t len = strlen(root) + strlen(name) + 2; 63 char *ret = malloc(len); 64 65 snprintf(ret, len, "%s/%s", root, name); 66 67 return ret; 68 } 69 70 char *cg_name_indexed(const char *root, const char *name, int index) 71 { 72 size_t len = strlen(root) + strlen(name) + 10; 73 char *ret = malloc(len); 74 75 snprintf(ret, len, "%s/%s_%d", root, name, index); 76 77 return ret; 78 } 79 80 char *cg_control(const char *cgroup, const char *control) 81 { 82 size_t len = strlen(cgroup) + strlen(control) + 2; 83 char *ret = malloc(len); 84 85 snprintf(ret, len, "%s/%s", cgroup, control); 86 87 return ret; 88 } 89 90 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 91 { 92 char path[PATH_MAX]; 93 94 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 95 96 if (read_text(path, buf, len) >= 0) 97 return 0; 98 99 return -1; 100 } 101 102 int cg_read_strcmp(const char *cgroup, const char *control, 103 const char *expected) 104 { 105 size_t size; 106 char *buf; 107 int ret; 108 109 /* Handle the case of comparing against empty string */ 110 if (!expected) 111 return -1; 112 else 113 size = strlen(expected) + 1; 114 115 buf = malloc(size); 116 if (!buf) 117 return -1; 118 119 if (cg_read(cgroup, control, buf, size)) { 120 free(buf); 121 return -1; 122 } 123 124 ret = strcmp(expected, buf); 125 free(buf); 126 return ret; 127 } 128 129 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 130 { 131 char buf[PAGE_SIZE]; 132 133 if (cg_read(cgroup, control, buf, sizeof(buf))) 134 return -1; 135 136 return strstr(buf, needle) ? 0 : -1; 137 } 138 139 long cg_read_long(const char *cgroup, const char *control) 140 { 141 char buf[128]; 142 143 if (cg_read(cgroup, control, buf, sizeof(buf))) 144 return -1; 145 146 return atol(buf); 147 } 148 149 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 150 { 151 char buf[PAGE_SIZE]; 152 char *ptr; 153 154 if (cg_read(cgroup, control, buf, sizeof(buf))) 155 return -1; 156 157 ptr = strstr(buf, key); 158 if (!ptr) 159 return -1; 160 161 return atol(ptr + strlen(key)); 162 } 163 164 long cg_read_lc(const char *cgroup, const char *control) 165 { 166 char buf[PAGE_SIZE]; 167 const char delim[] = "\n"; 168 char *line; 169 long cnt = 0; 170 171 if (cg_read(cgroup, control, buf, sizeof(buf))) 172 return -1; 173 174 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 175 cnt++; 176 177 return cnt; 178 } 179 180 int cg_write(const char *cgroup, const char *control, char *buf) 181 { 182 char path[PATH_MAX]; 183 ssize_t len = strlen(buf); 184 185 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 186 187 if (write_text(path, buf, len) == len) 188 return 0; 189 190 return -1; 191 } 192 193 int cg_write_numeric(const char *cgroup, const char *control, long value) 194 { 195 char buf[64]; 196 int ret; 197 198 ret = sprintf(buf, "%lu", value); 199 if (ret < 0) 200 return ret; 201 202 return cg_write(cgroup, control, buf); 203 } 204 205 int cg_find_unified_root(char *root, size_t len) 206 { 207 char buf[10 * PAGE_SIZE]; 208 char *fs, *mount, *type; 209 const char delim[] = "\n\t "; 210 211 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 212 return -1; 213 214 /* 215 * Example: 216 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 217 */ 218 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 219 mount = strtok(NULL, delim); 220 type = strtok(NULL, delim); 221 strtok(NULL, delim); 222 strtok(NULL, delim); 223 strtok(NULL, delim); 224 225 if (strcmp(type, "cgroup2") == 0) { 226 strncpy(root, mount, len); 227 return 0; 228 } 229 } 230 231 return -1; 232 } 233 234 int cg_create(const char *cgroup) 235 { 236 return mkdir(cgroup, 0755); 237 } 238 239 int cg_wait_for_proc_count(const char *cgroup, int count) 240 { 241 char buf[10 * PAGE_SIZE] = {0}; 242 int attempts; 243 char *ptr; 244 245 for (attempts = 10; attempts >= 0; attempts--) { 246 int nr = 0; 247 248 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 249 break; 250 251 for (ptr = buf; *ptr; ptr++) 252 if (*ptr == '\n') 253 nr++; 254 255 if (nr >= count) 256 return 0; 257 258 usleep(100000); 259 } 260 261 return -1; 262 } 263 264 int cg_killall(const char *cgroup) 265 { 266 char buf[PAGE_SIZE]; 267 char *ptr = buf; 268 269 /* If cgroup.kill exists use it. */ 270 if (!cg_write(cgroup, "cgroup.kill", "1")) 271 return 0; 272 273 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 274 return -1; 275 276 while (ptr < buf + sizeof(buf)) { 277 int pid = strtol(ptr, &ptr, 10); 278 279 if (pid == 0) 280 break; 281 if (*ptr) 282 ptr++; 283 else 284 break; 285 if (kill(pid, SIGKILL)) 286 return -1; 287 } 288 289 return 0; 290 } 291 292 int cg_destroy(const char *cgroup) 293 { 294 int ret; 295 296 retry: 297 ret = rmdir(cgroup); 298 if (ret && errno == EBUSY) { 299 cg_killall(cgroup); 300 usleep(100); 301 goto retry; 302 } 303 304 if (ret && errno == ENOENT) 305 ret = 0; 306 307 return ret; 308 } 309 310 int cg_enter(const char *cgroup, int pid) 311 { 312 char pidbuf[64]; 313 314 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 315 return cg_write(cgroup, "cgroup.procs", pidbuf); 316 } 317 318 int cg_enter_current(const char *cgroup) 319 { 320 return cg_write(cgroup, "cgroup.procs", "0"); 321 } 322 323 int cg_enter_current_thread(const char *cgroup) 324 { 325 return cg_write(cgroup, "cgroup.threads", "0"); 326 } 327 328 int cg_run(const char *cgroup, 329 int (*fn)(const char *cgroup, void *arg), 330 void *arg) 331 { 332 int pid, retcode; 333 334 pid = fork(); 335 if (pid < 0) { 336 return pid; 337 } else if (pid == 0) { 338 char buf[64]; 339 340 snprintf(buf, sizeof(buf), "%d", getpid()); 341 if (cg_write(cgroup, "cgroup.procs", buf)) 342 exit(EXIT_FAILURE); 343 exit(fn(cgroup, arg)); 344 } else { 345 waitpid(pid, &retcode, 0); 346 if (WIFEXITED(retcode)) 347 return WEXITSTATUS(retcode); 348 else 349 return -1; 350 } 351 } 352 353 pid_t clone_into_cgroup(int cgroup_fd) 354 { 355 #ifdef CLONE_ARGS_SIZE_VER2 356 pid_t pid; 357 358 struct __clone_args args = { 359 .flags = CLONE_INTO_CGROUP, 360 .exit_signal = SIGCHLD, 361 .cgroup = cgroup_fd, 362 }; 363 364 pid = sys_clone3(&args, sizeof(struct __clone_args)); 365 /* 366 * Verify that this is a genuine test failure: 367 * ENOSYS -> clone3() not available 368 * E2BIG -> CLONE_INTO_CGROUP not available 369 */ 370 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 371 goto pretend_enosys; 372 373 return pid; 374 375 pretend_enosys: 376 #endif 377 errno = ENOSYS; 378 return -ENOSYS; 379 } 380 381 int clone_reap(pid_t pid, int options) 382 { 383 int ret; 384 siginfo_t info = { 385 .si_signo = 0, 386 }; 387 388 again: 389 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 390 if (ret < 0) { 391 if (errno == EINTR) 392 goto again; 393 return -1; 394 } 395 396 if (options & WEXITED) { 397 if (WIFEXITED(info.si_status)) 398 return WEXITSTATUS(info.si_status); 399 } 400 401 if (options & WSTOPPED) { 402 if (WIFSTOPPED(info.si_status)) 403 return WSTOPSIG(info.si_status); 404 } 405 406 if (options & WCONTINUED) { 407 if (WIFCONTINUED(info.si_status)) 408 return 0; 409 } 410 411 return -1; 412 } 413 414 int dirfd_open_opath(const char *dir) 415 { 416 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 417 } 418 419 #define close_prot_errno(fd) \ 420 if (fd >= 0) { \ 421 int _e_ = errno; \ 422 close(fd); \ 423 errno = _e_; \ 424 } 425 426 static int clone_into_cgroup_run_nowait(const char *cgroup, 427 int (*fn)(const char *cgroup, void *arg), 428 void *arg) 429 { 430 int cgroup_fd; 431 pid_t pid; 432 433 cgroup_fd = dirfd_open_opath(cgroup); 434 if (cgroup_fd < 0) 435 return -1; 436 437 pid = clone_into_cgroup(cgroup_fd); 438 close_prot_errno(cgroup_fd); 439 if (pid == 0) 440 exit(fn(cgroup, arg)); 441 442 return pid; 443 } 444 445 int cg_run_nowait(const char *cgroup, 446 int (*fn)(const char *cgroup, void *arg), 447 void *arg) 448 { 449 int pid; 450 451 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 452 if (pid > 0) 453 return pid; 454 455 /* Genuine test failure. */ 456 if (pid < 0 && errno != ENOSYS) 457 return -1; 458 459 pid = fork(); 460 if (pid == 0) { 461 char buf[64]; 462 463 snprintf(buf, sizeof(buf), "%d", getpid()); 464 if (cg_write(cgroup, "cgroup.procs", buf)) 465 exit(EXIT_FAILURE); 466 exit(fn(cgroup, arg)); 467 } 468 469 return pid; 470 } 471 472 int get_temp_fd(void) 473 { 474 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 475 } 476 477 int alloc_pagecache(int fd, size_t size) 478 { 479 char buf[PAGE_SIZE]; 480 struct stat st; 481 int i; 482 483 if (fstat(fd, &st)) 484 goto cleanup; 485 486 size += st.st_size; 487 488 if (ftruncate(fd, size)) 489 goto cleanup; 490 491 for (i = 0; i < size; i += sizeof(buf)) 492 read(fd, buf, sizeof(buf)); 493 494 return 0; 495 496 cleanup: 497 return -1; 498 } 499 500 int alloc_anon(const char *cgroup, void *arg) 501 { 502 size_t size = (unsigned long)arg; 503 char *buf, *ptr; 504 505 buf = malloc(size); 506 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 507 *ptr = 0; 508 509 free(buf); 510 return 0; 511 } 512 513 int is_swap_enabled(void) 514 { 515 char buf[PAGE_SIZE]; 516 const char delim[] = "\n"; 517 int cnt = 0; 518 char *line; 519 520 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 521 return -1; 522 523 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 524 cnt++; 525 526 return cnt > 1; 527 } 528 529 int set_oom_adj_score(int pid, int score) 530 { 531 char path[PATH_MAX]; 532 int fd, len; 533 534 sprintf(path, "/proc/%d/oom_score_adj", pid); 535 536 fd = open(path, O_WRONLY | O_APPEND); 537 if (fd < 0) 538 return fd; 539 540 len = dprintf(fd, "%d", score); 541 if (len < 0) { 542 close(fd); 543 return len; 544 } 545 546 close(fd); 547 return 0; 548 } 549 550 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 551 { 552 char path[PATH_MAX]; 553 554 if (!pid) 555 snprintf(path, sizeof(path), "/proc/%s/%s", 556 thread ? "thread-self" : "self", item); 557 else 558 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 559 560 return read_text(path, buf, size); 561 } 562 563 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 564 { 565 char buf[PAGE_SIZE]; 566 567 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 568 return -1; 569 570 return strstr(buf, needle) ? 0 : -1; 571 } 572 573 int clone_into_cgroup_run_wait(const char *cgroup) 574 { 575 int cgroup_fd; 576 pid_t pid; 577 578 cgroup_fd = dirfd_open_opath(cgroup); 579 if (cgroup_fd < 0) 580 return -1; 581 582 pid = clone_into_cgroup(cgroup_fd); 583 close_prot_errno(cgroup_fd); 584 if (pid < 0) 585 return -1; 586 587 if (pid == 0) 588 exit(EXIT_SUCCESS); 589 590 /* 591 * We don't care whether this fails. We only care whether the initial 592 * clone succeeded. 593 */ 594 (void)clone_reap(pid, WEXITED); 595 return 0; 596 } 597 598 static int __prepare_for_wait(const char *cgroup, const char *filename) 599 { 600 int fd, ret = -1; 601 602 fd = inotify_init1(0); 603 if (fd == -1) 604 return fd; 605 606 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 607 if (ret == -1) { 608 close(fd); 609 fd = -1; 610 } 611 612 return fd; 613 } 614 615 int cg_prepare_for_wait(const char *cgroup) 616 { 617 return __prepare_for_wait(cgroup, "cgroup.events"); 618 } 619 620 int memcg_prepare_for_wait(const char *cgroup) 621 { 622 return __prepare_for_wait(cgroup, "memory.events"); 623 } 624 625 int cg_wait_for(int fd) 626 { 627 int ret = -1; 628 struct pollfd fds = { 629 .fd = fd, 630 .events = POLLIN, 631 }; 632 633 while (true) { 634 ret = poll(&fds, 1, 10000); 635 636 if (ret == -1) { 637 if (errno == EINTR) 638 continue; 639 640 break; 641 } 642 643 if (ret > 0 && fds.revents & POLLIN) { 644 ret = 0; 645 break; 646 } 647 } 648 649 return ret; 650 } 651