1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <poll.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/inotify.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 #include <sys/wait.h> 17 #include <unistd.h> 18 19 #include "cgroup_util.h" 20 #include "../clone3/clone3_selftests.h" 21 22 /* Returns read len on success, or -errno on failure. */ 23 static ssize_t read_text(const char *path, char *buf, size_t max_len) 24 { 25 ssize_t len; 26 int fd; 27 28 fd = open(path, O_RDONLY); 29 if (fd < 0) 30 return -errno; 31 32 len = read(fd, buf, max_len - 1); 33 34 if (len >= 0) 35 buf[len] = 0; 36 37 close(fd); 38 return len < 0 ? -errno : len; 39 } 40 41 /* Returns written len on success, or -errno on failure. */ 42 static ssize_t write_text(const char *path, char *buf, ssize_t len) 43 { 44 int fd; 45 46 fd = open(path, O_WRONLY | O_APPEND); 47 if (fd < 0) 48 return -errno; 49 50 len = write(fd, buf, len); 51 close(fd); 52 return len < 0 ? -errno : len; 53 } 54 55 char *cg_name(const char *root, const char *name) 56 { 57 size_t len = strlen(root) + strlen(name) + 2; 58 char *ret = malloc(len); 59 60 snprintf(ret, len, "%s/%s", root, name); 61 62 return ret; 63 } 64 65 char *cg_name_indexed(const char *root, const char *name, int index) 66 { 67 size_t len = strlen(root) + strlen(name) + 10; 68 char *ret = malloc(len); 69 70 snprintf(ret, len, "%s/%s_%d", root, name, index); 71 72 return ret; 73 } 74 75 char *cg_control(const char *cgroup, const char *control) 76 { 77 size_t len = strlen(cgroup) + strlen(control) + 2; 78 char *ret = malloc(len); 79 80 snprintf(ret, len, "%s/%s", cgroup, control); 81 82 return ret; 83 } 84 85 /* Returns 0 on success, or -errno on failure. */ 86 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 87 { 88 char path[PATH_MAX]; 89 ssize_t ret; 90 91 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 92 93 ret = read_text(path, buf, len); 94 return ret >= 0 ? 0 : ret; 95 } 96 97 int cg_read_strcmp(const char *cgroup, const char *control, 98 const char *expected) 99 { 100 size_t size; 101 char *buf; 102 int ret; 103 104 /* Handle the case of comparing against empty string */ 105 if (!expected) 106 return -1; 107 else 108 size = strlen(expected) + 1; 109 110 buf = malloc(size); 111 if (!buf) 112 return -1; 113 114 if (cg_read(cgroup, control, buf, size)) { 115 free(buf); 116 return -1; 117 } 118 119 ret = strcmp(expected, buf); 120 free(buf); 121 return ret; 122 } 123 124 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 125 { 126 char buf[PAGE_SIZE]; 127 128 if (cg_read(cgroup, control, buf, sizeof(buf))) 129 return -1; 130 131 return strstr(buf, needle) ? 0 : -1; 132 } 133 134 long cg_read_long(const char *cgroup, const char *control) 135 { 136 char buf[128]; 137 138 if (cg_read(cgroup, control, buf, sizeof(buf))) 139 return -1; 140 141 return atol(buf); 142 } 143 144 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 145 { 146 char buf[PAGE_SIZE]; 147 char *ptr; 148 149 if (cg_read(cgroup, control, buf, sizeof(buf))) 150 return -1; 151 152 ptr = strstr(buf, key); 153 if (!ptr) 154 return -1; 155 156 return atol(ptr + strlen(key)); 157 } 158 159 long cg_read_lc(const char *cgroup, const char *control) 160 { 161 char buf[PAGE_SIZE]; 162 const char delim[] = "\n"; 163 char *line; 164 long cnt = 0; 165 166 if (cg_read(cgroup, control, buf, sizeof(buf))) 167 return -1; 168 169 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 170 cnt++; 171 172 return cnt; 173 } 174 175 /* Returns 0 on success, or -errno on failure. */ 176 int cg_write(const char *cgroup, const char *control, char *buf) 177 { 178 char path[PATH_MAX]; 179 ssize_t len = strlen(buf), ret; 180 181 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 182 ret = write_text(path, buf, len); 183 return ret == len ? 0 : ret; 184 } 185 186 int cg_find_unified_root(char *root, size_t len) 187 { 188 char buf[10 * PAGE_SIZE]; 189 char *fs, *mount, *type; 190 const char delim[] = "\n\t "; 191 192 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 193 return -1; 194 195 /* 196 * Example: 197 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 198 */ 199 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 200 mount = strtok(NULL, delim); 201 type = strtok(NULL, delim); 202 strtok(NULL, delim); 203 strtok(NULL, delim); 204 strtok(NULL, delim); 205 206 if (strcmp(type, "cgroup2") == 0) { 207 strncpy(root, mount, len); 208 return 0; 209 } 210 } 211 212 return -1; 213 } 214 215 int cg_create(const char *cgroup) 216 { 217 return mkdir(cgroup, 0755); 218 } 219 220 int cg_wait_for_proc_count(const char *cgroup, int count) 221 { 222 char buf[10 * PAGE_SIZE] = {0}; 223 int attempts; 224 char *ptr; 225 226 for (attempts = 10; attempts >= 0; attempts--) { 227 int nr = 0; 228 229 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 230 break; 231 232 for (ptr = buf; *ptr; ptr++) 233 if (*ptr == '\n') 234 nr++; 235 236 if (nr >= count) 237 return 0; 238 239 usleep(100000); 240 } 241 242 return -1; 243 } 244 245 int cg_killall(const char *cgroup) 246 { 247 char buf[PAGE_SIZE]; 248 char *ptr = buf; 249 250 /* If cgroup.kill exists use it. */ 251 if (!cg_write(cgroup, "cgroup.kill", "1")) 252 return 0; 253 254 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 255 return -1; 256 257 while (ptr < buf + sizeof(buf)) { 258 int pid = strtol(ptr, &ptr, 10); 259 260 if (pid == 0) 261 break; 262 if (*ptr) 263 ptr++; 264 else 265 break; 266 if (kill(pid, SIGKILL)) 267 return -1; 268 } 269 270 return 0; 271 } 272 273 int cg_destroy(const char *cgroup) 274 { 275 int ret; 276 277 retry: 278 ret = rmdir(cgroup); 279 if (ret && errno == EBUSY) { 280 cg_killall(cgroup); 281 usleep(100); 282 goto retry; 283 } 284 285 if (ret && errno == ENOENT) 286 ret = 0; 287 288 return ret; 289 } 290 291 int cg_enter(const char *cgroup, int pid) 292 { 293 char pidbuf[64]; 294 295 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 296 return cg_write(cgroup, "cgroup.procs", pidbuf); 297 } 298 299 int cg_enter_current(const char *cgroup) 300 { 301 return cg_write(cgroup, "cgroup.procs", "0"); 302 } 303 304 int cg_enter_current_thread(const char *cgroup) 305 { 306 return cg_write(cgroup, "cgroup.threads", "0"); 307 } 308 309 int cg_run(const char *cgroup, 310 int (*fn)(const char *cgroup, void *arg), 311 void *arg) 312 { 313 int pid, retcode; 314 315 pid = fork(); 316 if (pid < 0) { 317 return pid; 318 } else if (pid == 0) { 319 char buf[64]; 320 321 snprintf(buf, sizeof(buf), "%d", getpid()); 322 if (cg_write(cgroup, "cgroup.procs", buf)) 323 exit(EXIT_FAILURE); 324 exit(fn(cgroup, arg)); 325 } else { 326 waitpid(pid, &retcode, 0); 327 if (WIFEXITED(retcode)) 328 return WEXITSTATUS(retcode); 329 else 330 return -1; 331 } 332 } 333 334 pid_t clone_into_cgroup(int cgroup_fd) 335 { 336 #ifdef CLONE_ARGS_SIZE_VER2 337 pid_t pid; 338 339 struct __clone_args args = { 340 .flags = CLONE_INTO_CGROUP, 341 .exit_signal = SIGCHLD, 342 .cgroup = cgroup_fd, 343 }; 344 345 pid = sys_clone3(&args, sizeof(struct __clone_args)); 346 /* 347 * Verify that this is a genuine test failure: 348 * ENOSYS -> clone3() not available 349 * E2BIG -> CLONE_INTO_CGROUP not available 350 */ 351 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 352 goto pretend_enosys; 353 354 return pid; 355 356 pretend_enosys: 357 #endif 358 errno = ENOSYS; 359 return -ENOSYS; 360 } 361 362 int clone_reap(pid_t pid, int options) 363 { 364 int ret; 365 siginfo_t info = { 366 .si_signo = 0, 367 }; 368 369 again: 370 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 371 if (ret < 0) { 372 if (errno == EINTR) 373 goto again; 374 return -1; 375 } 376 377 if (options & WEXITED) { 378 if (WIFEXITED(info.si_status)) 379 return WEXITSTATUS(info.si_status); 380 } 381 382 if (options & WSTOPPED) { 383 if (WIFSTOPPED(info.si_status)) 384 return WSTOPSIG(info.si_status); 385 } 386 387 if (options & WCONTINUED) { 388 if (WIFCONTINUED(info.si_status)) 389 return 0; 390 } 391 392 return -1; 393 } 394 395 int dirfd_open_opath(const char *dir) 396 { 397 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 398 } 399 400 #define close_prot_errno(fd) \ 401 if (fd >= 0) { \ 402 int _e_ = errno; \ 403 close(fd); \ 404 errno = _e_; \ 405 } 406 407 static int clone_into_cgroup_run_nowait(const char *cgroup, 408 int (*fn)(const char *cgroup, void *arg), 409 void *arg) 410 { 411 int cgroup_fd; 412 pid_t pid; 413 414 cgroup_fd = dirfd_open_opath(cgroup); 415 if (cgroup_fd < 0) 416 return -1; 417 418 pid = clone_into_cgroup(cgroup_fd); 419 close_prot_errno(cgroup_fd); 420 if (pid == 0) 421 exit(fn(cgroup, arg)); 422 423 return pid; 424 } 425 426 int cg_run_nowait(const char *cgroup, 427 int (*fn)(const char *cgroup, void *arg), 428 void *arg) 429 { 430 int pid; 431 432 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 433 if (pid > 0) 434 return pid; 435 436 /* Genuine test failure. */ 437 if (pid < 0 && errno != ENOSYS) 438 return -1; 439 440 pid = fork(); 441 if (pid == 0) { 442 char buf[64]; 443 444 snprintf(buf, sizeof(buf), "%d", getpid()); 445 if (cg_write(cgroup, "cgroup.procs", buf)) 446 exit(EXIT_FAILURE); 447 exit(fn(cgroup, arg)); 448 } 449 450 return pid; 451 } 452 453 int get_temp_fd(void) 454 { 455 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 456 } 457 458 int alloc_pagecache(int fd, size_t size) 459 { 460 char buf[PAGE_SIZE]; 461 struct stat st; 462 int i; 463 464 if (fstat(fd, &st)) 465 goto cleanup; 466 467 size += st.st_size; 468 469 if (ftruncate(fd, size)) 470 goto cleanup; 471 472 for (i = 0; i < size; i += sizeof(buf)) 473 read(fd, buf, sizeof(buf)); 474 475 return 0; 476 477 cleanup: 478 return -1; 479 } 480 481 int alloc_anon(const char *cgroup, void *arg) 482 { 483 size_t size = (unsigned long)arg; 484 char *buf, *ptr; 485 486 buf = malloc(size); 487 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 488 *ptr = 0; 489 490 free(buf); 491 return 0; 492 } 493 494 int is_swap_enabled(void) 495 { 496 char buf[PAGE_SIZE]; 497 const char delim[] = "\n"; 498 int cnt = 0; 499 char *line; 500 501 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 502 return -1; 503 504 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 505 cnt++; 506 507 return cnt > 1; 508 } 509 510 int set_oom_adj_score(int pid, int score) 511 { 512 char path[PATH_MAX]; 513 int fd, len; 514 515 sprintf(path, "/proc/%d/oom_score_adj", pid); 516 517 fd = open(path, O_WRONLY | O_APPEND); 518 if (fd < 0) 519 return fd; 520 521 len = dprintf(fd, "%d", score); 522 if (len < 0) { 523 close(fd); 524 return len; 525 } 526 527 close(fd); 528 return 0; 529 } 530 531 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 532 { 533 char path[PATH_MAX]; 534 535 if (!pid) 536 snprintf(path, sizeof(path), "/proc/%s/%s", 537 thread ? "thread-self" : "self", item); 538 else 539 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 540 541 size = read_text(path, buf, size); 542 return size < 0 ? -1 : size; 543 } 544 545 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 546 { 547 char buf[PAGE_SIZE]; 548 549 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 550 return -1; 551 552 return strstr(buf, needle) ? 0 : -1; 553 } 554 555 int clone_into_cgroup_run_wait(const char *cgroup) 556 { 557 int cgroup_fd; 558 pid_t pid; 559 560 cgroup_fd = dirfd_open_opath(cgroup); 561 if (cgroup_fd < 0) 562 return -1; 563 564 pid = clone_into_cgroup(cgroup_fd); 565 close_prot_errno(cgroup_fd); 566 if (pid < 0) 567 return -1; 568 569 if (pid == 0) 570 exit(EXIT_SUCCESS); 571 572 /* 573 * We don't care whether this fails. We only care whether the initial 574 * clone succeeded. 575 */ 576 (void)clone_reap(pid, WEXITED); 577 return 0; 578 } 579 580 static int __prepare_for_wait(const char *cgroup, const char *filename) 581 { 582 int fd, ret = -1; 583 584 fd = inotify_init1(0); 585 if (fd == -1) 586 return fd; 587 588 ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); 589 if (ret == -1) { 590 close(fd); 591 fd = -1; 592 } 593 594 return fd; 595 } 596 597 int cg_prepare_for_wait(const char *cgroup) 598 { 599 return __prepare_for_wait(cgroup, "cgroup.events"); 600 } 601 602 int memcg_prepare_for_wait(const char *cgroup) 603 { 604 return __prepare_for_wait(cgroup, "memory.events"); 605 } 606 607 int cg_wait_for(int fd) 608 { 609 int ret = -1; 610 struct pollfd fds = { 611 .fd = fd, 612 .events = POLLIN, 613 }; 614 615 while (true) { 616 ret = poll(&fds, 1, 10000); 617 618 if (ret == -1) { 619 if (errno == EINTR) 620 continue; 621 622 break; 623 } 624 625 if (ret > 0 && fds.revents & POLLIN) { 626 ret = 0; 627 break; 628 } 629 } 630 631 return ret; 632 } 633