1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <linux/limits.h> 8 #include <signal.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <sys/stat.h> 13 #include <sys/types.h> 14 #include <sys/wait.h> 15 #include <unistd.h> 16 17 #include "cgroup_util.h" 18 #include "../clone3/clone3_selftests.h" 19 20 static ssize_t read_text(const char *path, char *buf, size_t max_len) 21 { 22 ssize_t len; 23 int fd; 24 25 fd = open(path, O_RDONLY); 26 if (fd < 0) 27 return fd; 28 29 len = read(fd, buf, max_len - 1); 30 if (len < 0) 31 goto out; 32 33 buf[len] = 0; 34 out: 35 close(fd); 36 return len; 37 } 38 39 static ssize_t write_text(const char *path, char *buf, ssize_t len) 40 { 41 int fd; 42 43 fd = open(path, O_WRONLY | O_APPEND); 44 if (fd < 0) 45 return fd; 46 47 len = write(fd, buf, len); 48 if (len < 0) { 49 close(fd); 50 return len; 51 } 52 53 close(fd); 54 55 return len; 56 } 57 58 char *cg_name(const char *root, const char *name) 59 { 60 size_t len = strlen(root) + strlen(name) + 2; 61 char *ret = malloc(len); 62 63 snprintf(ret, len, "%s/%s", root, name); 64 65 return ret; 66 } 67 68 char *cg_name_indexed(const char *root, const char *name, int index) 69 { 70 size_t len = strlen(root) + strlen(name) + 10; 71 char *ret = malloc(len); 72 73 snprintf(ret, len, "%s/%s_%d", root, name, index); 74 75 return ret; 76 } 77 78 char *cg_control(const char *cgroup, const char *control) 79 { 80 size_t len = strlen(cgroup) + strlen(control) + 2; 81 char *ret = malloc(len); 82 83 snprintf(ret, len, "%s/%s", cgroup, control); 84 85 return ret; 86 } 87 88 int cg_read(const char *cgroup, const char *control, char *buf, size_t len) 89 { 90 char path[PATH_MAX]; 91 92 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 93 94 if (read_text(path, buf, len) >= 0) 95 return 0; 96 97 return -1; 98 } 99 100 int cg_read_strcmp(const char *cgroup, const char *control, 101 const char *expected) 102 { 103 size_t size; 104 char *buf; 105 int ret; 106 107 /* Handle the case of comparing against empty string */ 108 if (!expected) 109 return -1; 110 else 111 size = strlen(expected) + 1; 112 113 buf = malloc(size); 114 if (!buf) 115 return -1; 116 117 if (cg_read(cgroup, control, buf, size)) { 118 free(buf); 119 return -1; 120 } 121 122 ret = strcmp(expected, buf); 123 free(buf); 124 return ret; 125 } 126 127 int cg_read_strstr(const char *cgroup, const char *control, const char *needle) 128 { 129 char buf[PAGE_SIZE]; 130 131 if (cg_read(cgroup, control, buf, sizeof(buf))) 132 return -1; 133 134 return strstr(buf, needle) ? 0 : -1; 135 } 136 137 long cg_read_long(const char *cgroup, const char *control) 138 { 139 char buf[128]; 140 141 if (cg_read(cgroup, control, buf, sizeof(buf))) 142 return -1; 143 144 return atol(buf); 145 } 146 147 long cg_read_key_long(const char *cgroup, const char *control, const char *key) 148 { 149 char buf[PAGE_SIZE]; 150 char *ptr; 151 152 if (cg_read(cgroup, control, buf, sizeof(buf))) 153 return -1; 154 155 ptr = strstr(buf, key); 156 if (!ptr) 157 return -1; 158 159 return atol(ptr + strlen(key)); 160 } 161 162 long cg_read_lc(const char *cgroup, const char *control) 163 { 164 char buf[PAGE_SIZE]; 165 const char delim[] = "\n"; 166 char *line; 167 long cnt = 0; 168 169 if (cg_read(cgroup, control, buf, sizeof(buf))) 170 return -1; 171 172 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 173 cnt++; 174 175 return cnt; 176 } 177 178 int cg_write(const char *cgroup, const char *control, char *buf) 179 { 180 char path[PATH_MAX]; 181 ssize_t len = strlen(buf); 182 183 snprintf(path, sizeof(path), "%s/%s", cgroup, control); 184 185 if (write_text(path, buf, len) == len) 186 return 0; 187 188 return -1; 189 } 190 191 int cg_find_unified_root(char *root, size_t len) 192 { 193 char buf[10 * PAGE_SIZE]; 194 char *fs, *mount, *type; 195 const char delim[] = "\n\t "; 196 197 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) 198 return -1; 199 200 /* 201 * Example: 202 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 203 */ 204 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { 205 mount = strtok(NULL, delim); 206 type = strtok(NULL, delim); 207 strtok(NULL, delim); 208 strtok(NULL, delim); 209 strtok(NULL, delim); 210 211 if (strcmp(type, "cgroup2") == 0) { 212 strncpy(root, mount, len); 213 return 0; 214 } 215 } 216 217 return -1; 218 } 219 220 int cg_create(const char *cgroup) 221 { 222 return mkdir(cgroup, 0644); 223 } 224 225 int cg_wait_for_proc_count(const char *cgroup, int count) 226 { 227 char buf[10 * PAGE_SIZE] = {0}; 228 int attempts; 229 char *ptr; 230 231 for (attempts = 10; attempts >= 0; attempts--) { 232 int nr = 0; 233 234 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 235 break; 236 237 for (ptr = buf; *ptr; ptr++) 238 if (*ptr == '\n') 239 nr++; 240 241 if (nr >= count) 242 return 0; 243 244 usleep(100000); 245 } 246 247 return -1; 248 } 249 250 int cg_killall(const char *cgroup) 251 { 252 char buf[PAGE_SIZE]; 253 char *ptr = buf; 254 255 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) 256 return -1; 257 258 while (ptr < buf + sizeof(buf)) { 259 int pid = strtol(ptr, &ptr, 10); 260 261 if (pid == 0) 262 break; 263 if (*ptr) 264 ptr++; 265 else 266 break; 267 if (kill(pid, SIGKILL)) 268 return -1; 269 } 270 271 return 0; 272 } 273 274 int cg_destroy(const char *cgroup) 275 { 276 int ret; 277 278 retry: 279 ret = rmdir(cgroup); 280 if (ret && errno == EBUSY) { 281 cg_killall(cgroup); 282 usleep(100); 283 goto retry; 284 } 285 286 if (ret && errno == ENOENT) 287 ret = 0; 288 289 return ret; 290 } 291 292 int cg_enter(const char *cgroup, int pid) 293 { 294 char pidbuf[64]; 295 296 snprintf(pidbuf, sizeof(pidbuf), "%d", pid); 297 return cg_write(cgroup, "cgroup.procs", pidbuf); 298 } 299 300 int cg_enter_current(const char *cgroup) 301 { 302 return cg_write(cgroup, "cgroup.procs", "0"); 303 } 304 305 int cg_enter_current_thread(const char *cgroup) 306 { 307 return cg_write(cgroup, "cgroup.threads", "0"); 308 } 309 310 int cg_run(const char *cgroup, 311 int (*fn)(const char *cgroup, void *arg), 312 void *arg) 313 { 314 int pid, retcode; 315 316 pid = fork(); 317 if (pid < 0) { 318 return pid; 319 } else if (pid == 0) { 320 char buf[64]; 321 322 snprintf(buf, sizeof(buf), "%d", getpid()); 323 if (cg_write(cgroup, "cgroup.procs", buf)) 324 exit(EXIT_FAILURE); 325 exit(fn(cgroup, arg)); 326 } else { 327 waitpid(pid, &retcode, 0); 328 if (WIFEXITED(retcode)) 329 return WEXITSTATUS(retcode); 330 else 331 return -1; 332 } 333 } 334 335 pid_t clone_into_cgroup(int cgroup_fd) 336 { 337 #ifdef CLONE_ARGS_SIZE_VER2 338 pid_t pid; 339 340 struct __clone_args args = { 341 .flags = CLONE_INTO_CGROUP, 342 .exit_signal = SIGCHLD, 343 .cgroup = cgroup_fd, 344 }; 345 346 pid = sys_clone3(&args, sizeof(struct __clone_args)); 347 /* 348 * Verify that this is a genuine test failure: 349 * ENOSYS -> clone3() not available 350 * E2BIG -> CLONE_INTO_CGROUP not available 351 */ 352 if (pid < 0 && (errno == ENOSYS || errno == E2BIG)) 353 goto pretend_enosys; 354 355 return pid; 356 357 pretend_enosys: 358 #endif 359 errno = ENOSYS; 360 return -ENOSYS; 361 } 362 363 int clone_reap(pid_t pid, int options) 364 { 365 int ret; 366 siginfo_t info = { 367 .si_signo = 0, 368 }; 369 370 again: 371 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD); 372 if (ret < 0) { 373 if (errno == EINTR) 374 goto again; 375 return -1; 376 } 377 378 if (options & WEXITED) { 379 if (WIFEXITED(info.si_status)) 380 return WEXITSTATUS(info.si_status); 381 } 382 383 if (options & WSTOPPED) { 384 if (WIFSTOPPED(info.si_status)) 385 return WSTOPSIG(info.si_status); 386 } 387 388 if (options & WCONTINUED) { 389 if (WIFCONTINUED(info.si_status)) 390 return 0; 391 } 392 393 return -1; 394 } 395 396 int dirfd_open_opath(const char *dir) 397 { 398 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH); 399 } 400 401 #define close_prot_errno(fd) \ 402 if (fd >= 0) { \ 403 int _e_ = errno; \ 404 close(fd); \ 405 errno = _e_; \ 406 } 407 408 static int clone_into_cgroup_run_nowait(const char *cgroup, 409 int (*fn)(const char *cgroup, void *arg), 410 void *arg) 411 { 412 int cgroup_fd; 413 pid_t pid; 414 415 cgroup_fd = dirfd_open_opath(cgroup); 416 if (cgroup_fd < 0) 417 return -1; 418 419 pid = clone_into_cgroup(cgroup_fd); 420 close_prot_errno(cgroup_fd); 421 if (pid == 0) 422 exit(fn(cgroup, arg)); 423 424 return pid; 425 } 426 427 int cg_run_nowait(const char *cgroup, 428 int (*fn)(const char *cgroup, void *arg), 429 void *arg) 430 { 431 int pid; 432 433 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg); 434 if (pid > 0) 435 return pid; 436 437 /* Genuine test failure. */ 438 if (pid < 0 && errno != ENOSYS) 439 return -1; 440 441 pid = fork(); 442 if (pid == 0) { 443 char buf[64]; 444 445 snprintf(buf, sizeof(buf), "%d", getpid()); 446 if (cg_write(cgroup, "cgroup.procs", buf)) 447 exit(EXIT_FAILURE); 448 exit(fn(cgroup, arg)); 449 } 450 451 return pid; 452 } 453 454 int get_temp_fd(void) 455 { 456 return open(".", O_TMPFILE | O_RDWR | O_EXCL); 457 } 458 459 int alloc_pagecache(int fd, size_t size) 460 { 461 char buf[PAGE_SIZE]; 462 struct stat st; 463 int i; 464 465 if (fstat(fd, &st)) 466 goto cleanup; 467 468 size += st.st_size; 469 470 if (ftruncate(fd, size)) 471 goto cleanup; 472 473 for (i = 0; i < size; i += sizeof(buf)) 474 read(fd, buf, sizeof(buf)); 475 476 return 0; 477 478 cleanup: 479 return -1; 480 } 481 482 int alloc_anon(const char *cgroup, void *arg) 483 { 484 size_t size = (unsigned long)arg; 485 char *buf, *ptr; 486 487 buf = malloc(size); 488 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) 489 *ptr = 0; 490 491 free(buf); 492 return 0; 493 } 494 495 int is_swap_enabled(void) 496 { 497 char buf[PAGE_SIZE]; 498 const char delim[] = "\n"; 499 int cnt = 0; 500 char *line; 501 502 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) 503 return -1; 504 505 for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) 506 cnt++; 507 508 return cnt > 1; 509 } 510 511 int set_oom_adj_score(int pid, int score) 512 { 513 char path[PATH_MAX]; 514 int fd, len; 515 516 sprintf(path, "/proc/%d/oom_score_adj", pid); 517 518 fd = open(path, O_WRONLY | O_APPEND); 519 if (fd < 0) 520 return fd; 521 522 len = dprintf(fd, "%d", score); 523 if (len < 0) { 524 close(fd); 525 return len; 526 } 527 528 close(fd); 529 return 0; 530 } 531 532 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) 533 { 534 char path[PATH_MAX]; 535 536 if (!pid) 537 snprintf(path, sizeof(path), "/proc/%s/%s", 538 thread ? "thread-self" : "self", item); 539 else 540 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); 541 542 return read_text(path, buf, size); 543 } 544 545 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) 546 { 547 char buf[PAGE_SIZE]; 548 549 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) 550 return -1; 551 552 return strstr(buf, needle) ? 0 : -1; 553 } 554 555 int clone_into_cgroup_run_wait(const char *cgroup) 556 { 557 int cgroup_fd; 558 pid_t pid; 559 560 cgroup_fd = dirfd_open_opath(cgroup); 561 if (cgroup_fd < 0) 562 return -1; 563 564 pid = clone_into_cgroup(cgroup_fd); 565 close_prot_errno(cgroup_fd); 566 if (pid < 0) 567 return -1; 568 569 if (pid == 0) 570 exit(EXIT_SUCCESS); 571 572 /* 573 * We don't care whether this fails. We only care whether the initial 574 * clone succeeded. 575 */ 576 (void)clone_reap(pid, WEXITED); 577 return 0; 578 } 579