1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <sys/mount.h> 5 #include <sys/stat.h> 6 #include <sys/types.h> 7 #include <linux/limits.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <linux/sched.h> 11 #include <fcntl.h> 12 #include <unistd.h> 13 #include <ftw.h> 14 15 #include "cgroup_helpers.h" 16 17 /* 18 * To avoid relying on the system setup, when setup_cgroup_env is called 19 * we create a new mount namespace, and cgroup namespace. The cgroupv2 20 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 21 * have cgroupv2 enabled at this point in time. It's easier to create our 22 * own mount namespace and manage it ourselves. We assume /mnt exists. 23 * 24 * Related cgroupv1 helpers are named *classid*(), since we only use the 25 * net_cls controller for tagging net_cls.classid. We assume the default 26 * mount under /sys/fs/cgroup/net_cls, which should be the case for the 27 * vast majority of users. 28 */ 29 30 #define WALK_FD_LIMIT 16 31 32 #define CGROUP_MOUNT_PATH "/mnt" 33 #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 34 #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 35 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 36 37 #define format_cgroup_path_pid(buf, path, pid) \ 38 snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ 39 CGROUP_WORK_DIR, pid, path) 40 41 #define format_cgroup_path(buf, path) \ 42 format_cgroup_path_pid(buf, path, getpid()) 43 44 #define format_parent_cgroup_path(buf, path) \ 45 format_cgroup_path_pid(buf, path, getppid()) 46 47 #define format_classid_path(buf) \ 48 snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ 49 CGROUP_WORK_DIR) 50 51 static int __enable_controllers(const char *cgroup_path, const char *controllers) 52 { 53 char path[PATH_MAX + 1]; 54 char enable[PATH_MAX + 1]; 55 char *c, *c2; 56 int fd, cfd; 57 ssize_t len; 58 59 /* If not controllers are passed, enable all available controllers */ 60 if (!controllers) { 61 snprintf(path, sizeof(path), "%s/cgroup.controllers", 62 cgroup_path); 63 fd = open(path, O_RDONLY); 64 if (fd < 0) { 65 log_err("Opening cgroup.controllers: %s", path); 66 return 1; 67 } 68 len = read(fd, enable, sizeof(enable) - 1); 69 if (len < 0) { 70 close(fd); 71 log_err("Reading cgroup.controllers: %s", path); 72 return 1; 73 } else if (len == 0) { /* No controllers to enable */ 74 close(fd); 75 return 0; 76 } 77 enable[len] = 0; 78 close(fd); 79 } else { 80 strncpy(enable, controllers, sizeof(enable)); 81 } 82 83 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); 84 cfd = open(path, O_RDWR); 85 if (cfd < 0) { 86 log_err("Opening cgroup.subtree_control: %s", path); 87 return 1; 88 } 89 90 for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { 91 if (dprintf(cfd, "+%s\n", c) <= 0) { 92 log_err("Enabling controller %s: %s", c, path); 93 close(cfd); 94 return 1; 95 } 96 } 97 close(cfd); 98 return 0; 99 } 100 101 /** 102 * enable_controllers() - Enable cgroup v2 controllers 103 * @relative_path: The cgroup path, relative to the workdir 104 * @controllers: List of controllers to enable in cgroup.controllers format 105 * 106 * 107 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all 108 * available controllers. 109 * 110 * If successful, 0 is returned. 111 */ 112 int enable_controllers(const char *relative_path, const char *controllers) 113 { 114 char cgroup_path[PATH_MAX + 1]; 115 116 format_cgroup_path(cgroup_path, relative_path); 117 return __enable_controllers(cgroup_path, controllers); 118 } 119 120 static int __write_cgroup_file(const char *cgroup_path, const char *file, 121 const char *buf) 122 { 123 char file_path[PATH_MAX + 1]; 124 int fd; 125 126 snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); 127 fd = open(file_path, O_RDWR); 128 if (fd < 0) { 129 log_err("Opening %s", file_path); 130 return 1; 131 } 132 133 if (dprintf(fd, "%s", buf) <= 0) { 134 log_err("Writing to %s", file_path); 135 close(fd); 136 return 1; 137 } 138 close(fd); 139 return 0; 140 } 141 142 /** 143 * write_cgroup_file() - Write to a cgroup file 144 * @relative_path: The cgroup path, relative to the workdir 145 * @file: The name of the file in cgroupfs to write to 146 * @buf: Buffer to write to the file 147 * 148 * Write to a file in the given cgroup's directory. 149 * 150 * If successful, 0 is returned. 151 */ 152 int write_cgroup_file(const char *relative_path, const char *file, 153 const char *buf) 154 { 155 char cgroup_path[PATH_MAX - 24]; 156 157 format_cgroup_path(cgroup_path, relative_path); 158 return __write_cgroup_file(cgroup_path, file, buf); 159 } 160 161 /** 162 * write_cgroup_file_parent() - Write to a cgroup file in the parent process 163 * workdir 164 * @relative_path: The cgroup path, relative to the parent process workdir 165 * @file: The name of the file in cgroupfs to write to 166 * @buf: Buffer to write to the file 167 * 168 * Write to a file in the given cgroup's directory under the parent process 169 * workdir. 170 * 171 * If successful, 0 is returned. 172 */ 173 int write_cgroup_file_parent(const char *relative_path, const char *file, 174 const char *buf) 175 { 176 char cgroup_path[PATH_MAX - 24]; 177 178 format_parent_cgroup_path(cgroup_path, relative_path); 179 return __write_cgroup_file(cgroup_path, file, buf); 180 } 181 182 /** 183 * setup_cgroup_environment() - Setup the cgroup environment 184 * 185 * After calling this function, cleanup_cgroup_environment should be called 186 * once testing is complete. 187 * 188 * This function will print an error to stderr and return 1 if it is unable 189 * to setup the cgroup environment. If setup is successful, 0 is returned. 190 */ 191 int setup_cgroup_environment(void) 192 { 193 char cgroup_workdir[PATH_MAX - 24]; 194 195 format_cgroup_path(cgroup_workdir, ""); 196 197 if (unshare(CLONE_NEWNS)) { 198 log_err("unshare"); 199 return 1; 200 } 201 202 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 203 log_err("mount fakeroot"); 204 return 1; 205 } 206 207 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { 208 log_err("mount cgroup2"); 209 return 1; 210 } 211 212 /* Cleanup existing failed runs, now that the environment is setup */ 213 cleanup_cgroup_environment(); 214 215 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 216 log_err("mkdir cgroup work dir"); 217 return 1; 218 } 219 220 /* Enable all available controllers to increase test coverage */ 221 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || 222 __enable_controllers(cgroup_workdir, NULL)) 223 return 1; 224 225 return 0; 226 } 227 228 static int nftwfunc(const char *filename, const struct stat *statptr, 229 int fileflags, struct FTW *pfwt) 230 { 231 if ((fileflags & FTW_D) && rmdir(filename)) 232 log_err("Removing cgroup: %s", filename); 233 return 0; 234 } 235 236 static int join_cgroup_from_top(const char *cgroup_path) 237 { 238 char cgroup_procs_path[PATH_MAX + 1]; 239 pid_t pid = getpid(); 240 int fd, rc = 0; 241 242 snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), 243 "%s/cgroup.procs", cgroup_path); 244 245 fd = open(cgroup_procs_path, O_WRONLY); 246 if (fd < 0) { 247 log_err("Opening Cgroup Procs: %s", cgroup_procs_path); 248 return 1; 249 } 250 251 if (dprintf(fd, "%d\n", pid) < 0) { 252 log_err("Joining Cgroup"); 253 rc = 1; 254 } 255 256 close(fd); 257 return rc; 258 } 259 260 /** 261 * join_cgroup() - Join a cgroup 262 * @relative_path: The cgroup path, relative to the workdir, to join 263 * 264 * This function expects a cgroup to already be created, relative to the cgroup 265 * work dir, and it joins it. For example, passing "/my-cgroup" as the path 266 * would actually put the calling process into the cgroup 267 * "/cgroup-test-work-dir/my-cgroup" 268 * 269 * On success, it returns 0, otherwise on failure it returns 1. 270 */ 271 int join_cgroup(const char *relative_path) 272 { 273 char cgroup_path[PATH_MAX + 1]; 274 275 format_cgroup_path(cgroup_path, relative_path); 276 return join_cgroup_from_top(cgroup_path); 277 } 278 279 /** 280 * join_parent_cgroup() - Join a cgroup in the parent process workdir 281 * @relative_path: The cgroup path, relative to parent process workdir, to join 282 * 283 * See join_cgroup(). 284 * 285 * On success, it returns 0, otherwise on failure it returns 1. 286 */ 287 int join_parent_cgroup(const char *relative_path) 288 { 289 char cgroup_path[PATH_MAX + 1]; 290 291 format_parent_cgroup_path(cgroup_path, relative_path); 292 return join_cgroup_from_top(cgroup_path); 293 } 294 295 /** 296 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment 297 * 298 * This is an idempotent function to delete all temporary cgroups that 299 * have been created during the test, including the cgroup testing work 300 * directory. 301 * 302 * At call time, it moves the calling process to the root cgroup, and then 303 * runs the deletion process. It is idempotent, and should not fail, unless 304 * a process is lingering. 305 * 306 * On failure, it will print an error to stderr, and try to continue. 307 */ 308 void cleanup_cgroup_environment(void) 309 { 310 char cgroup_workdir[PATH_MAX + 1]; 311 312 format_cgroup_path(cgroup_workdir, ""); 313 join_cgroup_from_top(CGROUP_MOUNT_PATH); 314 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 315 } 316 317 /** 318 * get_root_cgroup() - Get the FD of the root cgroup 319 * 320 * On success, it returns the file descriptor. On failure, it returns -1. 321 * If there is a failure, it prints the error to stderr. 322 */ 323 int get_root_cgroup(void) 324 { 325 int fd; 326 327 fd = open(CGROUP_MOUNT_PATH, O_RDONLY); 328 if (fd < 0) { 329 log_err("Opening root cgroup"); 330 return -1; 331 } 332 return fd; 333 } 334 335 /** 336 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD 337 * @relative_path: The cgroup path, relative to the workdir, to join 338 * 339 * This function creates a cgroup under the top level workdir and returns the 340 * file descriptor. It is idempotent. 341 * 342 * On success, it returns the file descriptor. On failure it returns -1. 343 * If there is a failure, it prints the error to stderr. 344 */ 345 int create_and_get_cgroup(const char *relative_path) 346 { 347 char cgroup_path[PATH_MAX + 1]; 348 int fd; 349 350 format_cgroup_path(cgroup_path, relative_path); 351 if (mkdir(cgroup_path, 0777) && errno != EEXIST) { 352 log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); 353 return -1; 354 } 355 356 fd = open(cgroup_path, O_RDONLY); 357 if (fd < 0) { 358 log_err("Opening Cgroup"); 359 return -1; 360 } 361 362 return fd; 363 } 364 365 /** 366 * get_cgroup_id() - Get cgroup id for a particular cgroup path 367 * @relative_path: The cgroup path, relative to the workdir, to join 368 * 369 * On success, it returns the cgroup id. On failure it returns 0, 370 * which is an invalid cgroup id. 371 * If there is a failure, it prints the error to stderr. 372 */ 373 unsigned long long get_cgroup_id(const char *relative_path) 374 { 375 int dirfd, err, flags, mount_id, fhsize; 376 union { 377 unsigned long long cgid; 378 unsigned char raw_bytes[8]; 379 } id; 380 char cgroup_workdir[PATH_MAX + 1]; 381 struct file_handle *fhp, *fhp2; 382 unsigned long long ret = 0; 383 384 format_cgroup_path(cgroup_workdir, relative_path); 385 386 dirfd = AT_FDCWD; 387 flags = 0; 388 fhsize = sizeof(*fhp); 389 fhp = calloc(1, fhsize); 390 if (!fhp) { 391 log_err("calloc"); 392 return 0; 393 } 394 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); 395 if (err >= 0 || fhp->handle_bytes != 8) { 396 log_err("name_to_handle_at"); 397 goto free_mem; 398 } 399 400 fhsize = sizeof(struct file_handle) + fhp->handle_bytes; 401 fhp2 = realloc(fhp, fhsize); 402 if (!fhp2) { 403 log_err("realloc"); 404 goto free_mem; 405 } 406 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); 407 fhp = fhp2; 408 if (err < 0) { 409 log_err("name_to_handle_at"); 410 goto free_mem; 411 } 412 413 memcpy(id.raw_bytes, fhp->f_handle, 8); 414 ret = id.cgid; 415 416 free_mem: 417 free(fhp); 418 return ret; 419 } 420 421 int cgroup_setup_and_join(const char *path) { 422 int cg_fd; 423 424 if (setup_cgroup_environment()) { 425 fprintf(stderr, "Failed to setup cgroup environment\n"); 426 return -EINVAL; 427 } 428 429 cg_fd = create_and_get_cgroup(path); 430 if (cg_fd < 0) { 431 fprintf(stderr, "Failed to create test cgroup\n"); 432 cleanup_cgroup_environment(); 433 return cg_fd; 434 } 435 436 if (join_cgroup(path)) { 437 fprintf(stderr, "Failed to join cgroup\n"); 438 cleanup_cgroup_environment(); 439 return -EINVAL; 440 } 441 return cg_fd; 442 } 443 444 /** 445 * setup_classid_environment() - Setup the cgroupv1 net_cls environment 446 * 447 * After calling this function, cleanup_classid_environment should be called 448 * once testing is complete. 449 * 450 * This function will print an error to stderr and return 1 if it is unable 451 * to setup the cgroup environment. If setup is successful, 0 is returned. 452 */ 453 int setup_classid_environment(void) 454 { 455 char cgroup_workdir[PATH_MAX + 1]; 456 457 format_classid_path(cgroup_workdir); 458 459 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 460 errno != EBUSY) { 461 log_err("mount cgroup base"); 462 return 1; 463 } 464 465 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 466 log_err("mkdir cgroup net_cls"); 467 return 1; 468 } 469 470 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && 471 errno != EBUSY) { 472 log_err("mount cgroup net_cls"); 473 return 1; 474 } 475 476 cleanup_classid_environment(); 477 478 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 479 log_err("mkdir cgroup work dir"); 480 return 1; 481 } 482 483 return 0; 484 } 485 486 /** 487 * set_classid() - Set a cgroupv1 net_cls classid 488 * @id: the numeric classid 489 * 490 * Writes the passed classid into the cgroup work dir's net_cls.classid 491 * file in order to later on trigger socket tagging. 492 * 493 * On success, it returns 0, otherwise on failure it returns 1. If there 494 * is a failure, it prints the error to stderr. 495 */ 496 int set_classid(unsigned int id) 497 { 498 char cgroup_workdir[PATH_MAX - 42]; 499 char cgroup_classid_path[PATH_MAX + 1]; 500 int fd, rc = 0; 501 502 format_classid_path(cgroup_workdir); 503 snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 504 "%s/net_cls.classid", cgroup_workdir); 505 506 fd = open(cgroup_classid_path, O_WRONLY); 507 if (fd < 0) { 508 log_err("Opening cgroup classid: %s", cgroup_classid_path); 509 return 1; 510 } 511 512 if (dprintf(fd, "%u\n", id) < 0) { 513 log_err("Setting cgroup classid"); 514 rc = 1; 515 } 516 517 close(fd); 518 return rc; 519 } 520 521 /** 522 * join_classid() - Join a cgroupv1 net_cls classid 523 * 524 * This function expects the cgroup work dir to be already created, as we 525 * join it here. This causes the process sockets to be tagged with the given 526 * net_cls classid. 527 * 528 * On success, it returns 0, otherwise on failure it returns 1. 529 */ 530 int join_classid(void) 531 { 532 char cgroup_workdir[PATH_MAX + 1]; 533 534 format_classid_path(cgroup_workdir); 535 return join_cgroup_from_top(cgroup_workdir); 536 } 537 538 /** 539 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 540 * 541 * At call time, it moves the calling process to the root cgroup, and then 542 * runs the deletion process. 543 * 544 * On failure, it will print an error to stderr, and try to continue. 545 */ 546 void cleanup_classid_environment(void) 547 { 548 char cgroup_workdir[PATH_MAX + 1]; 549 550 format_classid_path(cgroup_workdir); 551 join_cgroup_from_top(NETCLS_MOUNT_PATH); 552 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 553 } 554