1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <sys/mount.h> 5 #include <sys/stat.h> 6 #include <sys/types.h> 7 #include <linux/limits.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <linux/sched.h> 11 #include <fcntl.h> 12 #include <unistd.h> 13 #include <ftw.h> 14 15 #include "cgroup_helpers.h" 16 #include "bpf_util.h" 17 18 /* 19 * To avoid relying on the system setup, when setup_cgroup_env is called 20 * we create a new mount namespace, and cgroup namespace. The cgroupv2 21 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 22 * have cgroupv2 enabled at this point in time. It's easier to create our 23 * own mount namespace and manage it ourselves. We assume /mnt exists. 24 * 25 * Related cgroupv1 helpers are named *classid*(), since we only use the 26 * net_cls controller for tagging net_cls.classid. We assume the default 27 * mount under /sys/fs/cgroup/net_cls, which should be the case for the 28 * vast majority of users. 29 */ 30 31 #define WALK_FD_LIMIT 16 32 33 #define CGROUP_MOUNT_PATH "/mnt" 34 #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 35 #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 36 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 37 38 #define format_cgroup_path_pid(buf, path, pid) \ 39 snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ 40 CGROUP_WORK_DIR, pid, path) 41 42 #define format_cgroup_path(buf, path) \ 43 format_cgroup_path_pid(buf, path, getpid()) 44 45 #define format_parent_cgroup_path(buf, path) \ 46 format_cgroup_path_pid(buf, path, getppid()) 47 48 #define format_classid_path(buf) \ 49 snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ 50 CGROUP_WORK_DIR) 51 52 static int __enable_controllers(const char *cgroup_path, const char *controllers) 53 { 54 char path[PATH_MAX + 1]; 55 char enable[PATH_MAX + 1]; 56 char *c, *c2; 57 int fd, cfd; 58 ssize_t len; 59 60 /* If not controllers are passed, enable all available controllers */ 61 if (!controllers) { 62 snprintf(path, sizeof(path), "%s/cgroup.controllers", 63 cgroup_path); 64 fd = open(path, O_RDONLY); 65 if (fd < 0) { 66 log_err("Opening cgroup.controllers: %s", path); 67 return 1; 68 } 69 len = read(fd, enable, sizeof(enable) - 1); 70 if (len < 0) { 71 close(fd); 72 log_err("Reading cgroup.controllers: %s", path); 73 return 1; 74 } else if (len == 0) { /* No controllers to enable */ 75 close(fd); 76 return 0; 77 } 78 enable[len] = 0; 79 close(fd); 80 } else { 81 bpf_strlcpy(enable, controllers, sizeof(enable)); 82 } 83 84 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); 85 cfd = open(path, O_RDWR); 86 if (cfd < 0) { 87 log_err("Opening cgroup.subtree_control: %s", path); 88 return 1; 89 } 90 91 for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { 92 if (dprintf(cfd, "+%s\n", c) <= 0) { 93 log_err("Enabling controller %s: %s", c, path); 94 close(cfd); 95 return 1; 96 } 97 } 98 close(cfd); 99 return 0; 100 } 101 102 /** 103 * enable_controllers() - Enable cgroup v2 controllers 104 * @relative_path: The cgroup path, relative to the workdir 105 * @controllers: List of controllers to enable in cgroup.controllers format 106 * 107 * 108 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all 109 * available controllers. 110 * 111 * If successful, 0 is returned. 112 */ 113 int enable_controllers(const char *relative_path, const char *controllers) 114 { 115 char cgroup_path[PATH_MAX + 1]; 116 117 format_cgroup_path(cgroup_path, relative_path); 118 return __enable_controllers(cgroup_path, controllers); 119 } 120 121 static int __write_cgroup_file(const char *cgroup_path, const char *file, 122 const char *buf) 123 { 124 char file_path[PATH_MAX + 1]; 125 int fd; 126 127 snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); 128 fd = open(file_path, O_RDWR); 129 if (fd < 0) { 130 log_err("Opening %s", file_path); 131 return 1; 132 } 133 134 if (dprintf(fd, "%s", buf) <= 0) { 135 log_err("Writing to %s", file_path); 136 close(fd); 137 return 1; 138 } 139 close(fd); 140 return 0; 141 } 142 143 /** 144 * write_cgroup_file() - Write to a cgroup file 145 * @relative_path: The cgroup path, relative to the workdir 146 * @file: The name of the file in cgroupfs to write to 147 * @buf: Buffer to write to the file 148 * 149 * Write to a file in the given cgroup's directory. 150 * 151 * If successful, 0 is returned. 152 */ 153 int write_cgroup_file(const char *relative_path, const char *file, 154 const char *buf) 155 { 156 char cgroup_path[PATH_MAX - 24]; 157 158 format_cgroup_path(cgroup_path, relative_path); 159 return __write_cgroup_file(cgroup_path, file, buf); 160 } 161 162 /** 163 * write_cgroup_file_parent() - Write to a cgroup file in the parent process 164 * workdir 165 * @relative_path: The cgroup path, relative to the parent process workdir 166 * @file: The name of the file in cgroupfs to write to 167 * @buf: Buffer to write to the file 168 * 169 * Write to a file in the given cgroup's directory under the parent process 170 * workdir. 171 * 172 * If successful, 0 is returned. 173 */ 174 int write_cgroup_file_parent(const char *relative_path, const char *file, 175 const char *buf) 176 { 177 char cgroup_path[PATH_MAX - 24]; 178 179 format_parent_cgroup_path(cgroup_path, relative_path); 180 return __write_cgroup_file(cgroup_path, file, buf); 181 } 182 183 /** 184 * setup_cgroup_environment() - Setup the cgroup environment 185 * 186 * After calling this function, cleanup_cgroup_environment should be called 187 * once testing is complete. 188 * 189 * This function will print an error to stderr and return 1 if it is unable 190 * to setup the cgroup environment. If setup is successful, 0 is returned. 191 */ 192 int setup_cgroup_environment(void) 193 { 194 char cgroup_workdir[PATH_MAX - 24]; 195 196 format_cgroup_path(cgroup_workdir, ""); 197 198 if (unshare(CLONE_NEWNS)) { 199 log_err("unshare"); 200 return 1; 201 } 202 203 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 204 log_err("mount fakeroot"); 205 return 1; 206 } 207 208 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { 209 log_err("mount cgroup2"); 210 return 1; 211 } 212 213 /* Cleanup existing failed runs, now that the environment is setup */ 214 cleanup_cgroup_environment(); 215 216 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 217 log_err("mkdir cgroup work dir"); 218 return 1; 219 } 220 221 /* Enable all available controllers to increase test coverage */ 222 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || 223 __enable_controllers(cgroup_workdir, NULL)) 224 return 1; 225 226 return 0; 227 } 228 229 static int nftwfunc(const char *filename, const struct stat *statptr, 230 int fileflags, struct FTW *pfwt) 231 { 232 if ((fileflags & FTW_D) && rmdir(filename)) 233 log_err("Removing cgroup: %s", filename); 234 return 0; 235 } 236 237 static int join_cgroup_from_top(const char *cgroup_path) 238 { 239 char cgroup_procs_path[PATH_MAX + 1]; 240 pid_t pid = getpid(); 241 int fd, rc = 0; 242 243 snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), 244 "%s/cgroup.procs", cgroup_path); 245 246 fd = open(cgroup_procs_path, O_WRONLY); 247 if (fd < 0) { 248 log_err("Opening Cgroup Procs: %s", cgroup_procs_path); 249 return 1; 250 } 251 252 if (dprintf(fd, "%d\n", pid) < 0) { 253 log_err("Joining Cgroup"); 254 rc = 1; 255 } 256 257 close(fd); 258 return rc; 259 } 260 261 /** 262 * join_cgroup() - Join a cgroup 263 * @relative_path: The cgroup path, relative to the workdir, to join 264 * 265 * This function expects a cgroup to already be created, relative to the cgroup 266 * work dir, and it joins it. For example, passing "/my-cgroup" as the path 267 * would actually put the calling process into the cgroup 268 * "/cgroup-test-work-dir/my-cgroup" 269 * 270 * On success, it returns 0, otherwise on failure it returns 1. 271 */ 272 int join_cgroup(const char *relative_path) 273 { 274 char cgroup_path[PATH_MAX + 1]; 275 276 format_cgroup_path(cgroup_path, relative_path); 277 return join_cgroup_from_top(cgroup_path); 278 } 279 280 /** 281 * join_root_cgroup() - Join the root cgroup 282 * 283 * This function joins the root cgroup. 284 * 285 * On success, it returns 0, otherwise on failure it returns 1. 286 */ 287 int join_root_cgroup(void) 288 { 289 return join_cgroup_from_top(CGROUP_MOUNT_PATH); 290 } 291 292 /** 293 * join_parent_cgroup() - Join a cgroup in the parent process workdir 294 * @relative_path: The cgroup path, relative to parent process workdir, to join 295 * 296 * See join_cgroup(). 297 * 298 * On success, it returns 0, otherwise on failure it returns 1. 299 */ 300 int join_parent_cgroup(const char *relative_path) 301 { 302 char cgroup_path[PATH_MAX + 1]; 303 304 format_parent_cgroup_path(cgroup_path, relative_path); 305 return join_cgroup_from_top(cgroup_path); 306 } 307 308 /** 309 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment 310 * 311 * This is an idempotent function to delete all temporary cgroups that 312 * have been created during the test, including the cgroup testing work 313 * directory. 314 * 315 * At call time, it moves the calling process to the root cgroup, and then 316 * runs the deletion process. It is idempotent, and should not fail, unless 317 * a process is lingering. 318 * 319 * On failure, it will print an error to stderr, and try to continue. 320 */ 321 void cleanup_cgroup_environment(void) 322 { 323 char cgroup_workdir[PATH_MAX + 1]; 324 325 format_cgroup_path(cgroup_workdir, ""); 326 join_cgroup_from_top(CGROUP_MOUNT_PATH); 327 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 328 } 329 330 /** 331 * get_root_cgroup() - Get the FD of the root cgroup 332 * 333 * On success, it returns the file descriptor. On failure, it returns -1. 334 * If there is a failure, it prints the error to stderr. 335 */ 336 int get_root_cgroup(void) 337 { 338 int fd; 339 340 fd = open(CGROUP_MOUNT_PATH, O_RDONLY); 341 if (fd < 0) { 342 log_err("Opening root cgroup"); 343 return -1; 344 } 345 return fd; 346 } 347 348 /* 349 * remove_cgroup() - Remove a cgroup 350 * @relative_path: The cgroup path, relative to the workdir, to remove 351 * 352 * This function expects a cgroup to already be created, relative to the cgroup 353 * work dir. It also expects the cgroup doesn't have any children or live 354 * processes and it removes the cgroup. 355 * 356 * On failure, it will print an error to stderr. 357 */ 358 void remove_cgroup(const char *relative_path) 359 { 360 char cgroup_path[PATH_MAX + 1]; 361 362 format_cgroup_path(cgroup_path, relative_path); 363 if (rmdir(cgroup_path)) 364 log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); 365 } 366 367 /** 368 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD 369 * @relative_path: The cgroup path, relative to the workdir, to join 370 * 371 * This function creates a cgroup under the top level workdir and returns the 372 * file descriptor. It is idempotent. 373 * 374 * On success, it returns the file descriptor. On failure it returns -1. 375 * If there is a failure, it prints the error to stderr. 376 */ 377 int create_and_get_cgroup(const char *relative_path) 378 { 379 char cgroup_path[PATH_MAX + 1]; 380 int fd; 381 382 format_cgroup_path(cgroup_path, relative_path); 383 if (mkdir(cgroup_path, 0777) && errno != EEXIST) { 384 log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); 385 return -1; 386 } 387 388 fd = open(cgroup_path, O_RDONLY); 389 if (fd < 0) { 390 log_err("Opening Cgroup"); 391 return -1; 392 } 393 394 return fd; 395 } 396 397 /** 398 * get_cgroup_id() - Get cgroup id for a particular cgroup path 399 * @relative_path: The cgroup path, relative to the workdir, to join 400 * 401 * On success, it returns the cgroup id. On failure it returns 0, 402 * which is an invalid cgroup id. 403 * If there is a failure, it prints the error to stderr. 404 */ 405 unsigned long long get_cgroup_id(const char *relative_path) 406 { 407 int dirfd, err, flags, mount_id, fhsize; 408 union { 409 unsigned long long cgid; 410 unsigned char raw_bytes[8]; 411 } id; 412 char cgroup_workdir[PATH_MAX + 1]; 413 struct file_handle *fhp, *fhp2; 414 unsigned long long ret = 0; 415 416 format_cgroup_path(cgroup_workdir, relative_path); 417 418 dirfd = AT_FDCWD; 419 flags = 0; 420 fhsize = sizeof(*fhp); 421 fhp = calloc(1, fhsize); 422 if (!fhp) { 423 log_err("calloc"); 424 return 0; 425 } 426 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); 427 if (err >= 0 || fhp->handle_bytes != 8) { 428 log_err("name_to_handle_at"); 429 goto free_mem; 430 } 431 432 fhsize = sizeof(struct file_handle) + fhp->handle_bytes; 433 fhp2 = realloc(fhp, fhsize); 434 if (!fhp2) { 435 log_err("realloc"); 436 goto free_mem; 437 } 438 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); 439 fhp = fhp2; 440 if (err < 0) { 441 log_err("name_to_handle_at"); 442 goto free_mem; 443 } 444 445 memcpy(id.raw_bytes, fhp->f_handle, 8); 446 ret = id.cgid; 447 448 free_mem: 449 free(fhp); 450 return ret; 451 } 452 453 int cgroup_setup_and_join(const char *path) { 454 int cg_fd; 455 456 if (setup_cgroup_environment()) { 457 fprintf(stderr, "Failed to setup cgroup environment\n"); 458 return -EINVAL; 459 } 460 461 cg_fd = create_and_get_cgroup(path); 462 if (cg_fd < 0) { 463 fprintf(stderr, "Failed to create test cgroup\n"); 464 cleanup_cgroup_environment(); 465 return cg_fd; 466 } 467 468 if (join_cgroup(path)) { 469 fprintf(stderr, "Failed to join cgroup\n"); 470 cleanup_cgroup_environment(); 471 return -EINVAL; 472 } 473 return cg_fd; 474 } 475 476 /** 477 * setup_classid_environment() - Setup the cgroupv1 net_cls environment 478 * 479 * After calling this function, cleanup_classid_environment should be called 480 * once testing is complete. 481 * 482 * This function will print an error to stderr and return 1 if it is unable 483 * to setup the cgroup environment. If setup is successful, 0 is returned. 484 */ 485 int setup_classid_environment(void) 486 { 487 char cgroup_workdir[PATH_MAX + 1]; 488 489 format_classid_path(cgroup_workdir); 490 491 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 492 errno != EBUSY) { 493 log_err("mount cgroup base"); 494 return 1; 495 } 496 497 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 498 log_err("mkdir cgroup net_cls"); 499 return 1; 500 } 501 502 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && 503 errno != EBUSY) { 504 log_err("mount cgroup net_cls"); 505 return 1; 506 } 507 508 cleanup_classid_environment(); 509 510 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 511 log_err("mkdir cgroup work dir"); 512 return 1; 513 } 514 515 return 0; 516 } 517 518 /** 519 * set_classid() - Set a cgroupv1 net_cls classid 520 * @id: the numeric classid 521 * 522 * Writes the passed classid into the cgroup work dir's net_cls.classid 523 * file in order to later on trigger socket tagging. 524 * 525 * On success, it returns 0, otherwise on failure it returns 1. If there 526 * is a failure, it prints the error to stderr. 527 */ 528 int set_classid(unsigned int id) 529 { 530 char cgroup_workdir[PATH_MAX - 42]; 531 char cgroup_classid_path[PATH_MAX + 1]; 532 int fd, rc = 0; 533 534 format_classid_path(cgroup_workdir); 535 snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 536 "%s/net_cls.classid", cgroup_workdir); 537 538 fd = open(cgroup_classid_path, O_WRONLY); 539 if (fd < 0) { 540 log_err("Opening cgroup classid: %s", cgroup_classid_path); 541 return 1; 542 } 543 544 if (dprintf(fd, "%u\n", id) < 0) { 545 log_err("Setting cgroup classid"); 546 rc = 1; 547 } 548 549 close(fd); 550 return rc; 551 } 552 553 /** 554 * join_classid() - Join a cgroupv1 net_cls classid 555 * 556 * This function expects the cgroup work dir to be already created, as we 557 * join it here. This causes the process sockets to be tagged with the given 558 * net_cls classid. 559 * 560 * On success, it returns 0, otherwise on failure it returns 1. 561 */ 562 int join_classid(void) 563 { 564 char cgroup_workdir[PATH_MAX + 1]; 565 566 format_classid_path(cgroup_workdir); 567 return join_cgroup_from_top(cgroup_workdir); 568 } 569 570 /** 571 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 572 * 573 * At call time, it moves the calling process to the root cgroup, and then 574 * runs the deletion process. 575 * 576 * On failure, it will print an error to stderr, and try to continue. 577 */ 578 void cleanup_classid_environment(void) 579 { 580 char cgroup_workdir[PATH_MAX + 1]; 581 582 format_classid_path(cgroup_workdir); 583 join_cgroup_from_top(NETCLS_MOUNT_PATH); 584 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 585 } 586