1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <sys/mount.h> 5 #include <sys/stat.h> 6 #include <sys/types.h> 7 #include <linux/limits.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <linux/sched.h> 11 #include <fcntl.h> 12 #include <unistd.h> 13 #include <ftw.h> 14 15 #include "cgroup_helpers.h" 16 #include "bpf_util.h" 17 18 /* 19 * To avoid relying on the system setup, when setup_cgroup_env is called 20 * we create a new mount namespace, and cgroup namespace. The cgroupv2 21 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't 22 * have cgroupv2 enabled at this point in time. It's easier to create our 23 * own mount namespace and manage it ourselves. We assume /mnt exists. 24 * 25 * Related cgroupv1 helpers are named *classid*(), since we only use the 26 * net_cls controller for tagging net_cls.classid. We assume the default 27 * mount under /sys/fs/cgroup/net_cls, which should be the case for the 28 * vast majority of users. 29 */ 30 31 #define WALK_FD_LIMIT 16 32 33 #define CGROUP_MOUNT_PATH "/mnt" 34 #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" 35 #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" 36 #define CGROUP_WORK_DIR "/cgroup-test-work-dir" 37 38 #define format_cgroup_path_pid(buf, path, pid) \ 39 snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ 40 CGROUP_WORK_DIR, pid, path) 41 42 #define format_cgroup_path(buf, path) \ 43 format_cgroup_path_pid(buf, path, getpid()) 44 45 #define format_parent_cgroup_path(buf, path) \ 46 format_cgroup_path_pid(buf, path, getppid()) 47 48 #define format_classid_path(buf) \ 49 snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ 50 CGROUP_WORK_DIR) 51 52 static int __enable_controllers(const char *cgroup_path, const char *controllers) 53 { 54 char path[PATH_MAX + 1]; 55 char enable[PATH_MAX + 1]; 56 char *c, *c2; 57 int fd, cfd; 58 ssize_t len; 59 60 /* If not controllers are passed, enable all available controllers */ 61 if (!controllers) { 62 snprintf(path, sizeof(path), "%s/cgroup.controllers", 63 cgroup_path); 64 fd = open(path, O_RDONLY); 65 if (fd < 0) { 66 log_err("Opening cgroup.controllers: %s", path); 67 return 1; 68 } 69 len = read(fd, enable, sizeof(enable) - 1); 70 if (len < 0) { 71 close(fd); 72 log_err("Reading cgroup.controllers: %s", path); 73 return 1; 74 } else if (len == 0) { /* No controllers to enable */ 75 close(fd); 76 return 0; 77 } 78 enable[len] = 0; 79 close(fd); 80 } else { 81 bpf_strlcpy(enable, controllers, sizeof(enable)); 82 } 83 84 snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); 85 cfd = open(path, O_RDWR); 86 if (cfd < 0) { 87 log_err("Opening cgroup.subtree_control: %s", path); 88 return 1; 89 } 90 91 for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { 92 if (dprintf(cfd, "+%s\n", c) <= 0) { 93 log_err("Enabling controller %s: %s", c, path); 94 close(cfd); 95 return 1; 96 } 97 } 98 close(cfd); 99 return 0; 100 } 101 102 /** 103 * enable_controllers() - Enable cgroup v2 controllers 104 * @relative_path: The cgroup path, relative to the workdir 105 * @controllers: List of controllers to enable in cgroup.controllers format 106 * 107 * 108 * Enable given cgroup v2 controllers, if @controllers is NULL, enable all 109 * available controllers. 110 * 111 * If successful, 0 is returned. 112 */ 113 int enable_controllers(const char *relative_path, const char *controllers) 114 { 115 char cgroup_path[PATH_MAX + 1]; 116 117 format_cgroup_path(cgroup_path, relative_path); 118 return __enable_controllers(cgroup_path, controllers); 119 } 120 121 static int __write_cgroup_file(const char *cgroup_path, const char *file, 122 const char *buf) 123 { 124 char file_path[PATH_MAX + 1]; 125 int fd; 126 127 snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); 128 fd = open(file_path, O_RDWR); 129 if (fd < 0) { 130 log_err("Opening %s", file_path); 131 return 1; 132 } 133 134 if (dprintf(fd, "%s", buf) <= 0) { 135 log_err("Writing to %s", file_path); 136 close(fd); 137 return 1; 138 } 139 close(fd); 140 return 0; 141 } 142 143 /** 144 * write_cgroup_file() - Write to a cgroup file 145 * @relative_path: The cgroup path, relative to the workdir 146 * @file: The name of the file in cgroupfs to write to 147 * @buf: Buffer to write to the file 148 * 149 * Write to a file in the given cgroup's directory. 150 * 151 * If successful, 0 is returned. 152 */ 153 int write_cgroup_file(const char *relative_path, const char *file, 154 const char *buf) 155 { 156 char cgroup_path[PATH_MAX - 24]; 157 158 format_cgroup_path(cgroup_path, relative_path); 159 return __write_cgroup_file(cgroup_path, file, buf); 160 } 161 162 /** 163 * write_cgroup_file_parent() - Write to a cgroup file in the parent process 164 * workdir 165 * @relative_path: The cgroup path, relative to the parent process workdir 166 * @file: The name of the file in cgroupfs to write to 167 * @buf: Buffer to write to the file 168 * 169 * Write to a file in the given cgroup's directory under the parent process 170 * workdir. 171 * 172 * If successful, 0 is returned. 173 */ 174 int write_cgroup_file_parent(const char *relative_path, const char *file, 175 const char *buf) 176 { 177 char cgroup_path[PATH_MAX - 24]; 178 179 format_parent_cgroup_path(cgroup_path, relative_path); 180 return __write_cgroup_file(cgroup_path, file, buf); 181 } 182 183 /** 184 * setup_cgroup_environment() - Setup the cgroup environment 185 * 186 * After calling this function, cleanup_cgroup_environment should be called 187 * once testing is complete. 188 * 189 * This function will print an error to stderr and return 1 if it is unable 190 * to setup the cgroup environment. If setup is successful, 0 is returned. 191 */ 192 int setup_cgroup_environment(void) 193 { 194 char cgroup_workdir[PATH_MAX - 24]; 195 196 format_cgroup_path(cgroup_workdir, ""); 197 198 if (unshare(CLONE_NEWNS)) { 199 log_err("unshare"); 200 return 1; 201 } 202 203 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 204 log_err("mount fakeroot"); 205 return 1; 206 } 207 208 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { 209 log_err("mount cgroup2"); 210 return 1; 211 } 212 213 /* Cleanup existing failed runs, now that the environment is setup */ 214 cleanup_cgroup_environment(); 215 216 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 217 log_err("mkdir cgroup work dir"); 218 return 1; 219 } 220 221 /* Enable all available controllers to increase test coverage */ 222 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || 223 __enable_controllers(cgroup_workdir, NULL)) 224 return 1; 225 226 return 0; 227 } 228 229 static int nftwfunc(const char *filename, const struct stat *statptr, 230 int fileflags, struct FTW *pfwt) 231 { 232 if ((fileflags & FTW_D) && rmdir(filename)) 233 log_err("Removing cgroup: %s", filename); 234 return 0; 235 } 236 237 static int join_cgroup_from_top(const char *cgroup_path) 238 { 239 char cgroup_procs_path[PATH_MAX + 1]; 240 pid_t pid = getpid(); 241 int fd, rc = 0; 242 243 snprintf(cgroup_procs_path, sizeof(cgroup_procs_path), 244 "%s/cgroup.procs", cgroup_path); 245 246 fd = open(cgroup_procs_path, O_WRONLY); 247 if (fd < 0) { 248 log_err("Opening Cgroup Procs: %s", cgroup_procs_path); 249 return 1; 250 } 251 252 if (dprintf(fd, "%d\n", pid) < 0) { 253 log_err("Joining Cgroup"); 254 rc = 1; 255 } 256 257 close(fd); 258 return rc; 259 } 260 261 /** 262 * join_cgroup() - Join a cgroup 263 * @relative_path: The cgroup path, relative to the workdir, to join 264 * 265 * This function expects a cgroup to already be created, relative to the cgroup 266 * work dir, and it joins it. For example, passing "/my-cgroup" as the path 267 * would actually put the calling process into the cgroup 268 * "/cgroup-test-work-dir/my-cgroup" 269 * 270 * On success, it returns 0, otherwise on failure it returns 1. 271 */ 272 int join_cgroup(const char *relative_path) 273 { 274 char cgroup_path[PATH_MAX + 1]; 275 276 format_cgroup_path(cgroup_path, relative_path); 277 return join_cgroup_from_top(cgroup_path); 278 } 279 280 /** 281 * join_parent_cgroup() - Join a cgroup in the parent process workdir 282 * @relative_path: The cgroup path, relative to parent process workdir, to join 283 * 284 * See join_cgroup(). 285 * 286 * On success, it returns 0, otherwise on failure it returns 1. 287 */ 288 int join_parent_cgroup(const char *relative_path) 289 { 290 char cgroup_path[PATH_MAX + 1]; 291 292 format_parent_cgroup_path(cgroup_path, relative_path); 293 return join_cgroup_from_top(cgroup_path); 294 } 295 296 /** 297 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment 298 * 299 * This is an idempotent function to delete all temporary cgroups that 300 * have been created during the test, including the cgroup testing work 301 * directory. 302 * 303 * At call time, it moves the calling process to the root cgroup, and then 304 * runs the deletion process. It is idempotent, and should not fail, unless 305 * a process is lingering. 306 * 307 * On failure, it will print an error to stderr, and try to continue. 308 */ 309 void cleanup_cgroup_environment(void) 310 { 311 char cgroup_workdir[PATH_MAX + 1]; 312 313 format_cgroup_path(cgroup_workdir, ""); 314 join_cgroup_from_top(CGROUP_MOUNT_PATH); 315 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 316 } 317 318 /** 319 * get_root_cgroup() - Get the FD of the root cgroup 320 * 321 * On success, it returns the file descriptor. On failure, it returns -1. 322 * If there is a failure, it prints the error to stderr. 323 */ 324 int get_root_cgroup(void) 325 { 326 int fd; 327 328 fd = open(CGROUP_MOUNT_PATH, O_RDONLY); 329 if (fd < 0) { 330 log_err("Opening root cgroup"); 331 return -1; 332 } 333 return fd; 334 } 335 336 /* 337 * remove_cgroup() - Remove a cgroup 338 * @relative_path: The cgroup path, relative to the workdir, to remove 339 * 340 * This function expects a cgroup to already be created, relative to the cgroup 341 * work dir. It also expects the cgroup doesn't have any children or live 342 * processes and it removes the cgroup. 343 * 344 * On failure, it will print an error to stderr. 345 */ 346 void remove_cgroup(const char *relative_path) 347 { 348 char cgroup_path[PATH_MAX + 1]; 349 350 format_cgroup_path(cgroup_path, relative_path); 351 if (rmdir(cgroup_path)) 352 log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); 353 } 354 355 /** 356 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD 357 * @relative_path: The cgroup path, relative to the workdir, to join 358 * 359 * This function creates a cgroup under the top level workdir and returns the 360 * file descriptor. It is idempotent. 361 * 362 * On success, it returns the file descriptor. On failure it returns -1. 363 * If there is a failure, it prints the error to stderr. 364 */ 365 int create_and_get_cgroup(const char *relative_path) 366 { 367 char cgroup_path[PATH_MAX + 1]; 368 int fd; 369 370 format_cgroup_path(cgroup_path, relative_path); 371 if (mkdir(cgroup_path, 0777) && errno != EEXIST) { 372 log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); 373 return -1; 374 } 375 376 fd = open(cgroup_path, O_RDONLY); 377 if (fd < 0) { 378 log_err("Opening Cgroup"); 379 return -1; 380 } 381 382 return fd; 383 } 384 385 /** 386 * get_cgroup_id() - Get cgroup id for a particular cgroup path 387 * @relative_path: The cgroup path, relative to the workdir, to join 388 * 389 * On success, it returns the cgroup id. On failure it returns 0, 390 * which is an invalid cgroup id. 391 * If there is a failure, it prints the error to stderr. 392 */ 393 unsigned long long get_cgroup_id(const char *relative_path) 394 { 395 int dirfd, err, flags, mount_id, fhsize; 396 union { 397 unsigned long long cgid; 398 unsigned char raw_bytes[8]; 399 } id; 400 char cgroup_workdir[PATH_MAX + 1]; 401 struct file_handle *fhp, *fhp2; 402 unsigned long long ret = 0; 403 404 format_cgroup_path(cgroup_workdir, relative_path); 405 406 dirfd = AT_FDCWD; 407 flags = 0; 408 fhsize = sizeof(*fhp); 409 fhp = calloc(1, fhsize); 410 if (!fhp) { 411 log_err("calloc"); 412 return 0; 413 } 414 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags); 415 if (err >= 0 || fhp->handle_bytes != 8) { 416 log_err("name_to_handle_at"); 417 goto free_mem; 418 } 419 420 fhsize = sizeof(struct file_handle) + fhp->handle_bytes; 421 fhp2 = realloc(fhp, fhsize); 422 if (!fhp2) { 423 log_err("realloc"); 424 goto free_mem; 425 } 426 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags); 427 fhp = fhp2; 428 if (err < 0) { 429 log_err("name_to_handle_at"); 430 goto free_mem; 431 } 432 433 memcpy(id.raw_bytes, fhp->f_handle, 8); 434 ret = id.cgid; 435 436 free_mem: 437 free(fhp); 438 return ret; 439 } 440 441 int cgroup_setup_and_join(const char *path) { 442 int cg_fd; 443 444 if (setup_cgroup_environment()) { 445 fprintf(stderr, "Failed to setup cgroup environment\n"); 446 return -EINVAL; 447 } 448 449 cg_fd = create_and_get_cgroup(path); 450 if (cg_fd < 0) { 451 fprintf(stderr, "Failed to create test cgroup\n"); 452 cleanup_cgroup_environment(); 453 return cg_fd; 454 } 455 456 if (join_cgroup(path)) { 457 fprintf(stderr, "Failed to join cgroup\n"); 458 cleanup_cgroup_environment(); 459 return -EINVAL; 460 } 461 return cg_fd; 462 } 463 464 /** 465 * setup_classid_environment() - Setup the cgroupv1 net_cls environment 466 * 467 * After calling this function, cleanup_classid_environment should be called 468 * once testing is complete. 469 * 470 * This function will print an error to stderr and return 1 if it is unable 471 * to setup the cgroup environment. If setup is successful, 0 is returned. 472 */ 473 int setup_classid_environment(void) 474 { 475 char cgroup_workdir[PATH_MAX + 1]; 476 477 format_classid_path(cgroup_workdir); 478 479 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && 480 errno != EBUSY) { 481 log_err("mount cgroup base"); 482 return 1; 483 } 484 485 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { 486 log_err("mkdir cgroup net_cls"); 487 return 1; 488 } 489 490 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && 491 errno != EBUSY) { 492 log_err("mount cgroup net_cls"); 493 return 1; 494 } 495 496 cleanup_classid_environment(); 497 498 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { 499 log_err("mkdir cgroup work dir"); 500 return 1; 501 } 502 503 return 0; 504 } 505 506 /** 507 * set_classid() - Set a cgroupv1 net_cls classid 508 * @id: the numeric classid 509 * 510 * Writes the passed classid into the cgroup work dir's net_cls.classid 511 * file in order to later on trigger socket tagging. 512 * 513 * On success, it returns 0, otherwise on failure it returns 1. If there 514 * is a failure, it prints the error to stderr. 515 */ 516 int set_classid(unsigned int id) 517 { 518 char cgroup_workdir[PATH_MAX - 42]; 519 char cgroup_classid_path[PATH_MAX + 1]; 520 int fd, rc = 0; 521 522 format_classid_path(cgroup_workdir); 523 snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), 524 "%s/net_cls.classid", cgroup_workdir); 525 526 fd = open(cgroup_classid_path, O_WRONLY); 527 if (fd < 0) { 528 log_err("Opening cgroup classid: %s", cgroup_classid_path); 529 return 1; 530 } 531 532 if (dprintf(fd, "%u\n", id) < 0) { 533 log_err("Setting cgroup classid"); 534 rc = 1; 535 } 536 537 close(fd); 538 return rc; 539 } 540 541 /** 542 * join_classid() - Join a cgroupv1 net_cls classid 543 * 544 * This function expects the cgroup work dir to be already created, as we 545 * join it here. This causes the process sockets to be tagged with the given 546 * net_cls classid. 547 * 548 * On success, it returns 0, otherwise on failure it returns 1. 549 */ 550 int join_classid(void) 551 { 552 char cgroup_workdir[PATH_MAX + 1]; 553 554 format_classid_path(cgroup_workdir); 555 return join_cgroup_from_top(cgroup_workdir); 556 } 557 558 /** 559 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment 560 * 561 * At call time, it moves the calling process to the root cgroup, and then 562 * runs the deletion process. 563 * 564 * On failure, it will print an error to stderr, and try to continue. 565 */ 566 void cleanup_classid_environment(void) 567 { 568 char cgroup_workdir[PATH_MAX + 1]; 569 570 format_classid_path(cgroup_workdir); 571 join_cgroup_from_top(NETCLS_MOUNT_PATH); 572 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); 573 } 574