1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14 
15 #include "cgroup_helpers.h"
16 #include "bpf_util.h"
17 
18 /*
 * To avoid relying on the system setup, when setup_cgroup_environment()
 * is called we create a new mount namespace, and cgroup namespace. The
 * cgroupv2 root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most
 * people don't have cgroupv2 enabled at this point in time. It's easier
 * to create our own mount namespace and manage it ourselves. We assume
 * /mnt exists.
24  *
25  * Related cgroupv1 helpers are named *classid*(), since we only use the
26  * net_cls controller for tagging net_cls.classid. We assume the default
27  * mount under /sys/fs/cgroup/net_cls, which should be the case for the
28  * vast majority of users.
29  */
30 
/* Upper bound on the directory fds nftw() may keep open while cleaning up */
#define WALK_FD_LIMIT			16

/* Mount points and the per-test work directory name prefix */
#define CGROUP_MOUNT_PATH		"/mnt"
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Build "<CGROUP_MOUNT_PATH><CGROUP_WORK_DIR><pid><path>" in @buf.
 * @buf must be a char array (not a pointer), since its size is taken
 * with sizeof(). Evaluates to the snprintf() return value.
 */
#define format_cgroup_path_pid(buf, path, pid)				\
	snprintf((buf), sizeof(buf), "%s%s%d%s",			\
		 CGROUP_MOUNT_PATH, CGROUP_WORK_DIR, (pid), (path))

/* Same as above, using the work dir of the calling process */
#define format_cgroup_path(buf, path)					\
	format_cgroup_path_pid(buf, path, getpid())

/* Same as above, using the work dir of the parent process */
#define format_parent_cgroup_path(buf, path)				\
	format_cgroup_path_pid(buf, path, getppid())

/* Build the cgroupv1 net_cls work dir path (no pid suffix) in @buf */
#define format_classid_path(buf)					\
	snprintf((buf), sizeof(buf), "%s%s",				\
		 NETCLS_MOUNT_PATH, CGROUP_WORK_DIR)
51 
/*
 * __enable_controllers() - Enable controllers for a cgroup's children
 * @cgroup_path: Full path of the cgroup directory
 * @controllers: Whitespace separated controller names, or NULL to enable
 *               everything listed in the cgroup's cgroup.controllers file
 *
 * Every controller name is written as "+<name>" to the cgroup's
 * cgroup.subtree_control file.
 *
 * Returns 0 on success (including when cgroup.controllers is empty) and 1
 * on failure, after logging the error.
 */
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
	char path[PATH_MAX + 1];
	char enable[PATH_MAX + 1];
	char *c, *c2;
	int fd, cfd;
	ssize_t len;

	/* If no controllers are passed, enable all available controllers */
	if (!controllers) {
		snprintf(path, sizeof(path), "%s/cgroup.controllers",
			 cgroup_path);
		fd = open(path, O_RDONLY);
		if (fd < 0) {
			log_err("Opening cgroup.controllers: %s", path);
			return 1;
		}
		len = read(fd, enable, sizeof(enable) - 1);
		if (len < 0) {
			/*
			 * Log before close(): close() may change errno, and
			 * log_err() presumably reports it (defined outside
			 * this file - TODO confirm).
			 */
			log_err("Reading cgroup.controllers: %s", path);
			close(fd);
			return 1;
		}
		close(fd);
		if (len == 0) /* No controllers to enable */
			return 0;
		enable[len] = 0;
	} else {
		bpf_strlcpy(enable, controllers, sizeof(enable));
	}

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/*
	 * Split on newlines as well as spaces so that any trailing newline
	 * read from cgroup.controllers does not end up in the last name.
	 */
	for (c = strtok_r(enable, " \n", &c2); c;
	     c = strtok_r(NULL, " \n", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}
	close(cfd);
	return 0;
}
101 
/**
 * enable_controllers() - Enable cgroup v2 controllers
 * @relative_path: The cgroup path, relative to the workdir
 * @controllers: List of controllers to enable in cgroup.controllers format
 *
 * Enables the given cgroup v2 controllers for the cgroup at @relative_path.
 * When @controllers is NULL, all available controllers are enabled.
 *
 * If successful, 0 is returned.
 */
int enable_controllers(const char *relative_path, const char *controllers)
{
	char abs_path[PATH_MAX + 1];
	int rc;

	/* Resolve the path inside this process' work dir */
	format_cgroup_path(abs_path, relative_path);
	rc = __enable_controllers(abs_path, controllers);

	return rc;
}
120 
/*
 * Write the string @buf to the file named @file inside the directory
 * @cgroup_path. Returns 0 on success and 1 on failure (after logging).
 */
static int __write_cgroup_file(const char *cgroup_path, const char *file,
			       const char *buf)
{
	char target[PATH_MAX + 1];
	int rc = 0;
	int fd;

	snprintf(target, sizeof(target), "%s/%s", cgroup_path, file);

	fd = open(target, O_RDWR);
	if (fd < 0) {
		log_err("Opening %s", target);
		return 1;
	}

	/* Writing nothing at all (0 bytes) is treated as a failure too */
	if (dprintf(fd, "%s", buf) <= 0) {
		log_err("Writing to %s", target);
		rc = 1;
	}

	close(fd);
	return rc;
}
142 
/**
 * write_cgroup_file() - Write to a cgroup file
 * @relative_path: The cgroup path, relative to the workdir
 * @file: The name of the file in cgroupfs to write to
 * @buf: Buffer to write to the file
 *
 * Writes @buf to @file in the directory of the given cgroup, which is
 * looked up under the work dir of the calling process.
 *
 * If successful, 0 is returned.
 */
int write_cgroup_file(const char *relative_path, const char *file,
		      const char *buf)
{
	char abs_path[PATH_MAX - 24];
	int rc;

	format_cgroup_path(abs_path, relative_path);
	rc = __write_cgroup_file(abs_path, file, buf);

	return rc;
}
161 
/**
 * write_cgroup_file_parent() - Write to a cgroup file in the parent process
 *                              workdir
 * @relative_path: The cgroup path, relative to the parent process workdir
 * @file: The name of the file in cgroupfs to write to
 * @buf: Buffer to write to the file
 *
 * Same as write_cgroup_file(), except that the cgroup is looked up under
 * the work dir named after the parent's pid (getppid()).
 *
 * If successful, 0 is returned.
 */
int write_cgroup_file_parent(const char *relative_path, const char *file,
			     const char *buf)
{
	char abs_path[PATH_MAX - 24];
	int rc;

	format_parent_cgroup_path(abs_path, relative_path);
	rc = __write_cgroup_file(abs_path, file, buf);

	return rc;
}
182 
183 /**
184  * setup_cgroup_environment() - Setup the cgroup environment
185  *
186  * After calling this function, cleanup_cgroup_environment should be called
187  * once testing is complete.
188  *
189  * This function will print an error to stderr and return 1 if it is unable
190  * to setup the cgroup environment. If setup is successful, 0 is returned.
191  */
192 int setup_cgroup_environment(void)
193 {
194 	char cgroup_workdir[PATH_MAX - 24];
195 
196 	format_cgroup_path(cgroup_workdir, "");
197 
198 	if (unshare(CLONE_NEWNS)) {
199 		log_err("unshare");
200 		return 1;
201 	}
202 
203 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
204 		log_err("mount fakeroot");
205 		return 1;
206 	}
207 
208 	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
209 		log_err("mount cgroup2");
210 		return 1;
211 	}
212 
213 	/* Cleanup existing failed runs, now that the environment is setup */
214 	cleanup_cgroup_environment();
215 
216 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
217 		log_err("mkdir cgroup work dir");
218 		return 1;
219 	}
220 
221 	/* Enable all available controllers to increase test coverage */
222 	if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
223 	    __enable_controllers(cgroup_workdir, NULL))
224 		return 1;
225 
226 	return 0;
227 }
228 
/*
 * nftw() callback used by the cleanup helpers: entries whose type flag has
 * the FTW_D bit set are removed with rmdir(). A failed removal is logged
 * only; 0 is always returned so that the walk carries on.
 */
static int nftwfunc(const char *filename, const struct stat *sb,
		    int typeflag, struct FTW *ftwbuf)
{
	if (!(typeflag & FTW_D))
		return 0;

	if (rmdir(filename))
		log_err("Removing cgroup: %s", filename);

	return 0;
}
236 
/*
 * Move the calling process into the cgroup at @cgroup_path (a full path)
 * by writing its pid to the cgroup's cgroup.procs file.
 *
 * Returns 0 on success and 1 on failure, after logging the error.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	int fd;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	fd = open(procs_file, O_WRONLY);
	if (fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	if (dprintf(fd, "%d\n", getpid()) < 0) {
		log_err("Joining Cgroup");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}
260 
/**
 * join_cgroup() - Join a cgroup
 * @relative_path: The cgroup path, relative to the workdir, to join
 *
 * The cgroup must already exist below the cgroup work dir of the calling
 * process. The work dir name carries the pid of the caller, so passing
 * "/my-cgroup" puts the calling process into the cgroup
 * "/cgroup-test-work-dir<pid>/my-cgroup".
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int rc;

	format_cgroup_path(abs_path, relative_path);
	rc = join_cgroup_from_top(abs_path);

	return rc;
}
279 
280 /**
281  * join_root_cgroup() - Join the root cgroup
282  *
283  * This function joins the root cgroup.
284  *
285  * On success, it returns 0, otherwise on failure it returns 1.
286  */
287 int join_root_cgroup(void)
288 {
289 	return join_cgroup_from_top(CGROUP_MOUNT_PATH);
290 }
291 
/**
 * join_parent_cgroup() - Join a cgroup in the parent process workdir
 * @relative_path: The cgroup path, relative to parent process workdir, to join
 *
 * Like join_cgroup(), but the cgroup is looked up below the work dir named
 * after the parent's pid (getppid()).
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_parent_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int rc;

	format_parent_cgroup_path(abs_path, relative_path);
	rc = join_cgroup_from_top(abs_path);

	return rc;
}
307 
308 /**
309  * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
310  *
311  * This is an idempotent function to delete all temporary cgroups that
312  * have been created during the test, including the cgroup testing work
313  * directory.
314  *
315  * At call time, it moves the calling process to the root cgroup, and then
316  * runs the deletion process. It is idempotent, and should not fail, unless
317  * a process is lingering.
318  *
319  * On failure, it will print an error to stderr, and try to continue.
320  */
321 void cleanup_cgroup_environment(void)
322 {
323 	char cgroup_workdir[PATH_MAX + 1];
324 
325 	format_cgroup_path(cgroup_workdir, "");
326 	join_cgroup_from_top(CGROUP_MOUNT_PATH);
327 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
328 }
329 
330 /**
331  * get_root_cgroup() - Get the FD of the root cgroup
332  *
333  * On success, it returns the file descriptor. On failure, it returns -1.
334  * If there is a failure, it prints the error to stderr.
335  */
336 int get_root_cgroup(void)
337 {
338 	int fd;
339 
340 	fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
341 	if (fd < 0) {
342 		log_err("Opening root cgroup");
343 		return -1;
344 	}
345 	return fd;
346 }
347 
/**
 * remove_cgroup() - Remove a cgroup
 * @relative_path: The cgroup path, relative to the workdir, to remove
 *
 * The cgroup is expected to exist below the cgroup work dir, and to have
 * neither children nor live processes. It is removed with rmdir().
 *
 * On failure, it will print an error to stderr.
 */
void remove_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];

	format_cgroup_path(abs_path, relative_path);

	if (rmdir(abs_path) != 0)
		log_err("rmdiring cgroup %s .. %s", relative_path, abs_path);
}
366 
/**
 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
 * @relative_path: The cgroup path, relative to the workdir, to create
 *
 * Creates a cgroup under the top level workdir and opens its directory
 * read-only. An already existing cgroup is not an error, which makes the
 * function idempotent.
 *
 * On success, it returns the file descriptor. On failure it returns -1
 * and prints the error to stderr.
 */
int create_and_get_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int cg_fd;

	format_cgroup_path(abs_path, relative_path);

	if (mkdir(abs_path, 0777) != 0 && errno != EEXIST) {
		log_err("mkdiring cgroup %s .. %s", relative_path, abs_path);
		return -1;
	}

	cg_fd = open(abs_path, O_RDONLY);
	if (cg_fd >= 0)
		return cg_fd;

	log_err("Opening Cgroup");
	return -1;
}
396 
397 /**
398  * get_cgroup_id() - Get cgroup id for a particular cgroup path
399  * @relative_path: The cgroup path, relative to the workdir, to join
400  *
401  * On success, it returns the cgroup id. On failure it returns 0,
402  * which is an invalid cgroup id.
403  * If there is a failure, it prints the error to stderr.
404  */
405 unsigned long long get_cgroup_id(const char *relative_path)
406 {
407 	int dirfd, err, flags, mount_id, fhsize;
408 	union {
409 		unsigned long long cgid;
410 		unsigned char raw_bytes[8];
411 	} id;
412 	char cgroup_workdir[PATH_MAX + 1];
413 	struct file_handle *fhp, *fhp2;
414 	unsigned long long ret = 0;
415 
416 	format_cgroup_path(cgroup_workdir, relative_path);
417 
418 	dirfd = AT_FDCWD;
419 	flags = 0;
420 	fhsize = sizeof(*fhp);
421 	fhp = calloc(1, fhsize);
422 	if (!fhp) {
423 		log_err("calloc");
424 		return 0;
425 	}
426 	err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
427 	if (err >= 0 || fhp->handle_bytes != 8) {
428 		log_err("name_to_handle_at");
429 		goto free_mem;
430 	}
431 
432 	fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
433 	fhp2 = realloc(fhp, fhsize);
434 	if (!fhp2) {
435 		log_err("realloc");
436 		goto free_mem;
437 	}
438 	err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
439 	fhp = fhp2;
440 	if (err < 0) {
441 		log_err("name_to_handle_at");
442 		goto free_mem;
443 	}
444 
445 	memcpy(id.raw_bytes, fhp->f_handle, 8);
446 	ret = id.cgid;
447 
448 free_mem:
449 	free(fhp);
450 	return ret;
451 }
452 
/**
 * cgroup_setup_and_join() - Setup the cgroup environment and join a new cgroup
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * Sets up the cgroup environment, creates the cgroup at @path and moves
 * the calling process into it. If creating or joining the cgroup fails,
 * the cgroup environment is cleaned up again.
 *
 * On success, it returns the file descriptor of the created cgroup, which
 * the caller owns. On failure, it prints an error to stderr and returns a
 * negative value: -EINVAL, or the value returned by create_and_get_cgroup().
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The caller only gets an error code, so don't leak the fd */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
475 
476 /**
477  * setup_classid_environment() - Setup the cgroupv1 net_cls environment
478  *
479  * After calling this function, cleanup_classid_environment should be called
480  * once testing is complete.
481  *
482  * This function will print an error to stderr and return 1 if it is unable
483  * to setup the cgroup environment. If setup is successful, 0 is returned.
484  */
485 int setup_classid_environment(void)
486 {
487 	char cgroup_workdir[PATH_MAX + 1];
488 
489 	format_classid_path(cgroup_workdir);
490 
491 	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
492 	    errno != EBUSY) {
493 		log_err("mount cgroup base");
494 		return 1;
495 	}
496 
497 	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
498 		log_err("mkdir cgroup net_cls");
499 		return 1;
500 	}
501 
502 	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
503 		if (errno != EBUSY) {
504 			log_err("mount cgroup net_cls");
505 			return 1;
506 		}
507 
508 		if (rmdir(NETCLS_MOUNT_PATH)) {
509 			log_err("rmdir cgroup net_cls");
510 			return 1;
511 		}
512 		if (umount(CGROUP_MOUNT_DFLT)) {
513 			log_err("umount cgroup base");
514 			return 1;
515 		}
516 	}
517 
518 	cleanup_classid_environment();
519 
520 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
521 		log_err("mkdir cgroup work dir");
522 		return 1;
523 	}
524 
525 	return 0;
526 }
527 
/**
 * set_classid() - Set a cgroupv1 net_cls classid
 * @id: the numeric classid
 *
 * Writes @id in decimal to the net_cls.classid file of the cgroup work
 * dir, in order to later on trigger socket tagging.
 *
 * On success, it returns 0, otherwise on failure it returns 1. If there
 * is a failure, it prints the error to stderr.
 */
int set_classid(unsigned int id)
{
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int fd;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file), "%s/net_cls.classid",
		 workdir);

	fd = open(classid_file, O_WRONLY);
	if (fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	if (dprintf(fd, "%u\n", id) < 0) {
		log_err("Setting cgroup classid");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}
562 
/**
 * join_classid() - Join a cgroupv1 net_cls classid
 *
 * Moves the calling process into the net_cls cgroup work dir, which is
 * expected to exist already. This causes the process sockets to be tagged
 * with the given net_cls classid.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];
	int rc;

	format_classid_path(workdir);
	rc = join_cgroup_from_top(workdir);

	return rc;
}
579 
580 /**
581  * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
582  *
583  * At call time, it moves the calling process to the root cgroup, and then
584  * runs the deletion process.
585  *
586  * On failure, it will print an error to stderr, and try to continue.
587  */
588 void cleanup_classid_environment(void)
589 {
590 	char cgroup_workdir[PATH_MAX + 1];
591 
592 	format_classid_path(cgroup_workdir);
593 	join_cgroup_from_top(NETCLS_MOUNT_PATH);
594 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
595 }
596