1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14 
15 #include "cgroup_helpers.h"
16 
/*
 * To avoid relying on the system setup, when setup_cgroup_environment() is
 * called we create a new mount namespace, and cgroup namespace. The cgroupv2
 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
 * have cgroupv2 enabled at this point in time. It's easier to create our
 * own mount namespace and manage it ourselves. We assume /mnt exists.
 *
 * Related cgroupv1 helpers are named *classid*(), since we only use the
 * net_cls controller for tagging net_cls.classid. We assume the default
 * mount under /sys/fs/cgroup/net_cls, which should be the case for the
 * vast majority of users.
 */
29 
/* Passed to nftw() as the limit on simultaneously open descriptors */
#define WALK_FD_LIMIT			16

/* Mount point used for the cgroup v2 hierarchy */
#define CGROUP_MOUNT_PATH		"/mnt"
/* Default cgroup mount point; the v1 net_cls hierarchy lives below it */
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
/* Prefix of the work directory name (the v2 variant appends a pid) */
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Format "<CGROUP_MOUNT_PATH><CGROUP_WORK_DIR><pid><path>" into @buf.
 * @buf must be a real array (not a pointer) because its capacity is taken
 * with sizeof(). Evaluates to the snprintf() return value.
 */
#define format_cgroup_path_pid(buf, path, pid)				\
	snprintf((buf), sizeof(buf), "%s%s%d%s",			\
		 CGROUP_MOUNT_PATH, CGROUP_WORK_DIR, (pid), (path))

/* Same as above, using the calling process' pid for the work dir */
#define format_cgroup_path(buf, path)					\
	format_cgroup_path_pid(buf, path, getpid())

/* Same as above, using the parent process' pid for the work dir */
#define format_parent_cgroup_path(buf, path)				\
	format_cgroup_path_pid(buf, path, getppid())

/* Format the cgroup v1 net_cls work dir path into the array @buf */
#define format_classid_path(buf)					\
	snprintf((buf), sizeof(buf), "%s%s",				\
		 NETCLS_MOUNT_PATH, CGROUP_WORK_DIR)
50 
/*
 * __enable_controllers() - Enable cgroup v2 controllers below a cgroup
 * @cgroup_path: Full path of the cgroup directory
 * @controllers: Space separated controller names, or NULL to enable every
 *               controller listed in @cgroup_path/cgroup.controllers
 *
 * Every controller name is written as "+<name>" to
 * @cgroup_path/cgroup.subtree_control.
 *
 * Returns 0 on success (including when there is nothing to enable) and 1
 * on failure, after logging the error.
 */
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
	char path[PATH_MAX + 1];
	char enable[PATH_MAX + 1];
	char *c, *c2;
	int fd, cfd;
	ssize_t len;

	/* If no controllers are passed, enable all available controllers */
	if (!controllers) {
		snprintf(path, sizeof(path), "%s/cgroup.controllers",
			 cgroup_path);
		fd = open(path, O_RDONLY);
		if (fd < 0) {
			log_err("Opening cgroup.controllers: %s", path);
			return 1;
		}
		len = read(fd, enable, sizeof(enable) - 1);
		if (len < 0) {
			/* Log first, so close() cannot disturb errno */
			log_err("Reading cgroup.controllers: %s", path);
			close(fd);
			return 1;
		}
		close(fd);
		if (len == 0) /* No controllers to enable */
			return 0;
		enable[len] = 0;
	} else {
		/*
		 * Unlike strncpy(), snprintf() always NUL-terminates, so an
		 * over-long @controllers string cannot leave enable[]
		 * unterminated for strtok_r() below.
		 */
		snprintf(enable, sizeof(enable), "%s", controllers);
	}

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/*
	 * Split on newlines as well as spaces, so that a line terminator
	 * read from cgroup.controllers does not become part of (or the
	 * whole of) a controller name.
	 */
	for (c = strtok_r(enable, " \n", &c2); c;
	     c = strtok_r(NULL, " \n", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}
	close(cfd);
	return 0;
}
100 
/**
 * enable_controllers() - Turn on cgroup v2 controllers for a test cgroup
 * @relative_path: Path of the cgroup, relative to the workdir
 * @controllers: Controllers to enable, in cgroup.controllers format
 *
 * A NULL @controllers asks for all available controllers to be enabled.
 *
 * Returns 0 if successful.
 */
int enable_controllers(const char *relative_path, const char *controllers)
{
	char abs_path[PATH_MAX + 1];
	int rc;

	/* Resolve the path below this process' work directory */
	format_cgroup_path(abs_path, relative_path);
	rc = __enable_controllers(abs_path, controllers);

	return rc;
}
119 
/*
 * Write the string @buf to the file @file inside the directory
 * @cgroup_path.
 *
 * Returns 0 on success. Returns 1, after logging, when the file cannot be
 * opened or when dprintf() reports an error or zero bytes written.
 */
static int __write_cgroup_file(const char *cgroup_path, const char *file,
			       const char *buf)
{
	char target[PATH_MAX + 1];
	int out_fd;
	int rc = 0;

	snprintf(target, sizeof(target), "%s/%s", cgroup_path, file);

	out_fd = open(target, O_RDWR);
	if (out_fd < 0) {
		log_err("Opening %s", target);
		return 1;
	}

	if (dprintf(out_fd, "%s", buf) <= 0) {
		log_err("Writing to %s", target);
		rc = 1;
	}

	close(out_fd);
	return rc;
}
141 
/**
 * write_cgroup_file() - Write a string to a cgroup file
 * @relative_path: Path of the cgroup, relative to the workdir
 * @file: Name of the cgroupfs file to write to
 * @buf: String to write to the file
 *
 * The file is looked up in the directory of the given cgroup, below the
 * calling process' workdir.
 *
 * Returns 0 if successful.
 */
int write_cgroup_file(const char *relative_path, const char *file,
		      const char *buf)
{
	char dir[PATH_MAX - 24];

	format_cgroup_path_pid(dir, relative_path, getpid());

	return __write_cgroup_file(dir, file, buf);
}
160 
/**
 * write_cgroup_file_parent() - Write a string to a cgroup file below the
 *                              parent process workdir
 * @relative_path: Path of the cgroup, relative to the parent process workdir
 * @file: Name of the cgroupfs file to write to
 * @buf: String to write to the file
 *
 * Same as write_cgroup_file(), except that the workdir is the one named
 * after the parent process' pid.
 *
 * Returns 0 if successful.
 */
int write_cgroup_file_parent(const char *relative_path, const char *file,
			     const char *buf)
{
	char dir[PATH_MAX - 24];

	format_cgroup_path_pid(dir, relative_path, getppid());

	return __write_cgroup_file(dir, file, buf);
}
181 
182 /**
183  * setup_cgroup_environment() - Setup the cgroup environment
184  *
185  * After calling this function, cleanup_cgroup_environment should be called
186  * once testing is complete.
187  *
188  * This function will print an error to stderr and return 1 if it is unable
189  * to setup the cgroup environment. If setup is successful, 0 is returned.
190  */
191 int setup_cgroup_environment(void)
192 {
193 	char cgroup_workdir[PATH_MAX - 24];
194 
195 	format_cgroup_path(cgroup_workdir, "");
196 
197 	if (unshare(CLONE_NEWNS)) {
198 		log_err("unshare");
199 		return 1;
200 	}
201 
202 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
203 		log_err("mount fakeroot");
204 		return 1;
205 	}
206 
207 	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
208 		log_err("mount cgroup2");
209 		return 1;
210 	}
211 
212 	/* Cleanup existing failed runs, now that the environment is setup */
213 	cleanup_cgroup_environment();
214 
215 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
216 		log_err("mkdir cgroup work dir");
217 		return 1;
218 	}
219 
220 	/* Enable all available controllers to increase test coverage */
221 	if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
222 	    __enable_controllers(cgroup_workdir, NULL))
223 		return 1;
224 
225 	return 0;
226 }
227 
/*
 * nftwfunc() - nftw() callback that removes the directories it is given
 * @filename: Path of the entry being visited
 * @statptr: stat data of the entry (unused)
 * @fileflags: nftw() type flag of the entry
 * @pfwt: Walk position information (unused)
 *
 * Non-directory entries are ignored. A failing rmdir() is logged but does
 * not stop the walk: 0 is always returned so that nftw() continues.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	(void)statptr;
	(void)pfwt;

	/*
	 * The type flag is an enumerated value, not a bitmask, so it has to
	 * be compared rather than masked. Directories are reported as FTW_DP
	 * when the walk uses FTW_DEPTH, and as FTW_D otherwise.
	 */
	if (fileflags != FTW_DP && fileflags != FTW_D)
		return 0;

	if (rmdir(filename))
		log_err("Removing cgroup: %s", filename);
	return 0;
}
235 
/*
 * Move the calling process into the cgroup at the full path @cgroup_path
 * by writing its pid to the cgroup.procs file of that directory.
 *
 * Returns 0 on success, or 1 after logging an error.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	int procs_fd;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	procs_fd = open(procs_file, O_WRONLY);
	if (procs_fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	if (dprintf(procs_fd, "%d\n", getpid()) < 0) {
		log_err("Joining Cgroup");
		close(procs_fd);
		return 1;
	}

	close(procs_fd);
	return 0;
}
259 
/**
 * join_cgroup() - Move the calling process into a cgroup
 * @relative_path: Path of the cgroup to join, relative to the workdir
 *
 * The cgroup must already exist below the cgroup work dir. Passing
 * "/my-cgroup", for example, places the calling process into
 * "/cgroup-test-work-dir/my-cgroup".
 *
 * Returns 0 on success and 1 on failure.
 */
int join_cgroup(const char *relative_path)
{
	char target[PATH_MAX + 1];
	int rc;

	format_cgroup_path(target, relative_path);
	rc = join_cgroup_from_top(target);

	return rc;
}
278 
/**
 * join_parent_cgroup() - Move the calling process into a cgroup that lives
 *                        below the parent process workdir
 * @relative_path: Path of the cgroup to join, relative to the parent
 *                 process workdir
 *
 * See join_cgroup().
 *
 * Returns 0 on success and 1 on failure.
 */
int join_parent_cgroup(const char *relative_path)
{
	char target[PATH_MAX + 1];
	int rc;

	format_parent_cgroup_path(target, relative_path);
	rc = join_cgroup_from_top(target);

	return rc;
}
294 
295 /**
296  * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
297  *
298  * This is an idempotent function to delete all temporary cgroups that
299  * have been created during the test, including the cgroup testing work
300  * directory.
301  *
302  * At call time, it moves the calling process to the root cgroup, and then
303  * runs the deletion process. It is idempotent, and should not fail, unless
304  * a process is lingering.
305  *
306  * On failure, it will print an error to stderr, and try to continue.
307  */
308 void cleanup_cgroup_environment(void)
309 {
310 	char cgroup_workdir[PATH_MAX + 1];
311 
312 	format_cgroup_path(cgroup_workdir, "");
313 	join_cgroup_from_top(CGROUP_MOUNT_PATH);
314 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
315 }
316 
317 /**
318  * get_root_cgroup() - Get the FD of the root cgroup
319  *
320  * On success, it returns the file descriptor. On failure, it returns -1.
321  * If there is a failure, it prints the error to stderr.
322  */
323 int get_root_cgroup(void)
324 {
325 	int fd;
326 
327 	fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
328 	if (fd < 0) {
329 		log_err("Opening root cgroup");
330 		return -1;
331 	}
332 	return fd;
333 }
334 
/**
 * create_and_get_cgroup() - Create a cgroup below the workdir and open it
 * @relative_path: Path of the cgroup to create, relative to the workdir
 *
 * Creates the cgroup directory under the top level workdir and returns a
 * file descriptor for it. An already existing cgroup is not an error, so
 * the function is idempotent.
 *
 * Returns the file descriptor on success. On failure, the error is printed
 * to stderr and -1 is returned.
 */
int create_and_get_cgroup(const char *relative_path)
{
	char dir[PATH_MAX + 1];
	int dir_fd;

	format_cgroup_path(dir, relative_path);

	if (mkdir(dir, 0777)) {
		/* Only a pre-existing directory is acceptable */
		if (errno != EEXIST) {
			log_err("mkdiring cgroup %s .. %s", relative_path, dir);
			return -1;
		}
	}

	dir_fd = open(dir, O_RDONLY);
	if (dir_fd >= 0)
		return dir_fd;

	log_err("Opening Cgroup");
	return -1;
}
364 
/**
 * get_cgroup_id() - Look up the cgroup id of a cgroup path
 * @relative_path: Path of the cgroup, relative to the workdir
 *
 * The id is taken from the 8 byte file handle that name_to_handle_at()
 * produces for the cgroup directory.
 *
 * Returns the cgroup id on success. On failure, the error is printed to
 * stderr and 0, which is an invalid cgroup id, is returned.
 */
unsigned long long get_cgroup_id(const char *relative_path)
{
	char path[PATH_MAX + 1];
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mount_id;

	format_cgroup_path(path, relative_path);

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/*
	 * Size probe: handle_bytes is still 0 from calloc(), so this call is
	 * expected to fail while reporting the needed size in handle_bytes.
	 * A success here, or a size other than 8, is treated as an error.
	 */
	if (name_to_handle_at(AT_FDCWD, path, handle, &mount_id, 0) >= 0 ||
	    handle->handle_bytes != 8) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* On realloc() failure the old allocation stays valid and is freed */
	grown = realloc(handle,
			sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		log_err("realloc");
		goto out;
	}
	handle = grown;

	if (name_to_handle_at(AT_FDCWD, path, handle, &mount_id, 0) < 0) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* The 8 handle bytes are the id, in host byte order */
	memcpy(&cgid, handle->f_handle, 8);

out:
	free(handle);
	return cgid;
}
420 
/**
 * cgroup_setup_and_join() - Set up the cgroup environment, then create and
 *                           join a cgroup
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * On success, it returns the file descriptor of the created cgroup. On
 * failure, it prints an error to stderr and returns a negative value; in
 * that case no file descriptor is left open for the caller. If the cgroup
 * cannot be created or joined, the cgroup environment is cleaned up again.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The caller only gets an error code, so don't leak the fd */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
443 
444 /**
445  * setup_classid_environment() - Setup the cgroupv1 net_cls environment
446  *
447  * After calling this function, cleanup_classid_environment should be called
448  * once testing is complete.
449  *
450  * This function will print an error to stderr and return 1 if it is unable
451  * to setup the cgroup environment. If setup is successful, 0 is returned.
452  */
453 int setup_classid_environment(void)
454 {
455 	char cgroup_workdir[PATH_MAX + 1];
456 
457 	format_classid_path(cgroup_workdir);
458 
459 	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
460 	    errno != EBUSY) {
461 		log_err("mount cgroup base");
462 		return 1;
463 	}
464 
465 	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
466 		log_err("mkdir cgroup net_cls");
467 		return 1;
468 	}
469 
470 	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
471 	    errno != EBUSY) {
472 		log_err("mount cgroup net_cls");
473 		return 1;
474 	}
475 
476 	cleanup_classid_environment();
477 
478 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
479 		log_err("mkdir cgroup work dir");
480 		return 1;
481 	}
482 
483 	return 0;
484 }
485 
486 /**
487  * set_classid() - Set a cgroupv1 net_cls classid
488  * @id: the numeric classid
489  *
490  * Writes the passed classid into the cgroup work dir's net_cls.classid
491  * file in order to later on trigger socket tagging.
492  *
493  * On success, it returns 0, otherwise on failure it returns 1. If there
494  * is a failure, it prints the error to stderr.
495  */
496 int set_classid(unsigned int id)
497 {
498 	char cgroup_workdir[PATH_MAX - 42];
499 	char cgroup_classid_path[PATH_MAX + 1];
500 	int fd, rc = 0;
501 
502 	format_classid_path(cgroup_workdir);
503 	snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
504 		 "%s/net_cls.classid", cgroup_workdir);
505 
506 	fd = open(cgroup_classid_path, O_WRONLY);
507 	if (fd < 0) {
508 		log_err("Opening cgroup classid: %s", cgroup_classid_path);
509 		return 1;
510 	}
511 
512 	if (dprintf(fd, "%u\n", id) < 0) {
513 		log_err("Setting cgroup classid");
514 		rc = 1;
515 	}
516 
517 	close(fd);
518 	return rc;
519 }
520 
/**
 * join_classid() - Join a cgroupv1 net_cls classid
 *
 * Moves the calling process into the cgroup work dir, which has to exist
 * already. This causes the process sockets to be tagged with the given
 * net_cls classid.
 *
 * Returns 0 on success and 1 on failure.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];
	int rc;

	format_classid_path(workdir);
	rc = join_cgroup_from_top(workdir);

	return rc;
}
537 
538 /**
539  * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
540  *
541  * At call time, it moves the calling process to the root cgroup, and then
542  * runs the deletion process.
543  *
544  * On failure, it will print an error to stderr, and try to continue.
545  */
546 void cleanup_classid_environment(void)
547 {
548 	char cgroup_workdir[PATH_MAX + 1];
549 
550 	format_classid_path(cgroup_workdir);
551 	join_cgroup_from_top(NETCLS_MOUNT_PATH);
552 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
553 }
554