1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14 
15 #include "cgroup_helpers.h"
16 
/*
 * To avoid relying on the system setup, when setup_cgroup_environment() is
 * called we create a new mount namespace, and cgroup namespace. The cgroupv2
 * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
 * have cgroupv2 enabled at this point in time. It's easier to create our
 * own mount namespace and manage it ourselves. We assume /mnt exists.
 *
 * Related cgroupv1 helpers are named *classid*(), since we only use the
 * net_cls controller for tagging net_cls.classid. We assume the default
 * mount under /sys/fs/cgroup/net_cls, which should be the case for the
 * vast majority of users.
 */
29 
/* Third argument handed to nftw() when walking the work directory. */
#define WALK_FD_LIMIT			16

/* Mount point used for the cgroup v2 hierarchy. */
#define CGROUP_MOUNT_PATH		"/mnt"
/* Base directory under which the cgroup v1 net_cls hierarchy lives. */
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
/* Name of the work directory (cgroup v2 appends the pid to it). */
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Format "<CGROUP_MOUNT_PATH><CGROUP_WORK_DIR><pid><rel>" into dst.
 *
 * dst must be a char array, not a pointer, because sizeof(dst) is used as
 * the buffer size. Evaluates to the snprintf() return value.
 */
#define format_cgroup_path(dst, rel)					\
	snprintf(dst, sizeof(dst), "%s%s%d%s",				\
		 CGROUP_MOUNT_PATH, CGROUP_WORK_DIR, getpid(), rel)

/*
 * Format "<NETCLS_MOUNT_PATH><CGROUP_WORK_DIR>" into dst.
 *
 * dst must be a char array, not a pointer, because sizeof(dst) is used as
 * the buffer size. Evaluates to the snprintf() return value.
 */
#define format_classid_path(dst)					\
	snprintf(dst, sizeof(dst), "%s%s",				\
		 NETCLS_MOUNT_PATH, CGROUP_WORK_DIR)
43 
/**
 * enable_all_controllers() - Enable all available cgroup v2 controllers
 * @cgroup_path: Directory of the cgroup to operate on
 *
 * Reads the controller names listed in @cgroup_path/cgroup.controllers and
 * writes "+<name>" for each of them to @cgroup_path/cgroup.subtree_control.
 * Enabling all available cgroup v2 controllers increases the code coverage.
 *
 * An empty cgroup.controllers file is not an error; nothing is enabled then.
 *
 * If successful, 0 is returned. On failure an error is logged and 1 is
 * returned.
 */
static int enable_all_controllers(const char *cgroup_path)
{
	char path[PATH_MAX + 1];
	char buf[PATH_MAX];
	char *c, *c2;
	int fd, cfd;
	ssize_t len;

	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		log_err("Opening cgroup.controllers: %s", path);
		return 1;
	}

	/* Leave room for the terminating NUL added below. */
	len = read(fd, buf, sizeof(buf) - 1);
	if (len < 0) {
		/*
		 * Report the failure before close() gets a chance to
		 * disturb errno, in case log_err() prints it.
		 */
		log_err("Reading cgroup.controllers: %s", path);
		close(fd);
		return 1;
	}
	buf[len] = 0;
	close(fd);

	/* No controllers available? We're probably on cgroup v1. */
	if (len == 0)
		return 0;

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/*
	 * Split on newlines as well as spaces so that the line terminator
	 * of the file does not end up glued to the last controller name.
	 */
	for (c = strtok_r(buf, " \n", &c2); c; c = strtok_r(NULL, " \n", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}
	close(cfd);
	return 0;
}
97 
98 /**
99  * setup_cgroup_environment() - Setup the cgroup environment
100  *
101  * After calling this function, cleanup_cgroup_environment should be called
102  * once testing is complete.
103  *
104  * This function will print an error to stderr and return 1 if it is unable
105  * to setup the cgroup environment. If setup is successful, 0 is returned.
106  */
107 int setup_cgroup_environment(void)
108 {
109 	char cgroup_workdir[PATH_MAX - 24];
110 
111 	format_cgroup_path(cgroup_workdir, "");
112 
113 	if (unshare(CLONE_NEWNS)) {
114 		log_err("unshare");
115 		return 1;
116 	}
117 
118 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
119 		log_err("mount fakeroot");
120 		return 1;
121 	}
122 
123 	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
124 		log_err("mount cgroup2");
125 		return 1;
126 	}
127 
128 	/* Cleanup existing failed runs, now that the environment is setup */
129 	cleanup_cgroup_environment();
130 
131 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
132 		log_err("mkdir cgroup work dir");
133 		return 1;
134 	}
135 
136 	if (enable_all_controllers(cgroup_workdir))
137 		return 1;
138 
139 	return 0;
140 }
141 
/*
 * nftwfunc() - nftw() callback that removes the directories it is handed
 * @filename: Path of the entry being visited
 * @statptr: Unused
 * @fileflags: nftw() type flag describing the entry
 * @pfwt: Unused
 *
 * Non-directory entries are skipped. A failing rmdir() is only logged.
 *
 * Always returns 0 so that the tree walk continues.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	(void)statptr;
	(void)pfwt;

	/*
	 * The type flag is an enumerated code, not a bitmask, so it has to
	 * be compared rather than masked. A walk done with FTW_DEPTH reports
	 * directories as FTW_DP; FTW_D is accepted too so the callback keeps
	 * working should it be used without FTW_DEPTH.
	 */
	if (fileflags != FTW_D && fileflags != FTW_DP)
		return 0;

	if (rmdir(filename))
		log_err("Removing cgroup: %s", filename);
	return 0;
}
149 
/*
 * join_cgroup_from_top() - Move the calling process into a cgroup
 * @cgroup_path: Full path of the cgroup directory
 *
 * Writes the pid of the calling process to @cgroup_path/cgroup.procs.
 *
 * Returns 0 on success. On failure an error is logged and 1 is returned.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	pid_t self = getpid();
	int procs_fd;
	int ret;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	procs_fd = open(procs_file, O_WRONLY);
	if (procs_fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	ret = 0;
	if (dprintf(procs_fd, "%d\n", self) < 0) {
		log_err("Joining Cgroup");
		ret = 1;
	}

	close(procs_fd);
	return ret;
}
173 
/**
 * join_cgroup() - Join a cgroup
 * @path: The cgroup path, relative to the workdir, to join
 *
 * This function expects the cgroup to have been created already, relative
 * to the cgroup work dir, and moves the calling process into it. The full
 * path is built by format_cgroup_path(), i.e. the mount path, the work dir
 * name, the pid of the caller and finally @path.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_cgroup(const char *path)
{
	char target[PATH_MAX + 1];

	format_cgroup_path(target, path);

	return join_cgroup_from_top(target);
}
192 
193 /**
194  * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
195  *
196  * This is an idempotent function to delete all temporary cgroups that
197  * have been created during the test, including the cgroup testing work
198  * directory.
199  *
200  * At call time, it moves the calling process to the root cgroup, and then
201  * runs the deletion process. It is idempotent, and should not fail, unless
202  * a process is lingering.
203  *
204  * On failure, it will print an error to stderr, and try to continue.
205  */
206 void cleanup_cgroup_environment(void)
207 {
208 	char cgroup_workdir[PATH_MAX + 1];
209 
210 	format_cgroup_path(cgroup_workdir, "");
211 	join_cgroup_from_top(CGROUP_MOUNT_PATH);
212 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
213 }
214 
/**
 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
 * @path: The cgroup path, relative to the workdir, to create
 *
 * Creates the cgroup directory below the top level workdir and opens it
 * read-only. An already existing directory is accepted, which makes the
 * function idempotent.
 *
 * On success, it returns the file descriptor. On failure it returns -1.
 * If there is a failure, it prints the error to stderr.
 */
int create_and_get_cgroup(const char *path)
{
	char dir[PATH_MAX + 1];
	int dir_fd;

	format_cgroup_path(dir, path);

	if (mkdir(dir, 0777) != 0 && errno != EEXIST) {
		log_err("mkdiring cgroup %s .. %s", path, dir);
		return -1;
	}

	dir_fd = open(dir, O_RDONLY);
	if (dir_fd >= 0)
		return dir_fd;

	log_err("Opening Cgroup");
	return -1;
}
244 
/**
 * get_cgroup_id() - Get cgroup id for a particular cgroup path
 * @path: The cgroup path, relative to the workdir, to look up
 *
 * The id is taken from the 8 byte file handle that name_to_handle_at()
 * reports for the cgroup directory.
 *
 * On success, it returns the cgroup id. On failure it returns 0,
 * which is an invalid cgroup id.
 * If there is a failure, it prints the error to stderr.
 */
unsigned long long get_cgroup_id(const char *path)
{
	char cgroup_dir[PATH_MAX + 1];
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mnt_id;
	int rc;

	format_cgroup_path(cgroup_dir, path);

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/*
	 * Probe call: the zeroed handle has handle_bytes == 0, so this call
	 * must fail, and it must leave the required size (8) in
	 * handle_bytes.
	 */
	rc = name_to_handle_at(AT_FDCWD, cgroup_dir, handle, &mnt_id, 0);
	if (rc >= 0 || handle->handle_bytes != 8) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* Grow the handle so that the payload fits; keep the old one on failure. */
	grown = realloc(handle,
			sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		log_err("realloc");
		goto out;
	}
	handle = grown;

	rc = name_to_handle_at(AT_FDCWD, cgroup_dir, handle, &mnt_id, 0);
	if (rc < 0) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* The 8 handle bytes are the cgroup id. */
	memcpy(&cgid, handle->f_handle, 8);

out:
	free(handle);
	return cgid;
}
300 
/**
 * cgroup_setup_and_join() - Setup the environment, create a cgroup and join it
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * Runs setup_cgroup_environment(), creates the cgroup @path and moves the
 * calling process into it. If creating or joining fails, the environment is
 * cleaned up again before returning.
 *
 * On success, it returns the file descriptor of the cgroup. On failure it
 * prints an error to stderr and returns a negative value: -EINVAL if the
 * setup or the join failed, or the result of create_and_get_cgroup() if the
 * cgroup could not be created.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* The caller never gets cg_fd on this path, so release it. */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
323 
324 /**
325  * setup_classid_environment() - Setup the cgroupv1 net_cls environment
326  *
327  * After calling this function, cleanup_classid_environment should be called
328  * once testing is complete.
329  *
330  * This function will print an error to stderr and return 1 if it is unable
331  * to setup the cgroup environment. If setup is successful, 0 is returned.
332  */
333 int setup_classid_environment(void)
334 {
335 	char cgroup_workdir[PATH_MAX + 1];
336 
337 	format_classid_path(cgroup_workdir);
338 
339 	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
340 	    errno != EBUSY) {
341 		log_err("mount cgroup base");
342 		return 1;
343 	}
344 
345 	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
346 		log_err("mkdir cgroup net_cls");
347 		return 1;
348 	}
349 
350 	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
351 	    errno != EBUSY) {
352 		log_err("mount cgroup net_cls");
353 		return 1;
354 	}
355 
356 	cleanup_classid_environment();
357 
358 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
359 		log_err("mkdir cgroup work dir");
360 		return 1;
361 	}
362 
363 	return 0;
364 }
365 
/**
 * set_classid() - Set a cgroupv1 net_cls classid
 * @id: the numeric classid
 *
 * Writes the passed classid, as a decimal number, into the net_cls.classid
 * file of the cgroup work dir in order to later on trigger socket tagging.
 *
 * On success, it returns 0, otherwise on failure it returns 1. If there
 * is a failure, it prints the error to stderr.
 */
int set_classid(unsigned int id)
{
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int classid_fd;
	int ret;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file), "%s/net_cls.classid",
		 workdir);

	classid_fd = open(classid_file, O_WRONLY);
	if (classid_fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	ret = 0;
	if (dprintf(classid_fd, "%u\n", id) < 0) {
		log_err("Setting cgroup classid");
		ret = 1;
	}

	close(classid_fd);
	return ret;
}
400 
/**
 * join_classid() - Join a cgroupv1 net_cls classid
 *
 * Moves the calling process into the net_cls cgroup work dir, which is
 * expected to exist already. This causes the process sockets to be tagged
 * with the given net_cls classid.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];

	format_classid_path(workdir);

	return join_cgroup_from_top(workdir);
}
417 
418 /**
419  * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
420  *
421  * At call time, it moves the calling process to the root cgroup, and then
422  * runs the deletion process.
423  *
424  * On failure, it will print an error to stderr, and try to continue.
425  */
426 void cleanup_classid_environment(void)
427 {
428 	char cgroup_workdir[PATH_MAX + 1];
429 
430 	format_classid_path(cgroup_workdir);
431 	join_cgroup_from_top(NETCLS_MOUNT_PATH);
432 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
433 }
434