1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14 
15 #include "cgroup_helpers.h"
16 
17 /*
 * To avoid relying on the system setup, when setup_cgroup_environment()
 * is called we create a new mount namespace. The cgroupv2 root is mounted
 * at CGROUP_MOUNT_PATH. Unfortunately, most people don't have cgroupv2
 * enabled at this point in time. It's easier to create our own mount
 * namespace and manage it ourselves. We assume /mnt exists.
23  *
24  * Related cgroupv1 helpers are named *classid*(), since we only use the
25  * net_cls controller for tagging net_cls.classid. We assume the default
26  * mount under /sys/fs/cgroup/net_cls, which should be the case for the
27  * vast majority of users.
28  */
29 
/* Value handed to nftw() as its limit on simultaneously open descriptors. */
#define WALK_FD_LIMIT			16

/* Mount points used by the helpers and the name of the test work dir. */
#define CGROUP_MOUNT_PATH		"/mnt"
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Format "<CGROUP_MOUNT_PATH><CGROUP_WORK_DIR><path>" into buf.
 *
 * buf has to be a real array rather than a pointer, because sizeof(buf)
 * is used as the destination size. Evaluates to snprintf()'s return value.
 */
#define format_cgroup_path(buf, path)					\
	snprintf((buf), sizeof(buf), "%s%s%s",				\
		 CGROUP_MOUNT_PATH, CGROUP_WORK_DIR, (path))

/*
 * Format "<NETCLS_MOUNT_PATH><CGROUP_WORK_DIR>" into buf, which has to be
 * a real array for the same sizeof(buf) reason.
 */
#define format_classid_path(buf)					\
	snprintf((buf), sizeof(buf), "%s%s",				\
		 NETCLS_MOUNT_PATH, CGROUP_WORK_DIR)
44 
/**
 * enable_all_controllers() - Enable all available cgroup v2 controllers
 * @cgroup_path: Path of the cgroup directory to operate on
 *
 * Reads the controller names listed in @cgroup_path/cgroup.controllers and
 * writes "+<name>" for each of them to @cgroup_path/cgroup.subtree_control,
 * in order to increase the code coverage.
 *
 * An empty cgroup.controllers file is not an error; nothing is enabled
 * in that case.
 *
 * If successful, 0 is returned. On failure an error is logged and 1 is
 * returned.
 */
static int enable_all_controllers(char *cgroup_path)
{
	char path[PATH_MAX + 1];
	char buf[PATH_MAX];
	char *c, *c2;
	int fd, cfd;
	ssize_t len;

	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		log_err("Opening cgroup.controllers: %s", path);
		return 1;
	}

	/* Leave room for the terminating NUL added below. */
	len = read(fd, buf, sizeof(buf) - 1);
	if (len < 0) {
		/*
		 * Log before close(), so that errno still describes the
		 * read() failure in case log_err reports it.
		 */
		log_err("Reading cgroup.controllers: %s", path);
		close(fd);
		return 1;
	}
	buf[len] = 0;
	close(fd);

	/* No controllers available? We're probably on cgroup v1. */
	if (len == 0)
		return 0;

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/*
	 * Split on '\n' as well as ' ': the space separated list is
	 * newline terminated, and the newline would otherwise stay glued
	 * to the last controller name.
	 */
	for (c = strtok_r(buf, " \n", &c2); c; c = strtok_r(NULL, " \n", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}
	close(cfd);
	return 0;
}
98 
99 /**
100  * setup_cgroup_environment() - Setup the cgroup environment
101  *
102  * After calling this function, cleanup_cgroup_environment should be called
103  * once testing is complete.
104  *
105  * This function will print an error to stderr and return 1 if it is unable
106  * to setup the cgroup environment. If setup is successful, 0 is returned.
107  */
108 int setup_cgroup_environment(void)
109 {
110 	char cgroup_workdir[PATH_MAX - 24];
111 
112 	format_cgroup_path(cgroup_workdir, "");
113 
114 	if (unshare(CLONE_NEWNS)) {
115 		log_err("unshare");
116 		return 1;
117 	}
118 
119 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
120 		log_err("mount fakeroot");
121 		return 1;
122 	}
123 
124 	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
125 		log_err("mount cgroup2");
126 		return 1;
127 	}
128 
129 	/* Cleanup existing failed runs, now that the environment is setup */
130 	cleanup_cgroup_environment();
131 
132 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
133 		log_err("mkdir cgroup work dir");
134 		return 1;
135 	}
136 
137 	if (enable_all_controllers(cgroup_workdir))
138 		return 1;
139 
140 	return 0;
141 }
142 
143 static int nftwfunc(const char *filename, const struct stat *statptr,
144 		    int fileflags, struct FTW *pfwt)
145 {
146 	if ((fileflags & FTW_D) && rmdir(filename))
147 		log_err("Removing cgroup: %s", filename);
148 	return 0;
149 }
150 
/*
 * join_cgroup_from_top() - Move the calling process into a cgroup
 * @cgroup_path: Full path of the cgroup directory to join
 *
 * Writes the pid of the calling process to @cgroup_path/cgroup.procs.
 *
 * Returns 0 on success. On failure an error is logged and 1 is returned.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_file[PATH_MAX + 1];
	int procs_fd;
	int ret = 0;

	snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs",
		 cgroup_path);

	procs_fd = open(procs_file, O_WRONLY);
	if (procs_fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_file);
		return 1;
	}

	if (dprintf(procs_fd, "%d\n", getpid()) < 0) {
		log_err("Joining Cgroup");
		ret = 1;
	}

	close(procs_fd);
	return ret;
}
174 
/**
 * join_cgroup() - Join a cgroup
 * @path: The cgroup path, relative to the workdir, to join
 *
 * The cgroup is expected to exist already below the cgroup work dir.
 * Passing "/my-cgroup", for example, puts the calling process into the
 * cgroup "/cgroup-test-work-dir/my-cgroup".
 *
 * Returns 0 on success and 1 on failure.
 */
int join_cgroup(const char *path)
{
	char target[PATH_MAX + 1];

	/* Turn the workdir-relative path into the path below the mount. */
	format_cgroup_path(target, path);

	return join_cgroup_from_top(target);
}
193 
194 /**
195  * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
196  *
197  * This is an idempotent function to delete all temporary cgroups that
198  * have been created during the test, including the cgroup testing work
199  * directory.
200  *
201  * At call time, it moves the calling process to the root cgroup, and then
202  * runs the deletion process. It is idempotent, and should not fail, unless
203  * a process is lingering.
204  *
205  * On failure, it will print an error to stderr, and try to continue.
206  */
207 void cleanup_cgroup_environment(void)
208 {
209 	char cgroup_workdir[PATH_MAX + 1];
210 
211 	format_cgroup_path(cgroup_workdir, "");
212 	join_cgroup_from_top(CGROUP_MOUNT_PATH);
213 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
214 }
215 
/**
 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
 * @path: The cgroup path, relative to the workdir, to create
 *
 * Creates the cgroup below the top level workdir and opens its directory
 * read-only. An already existing cgroup is not an error, which makes the
 * function idempotent.
 *
 * Returns the file descriptor on success. On failure an error is logged
 * and -1 is returned.
 */
int create_and_get_cgroup(const char *path)
{
	char full_path[PATH_MAX + 1];
	int cgroup_fd;

	format_cgroup_path(full_path, path);

	/* EEXIST just means the cgroup is there already. */
	if (mkdir(full_path, 0777) != 0 && errno != EEXIST) {
		log_err("mkdiring cgroup %s .. %s", path, full_path);
		return -1;
	}

	cgroup_fd = open(full_path, O_RDONLY);
	if (cgroup_fd >= 0)
		return cgroup_fd;

	log_err("Opening Cgroup");
	return -1;
}
245 
246 /**
247  * get_cgroup_id() - Get cgroup id for a particular cgroup path
248  * @path: The cgroup path, relative to the workdir, to join
249  *
250  * On success, it returns the cgroup id. On failure it returns 0,
251  * which is an invalid cgroup id.
252  * If there is a failure, it prints the error to stderr.
253  */
254 unsigned long long get_cgroup_id(const char *path)
255 {
256 	int dirfd, err, flags, mount_id, fhsize;
257 	union {
258 		unsigned long long cgid;
259 		unsigned char raw_bytes[8];
260 	} id;
261 	char cgroup_workdir[PATH_MAX + 1];
262 	struct file_handle *fhp, *fhp2;
263 	unsigned long long ret = 0;
264 
265 	format_cgroup_path(cgroup_workdir, path);
266 
267 	dirfd = AT_FDCWD;
268 	flags = 0;
269 	fhsize = sizeof(*fhp);
270 	fhp = calloc(1, fhsize);
271 	if (!fhp) {
272 		log_err("calloc");
273 		return 0;
274 	}
275 	err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
276 	if (err >= 0 || fhp->handle_bytes != 8) {
277 		log_err("name_to_handle_at");
278 		goto free_mem;
279 	}
280 
281 	fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
282 	fhp2 = realloc(fhp, fhsize);
283 	if (!fhp2) {
284 		log_err("realloc");
285 		goto free_mem;
286 	}
287 	err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
288 	fhp = fhp2;
289 	if (err < 0) {
290 		log_err("name_to_handle_at");
291 		goto free_mem;
292 	}
293 
294 	memcpy(id.raw_bytes, fhp->f_handle, 8);
295 	ret = id.cgid;
296 
297 free_mem:
298 	free(fhp);
299 	return ret;
300 }
301 
/**
 * cgroup_setup_and_join() - Set up the environment, create a cgroup and join it
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * Convenience wrapper that runs setup_cgroup_environment(),
 * create_and_get_cgroup() and join_cgroup() in sequence. Once the
 * environment has been set up, any later failure triggers
 * cleanup_cgroup_environment() before returning.
 *
 * On success, it returns the file descriptor of the created cgroup. On
 * failure it prints an error to stderr and returns a negative value.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/* Only an error code is returned, so the fd must not leak. */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
324 
325 /**
326  * setup_classid_environment() - Setup the cgroupv1 net_cls environment
327  *
328  * After calling this function, cleanup_classid_environment should be called
329  * once testing is complete.
330  *
331  * This function will print an error to stderr and return 1 if it is unable
332  * to setup the cgroup environment. If setup is successful, 0 is returned.
333  */
334 int setup_classid_environment(void)
335 {
336 	char cgroup_workdir[PATH_MAX + 1];
337 
338 	format_classid_path(cgroup_workdir);
339 
340 	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
341 	    errno != EBUSY) {
342 		log_err("mount cgroup base");
343 		return 1;
344 	}
345 
346 	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
347 		log_err("mkdir cgroup net_cls");
348 		return 1;
349 	}
350 
351 	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
352 	    errno != EBUSY) {
353 		log_err("mount cgroup net_cls");
354 		return 1;
355 	}
356 
357 	cleanup_classid_environment();
358 
359 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
360 		log_err("mkdir cgroup work dir");
361 		return 1;
362 	}
363 
364 	return 0;
365 }
366 
/**
 * set_classid() - Set a cgroupv1 net_cls classid
 * @id: the numeric classid
 *
 * Writes @id in decimal to the net_cls.classid file of the cgroup work
 * dir, in order to later on trigger socket tagging.
 *
 * Returns 0 on success. On failure an error is logged and 1 is returned.
 */
int set_classid(unsigned int id)
{
	char workdir[PATH_MAX - 42];
	char classid_file[PATH_MAX + 1];
	int classid_fd;
	int ret = 0;

	format_classid_path(workdir);
	snprintf(classid_file, sizeof(classid_file), "%s/net_cls.classid",
		 workdir);

	classid_fd = open(classid_file, O_WRONLY);
	if (classid_fd < 0) {
		log_err("Opening cgroup classid: %s", classid_file);
		return 1;
	}

	if (dprintf(classid_fd, "%u\n", id) < 0) {
		log_err("Setting cgroup classid");
		ret = 1;
	}

	close(classid_fd);
	return ret;
}
401 
/**
 * join_classid() - Join a cgroupv1 net_cls classid
 *
 * Joins the net_cls cgroup work dir, which is expected to exist already.
 * This causes the process sockets to be tagged with the given net_cls
 * classid.
 *
 * Returns 0 on success and 1 on failure.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];

	format_classid_path(workdir);

	return join_cgroup_from_top(workdir);
}
418 
419 /**
420  * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
421  *
422  * At call time, it moves the calling process to the root cgroup, and then
423  * runs the deletion process.
424  *
425  * On failure, it will print an error to stderr, and try to continue.
426  */
427 void cleanup_classid_environment(void)
428 {
429 	char cgroup_workdir[PATH_MAX + 1];
430 
431 	format_classid_path(cgroup_workdir);
432 	join_cgroup_from_top(NETCLS_MOUNT_PATH);
433 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
434 }
435