1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <signal.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/stat.h>
13 #include <sys/types.h>
14 #include <sys/wait.h>
15 #include <unistd.h>
16 
17 #include "cgroup_util.h"
18 #include "../clone3/clone3_selftests.h"
19 
/*
 * Read up to @max_len - 1 bytes from the file at @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read (possibly 0), or a negative value if
 * the file cannot be opened, the read fails, or @max_len is 0. In the
 * latter case there is no room even for the terminator, so @buf is left
 * untouched and errno is set to EINVAL.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd;

	/* max_len - 1 would wrap around to SIZE_MAX for an empty buffer. */
	if (!max_len) {
		errno = EINVAL;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return fd;

	len = read(fd, buf, max_len - 1);
	if (len >= 0)
		buf[len] = 0;

	close(fd);
	return len;
}
38 
/*
 * Append @len bytes from @buf to the file at @path.
 *
 * Returns the result of write() (the number of bytes written, or a
 * negative value on failure), or the negative open() result when the
 * file cannot be opened for writing.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_APPEND);

	if (fd < 0)
		return fd;

	/* The descriptor is closed whether or not the write succeeded. */
	written = write(fd, buf, len);
	close(fd);

	return written;
}
57 
/*
 * Build the path "<root>/<name>" in a freshly allocated buffer.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* Room for both components, the '/' separator and the NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
67 
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated buffer.
 *
 * The buffer is sized from the formatted length, so the result is never
 * truncated regardless of how many characters @index needs.
 *
 * Returns the new string, which the caller must free(), or NULL if
 * formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	/* With a NULL buffer snprintf() only reports the required length. */
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
77 
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated buffer.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* Room for both components, the '/' separator and the NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
87 
/*
 * Read the file "<cgroup>/<control>" into @buf as NUL-terminated text,
 * storing at most @len - 1 bytes of content.
 *
 * Returns 0 on success and -1 if the file could not be read.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);

	return nread < 0 ? -1 : 0;
}
99 
/*
 * Compare the contents of the control file "<cgroup>/<control>" with
 * @expected.
 *
 * Only as many bytes as @expected is long are read from the file, so for
 * a non-empty @expected this checks that the file starts with it. An
 * empty @expected matches only a file that yields no data.
 *
 * Returns the strcmp() result (0 on a match), or -1 if @expected is NULL,
 * the buffer cannot be allocated or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t size;
	char *buf;
	int ret;

	/* There is nothing to compare against without an expected string. */
	if (!expected)
		return -1;

	/*
	 * cg_read() stores at most size - 1 bytes. For an empty @expected
	 * that would read nothing and every file would compare equal, so
	 * leave room for one byte of content to tell an empty file from a
	 * non-empty one.
	 */
	size = strlen(expected) + 1;
	if (size == 1)
		size = 2;

	buf = malloc(size);
	if (!buf)
		return -1;

	if (cg_read(cgroup, control, buf, size)) {
		free(buf);
		return -1;
	}

	ret = strcmp(expected, buf);
	free(buf);
	return ret;
}
126 
127 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
128 {
129 	char buf[PAGE_SIZE];
130 
131 	if (cg_read(cgroup, control, buf, sizeof(buf)))
132 		return -1;
133 
134 	return strstr(buf, needle) ? 0 : -1;
135 }
136 
/*
 * Read the control file "<cgroup>/<control>" and convert its leading
 * text to a long with atol().
 *
 * Returns the converted value, or -1 if the file could not be read (which
 * is indistinguishable from a file that actually contains -1).
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];

	if (cg_read(cgroup, control, text, sizeof(text)) != 0)
		return -1;

	return atol(text);
}
146 
147 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
148 {
149 	char buf[PAGE_SIZE];
150 	char *ptr;
151 
152 	if (cg_read(cgroup, control, buf, sizeof(buf)))
153 		return -1;
154 
155 	ptr = strstr(buf, key);
156 	if (!ptr)
157 		return -1;
158 
159 	return atol(ptr + strlen(key));
160 }
161 
162 long cg_read_lc(const char *cgroup, const char *control)
163 {
164 	char buf[PAGE_SIZE];
165 	const char delim[] = "\n";
166 	char *line;
167 	long cnt = 0;
168 
169 	if (cg_read(cgroup, control, buf, sizeof(buf)))
170 		return -1;
171 
172 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
173 		cnt++;
174 
175 	return cnt;
176 }
177 
/*
 * Write the NUL-terminated string @buf (without its terminator) to the
 * control file "<cgroup>/<control>".
 *
 * Returns 0 if the whole string was written, -1 on failure or on a
 * partial write.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char file[PATH_MAX];
	ssize_t want = strlen(buf);
	ssize_t done;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	done = write_text(file, buf, want);

	return done == want ? 0 : -1;
}
190 
191 int cg_find_unified_root(char *root, size_t len)
192 {
193 	char buf[10 * PAGE_SIZE];
194 	char *fs, *mount, *type;
195 	const char delim[] = "\n\t ";
196 
197 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
198 		return -1;
199 
200 	/*
201 	 * Example:
202 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
203 	 */
204 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
205 		mount = strtok(NULL, delim);
206 		type = strtok(NULL, delim);
207 		strtok(NULL, delim);
208 		strtok(NULL, delim);
209 		strtok(NULL, delim);
210 
211 		if (strcmp(type, "cgroup2") == 0) {
212 			strncpy(root, mount, len);
213 			return 0;
214 		}
215 	}
216 
217 	return -1;
218 }
219 
/*
 * Create the cgroup directory @cgroup.
 *
 * The directory is created with mode 0755 (before umask): a directory
 * needs its execute/search bits set for the files inside it to be
 * reachable, which a mode of 0644 does not provide.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	return mkdir(cgroup, 0755);
}
224 
225 int cg_wait_for_proc_count(const char *cgroup, int count)
226 {
227 	char buf[10 * PAGE_SIZE] = {0};
228 	int attempts;
229 	char *ptr;
230 
231 	for (attempts = 10; attempts >= 0; attempts--) {
232 		int nr = 0;
233 
234 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
235 			break;
236 
237 		for (ptr = buf; *ptr; ptr++)
238 			if (*ptr == '\n')
239 				nr++;
240 
241 		if (nr >= count)
242 			return 0;
243 
244 		usleep(100000);
245 	}
246 
247 	return -1;
248 }
249 
250 int cg_killall(const char *cgroup)
251 {
252 	char buf[PAGE_SIZE];
253 	char *ptr = buf;
254 
255 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
256 		return -1;
257 
258 	while (ptr < buf + sizeof(buf)) {
259 		int pid = strtol(ptr, &ptr, 10);
260 
261 		if (pid == 0)
262 			break;
263 		if (*ptr)
264 			ptr++;
265 		else
266 			break;
267 		if (kill(pid, SIGKILL))
268 			return -1;
269 	}
270 
271 	return 0;
272 }
273 
/*
 * Remove the cgroup directory @cgroup.
 *
 * As long as rmdir() fails with EBUSY, the processes listed in the cgroup
 * are killed and the removal is retried after a short pause; there is no
 * limit on the number of retries. A directory that does not exist is
 * treated as successfully removed.
 *
 * Returns 0 on success, otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	for (;;) {
		ret = rmdir(cgroup);
		if (!ret || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
291 
/*
 * Write @pid, formatted as a decimal number, to "<cgroup>/cgroup.procs".
 *
 * Returns the cg_write() result: 0 on success, -1 on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char text[64];

	snprintf(text, sizeof(text), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", text);
}
299 
/*
 * Write "0" to "<cgroup>/cgroup.procs"; going by the function name this
 * is meant to place the calling process into @cgroup.
 *
 * Returns the cg_write() result: 0 on success, -1 on failure.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
304 
/*
 * Write "0" to "<cgroup>/cgroup.threads"; going by the function name this
 * is meant to place the calling thread into @cgroup.
 *
 * Returns the cg_write() result: 0 on success, -1 on failure.
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
309 
/*
 * Fork a child, move it into @cgroup, run @fn(@cgroup, @arg) in it and
 * wait for the child to finish.
 *
 * The child writes its own PID to "<cgroup>/cgroup.procs" before calling
 * @fn and exits with EXIT_FAILURE if that write fails; otherwise it exits
 * with the value returned by @fn.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if no child could be created, and -1 if waiting failed or
 * the child did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * Retry when interrupted by a signal. On any other failure retcode
	 * has not been filled in and must not be inspected.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
334 
/*
 * Create a child process with clone3() and CLONE_INTO_CGROUP, using
 * @cgroup_fd as the target cgroup descriptor.
 *
 * Returns the sys_clone3() result when the call succeeds or fails with an
 * error other than ENOSYS/E2BIG. When the call is unavailable, either at
 * build time (CLONE_ARGS_SIZE_VER2 not defined) or at run time, errno is
 * set to ENOSYS and -ENOSYS is returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(struct clone_args));

	/*
	 * Anything but the two errors below is a genuine result:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	/* Report missing support uniformly as ENOSYS. */
	errno = ENOSYS;
	return -ENOSYS;
}
362 
/*
 * Wait with waitid() for a state change of child @pid and translate the
 * reported event into a return value.
 *
 * @options selects the events of interest (WEXITED, WSTOPPED, WCONTINUED,
 * optionally combined with other waitid() flags); __WALL and __WNOTHREAD
 * are always added.
 *
 * waitid() describes the event in siginfo_t: si_code names the kind of
 * event and si_status carries the plain exit code or signal number. It is
 * not an encoded wait status, so the W*() status macros do not apply.
 *
 * Returns the exit code for an exited child, the stop signal for a
 * stopped child, 0 for a continued child, and -1 if waitid() fails or the
 * reported event is not one that was requested.
 */
int clone_reap(pid_t pid, int options)
{
	int ret;
	siginfo_t info = {
		.si_signo = 0,
	};

again:
	ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	if (ret < 0) {
		if (errno == EINTR)
			goto again;
		return -1;
	}

	if ((options & WEXITED) && info.si_code == CLD_EXITED)
		return info.si_status;

	if ((options & WSTOPPED) &&
	    (info.si_code == CLD_STOPPED || info.si_code == CLD_TRAPPED))
		return info.si_status;

	if ((options & WCONTINUED) && info.si_code == CLD_CONTINUED)
		return 0;

	return -1;
}
395 
/*
 * Open the directory @dir as an O_PATH descriptor. The open fails if
 * @dir is not a directory or is a symbolic link, and the descriptor is
 * marked close-on-exec.
 *
 * Returns the open() result: the descriptor, or -1 with errno set.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
400 
/*
 * Close @fd if it is a valid descriptor (>= 0) while keeping the current
 * errno value intact, so a preceding error is not overwritten by close().
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like
 * a single statement, e.g. as the unbraced branch of an if/else.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		if ((fd) >= 0) {                                               \
			int _e_ = errno;                                       \
			close(fd);                                             \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
407 
/*
 * Create a child directly inside @cgroup via clone_into_cgroup() and run
 * @fn(@cgroup, @arg) in it without waiting for it.
 *
 * The child exits with the value returned by @fn. The cgroup descriptor
 * is closed again with errno preserved.
 *
 * Returns the clone_into_cgroup() result in the parent (the child's PID
 * or a negative value), or -1 if @cgroup could not be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child != 0)
		return child;

	/* Only the newly created child gets here. */
	exit(fn(cgroup, arg));
}
426 
/*
 * Start a child that runs @fn(@cgroup, @arg) inside @cgroup and return
 * without waiting for it.
 *
 * The child is first created directly in the cgroup. If that path
 * reports ENOSYS, fall back to fork(), with the child writing its own
 * PID to "<cgroup>/cgroup.procs" (exiting with EXIT_FAILURE if that
 * fails) before calling @fn.
 *
 * Returns the child's PID on success, -1 if the direct path failed for
 * any reason other than ENOSYS, or the fork() result of the fallback.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char pidstr[64];
	int child;

	child = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (child > 0)
		return child;

	/* Genuine test failure. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	child = fork();
	if (child != 0)
		return child;

	/* Child of the fork() fallback: join the cgroup, then run @fn. */
	snprintf(pidstr, sizeof(pidstr), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", pidstr))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
453 
/*
 * Open an unnamed temporary file (O_TMPFILE) for reading and writing in
 * the current working directory.
 *
 * Returns the open() result: the descriptor, or -1 with errno set.
 */
int get_temp_fd(void)
{
	const int flags = O_TMPFILE | O_RDWR | O_EXCL;

	return open(".", flags);
}
458 
459 int alloc_pagecache(int fd, size_t size)
460 {
461 	char buf[PAGE_SIZE];
462 	struct stat st;
463 	int i;
464 
465 	if (fstat(fd, &st))
466 		goto cleanup;
467 
468 	size += st.st_size;
469 
470 	if (ftruncate(fd, size))
471 		goto cleanup;
472 
473 	for (i = 0; i < size; i += sizeof(buf))
474 		read(fd, buf, sizeof(buf));
475 
476 	return 0;
477 
478 cleanup:
479 	return -1;
480 }
481 
482 int alloc_anon(const char *cgroup, void *arg)
483 {
484 	size_t size = (unsigned long)arg;
485 	char *buf, *ptr;
486 
487 	buf = malloc(size);
488 	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
489 		*ptr = 0;
490 
491 	free(buf);
492 	return 0;
493 }
494 
495 int is_swap_enabled(void)
496 {
497 	char buf[PAGE_SIZE];
498 	const char delim[] = "\n";
499 	int cnt = 0;
500 	char *line;
501 
502 	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
503 		return -1;
504 
505 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
506 		cnt++;
507 
508 	return cnt > 1;
509 }
510 
/*
 * Write @score as a decimal number to /proc/<pid>/oom_score_adj.
 *
 * Returns 0 on success, or the negative open()/dprintf() result on
 * failure.
 */
int set_oom_adj_score(int pid, int score)
{
	char file[PATH_MAX];
	int fd, written;

	sprintf(file, "/proc/%d/oom_score_adj", pid);

	fd = open(file, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	/* The descriptor is closed whether or not the write succeeded. */
	written = dprintf(fd, "%d", score);
	close(fd);

	return written < 0 ? written : 0;
}
531 
/*
 * Read the procfs file @item of a process into @buf as NUL-terminated
 * text.
 *
 * A non-zero @pid selects /proc/<pid>/<item>. With @pid == 0 the file is
 * taken from /proc/thread-self when @thread is true and from /proc/self
 * otherwise.
 *
 * Returns the read_text() result: the number of bytes read, or a negative
 * value on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char file[PATH_MAX];

	if (pid) {
		snprintf(file, sizeof(file), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(file, sizeof(file), "/proc/%s/%s", self, item);
	}

	return read_text(file, buf, size);
}
544 
545 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
546 {
547 	char buf[PAGE_SIZE];
548 
549 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
550 		return -1;
551 
552 	return strstr(buf, needle) ? 0 : -1;
553 }
554 
/*
 * Create a child directly inside @cgroup via clone_into_cgroup(), let it
 * exit immediately with EXIT_SUCCESS, and reap it.
 *
 * Returns 0 if the child was created, -1 if @cgroup could not be opened
 * or the clone failed.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child < 0)
		return -1;

	if (!child)
		exit(EXIT_SUCCESS);

	/*
	 * The outcome of reaping is deliberately ignored: only the success of
	 * the initial clone matters here.
	 */
	(void)clone_reap(child, WEXITED);
	return 0;
}
579