184092dbcSRoman Gushchin /* SPDX-License-Identifier: GPL-2.0 */
284092dbcSRoman Gushchin 
384092dbcSRoman Gushchin #define _GNU_SOURCE
484092dbcSRoman Gushchin 
584092dbcSRoman Gushchin #include <errno.h>
684092dbcSRoman Gushchin #include <fcntl.h>
784092dbcSRoman Gushchin #include <linux/limits.h>
88075e4f6SChristian Brauner #include <poll.h>
984092dbcSRoman Gushchin #include <signal.h>
1084092dbcSRoman Gushchin #include <stdio.h>
1184092dbcSRoman Gushchin #include <stdlib.h>
1284092dbcSRoman Gushchin #include <string.h>
138075e4f6SChristian Brauner #include <sys/inotify.h>
1484092dbcSRoman Gushchin #include <sys/stat.h>
1584092dbcSRoman Gushchin #include <sys/types.h>
1684092dbcSRoman Gushchin #include <sys/wait.h>
1784092dbcSRoman Gushchin #include <unistd.h>
1884092dbcSRoman Gushchin 
1984092dbcSRoman Gushchin #include "cgroup_util.h"
209bd5910dSChristian Brauner #include "../clone3/clone3_selftests.h"
2184092dbcSRoman Gushchin 
/*
 * Read the start of @path into @buf as a NUL-terminated string.
 *
 * A single read() of at most @max_len - 1 bytes is issued, so anything that
 * does not fit in the buffer is silently dropped.
 *
 * Returns the number of bytes read (possibly 0), or a negative value if the
 * file could not be opened or read, or if @max_len is 0.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd;

	/*
	 * One byte is reserved for the terminator. With a zero-sized buffer
	 * "max_len - 1" would wrap around and let read() overrun @buf.
	 */
	if (max_len == 0) {
		errno = EINVAL;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return fd;

	len = read(fd, buf, max_len - 1);
	if (len >= 0)
		buf[len] = '\0';

	close(fd);
	return len;
}
4084092dbcSRoman Gushchin 
/*
 * Append @len bytes from @buf to the existing file at @path.
 *
 * The file is opened write-only in append mode and a single write() is
 * issued; short writes are reported to the caller through the return value
 * rather than retried.
 *
 * Returns the number of bytes written, or a negative value if the file could
 * not be opened or written.
 */
static ssize_t write_text(const char *path, const char *buf, ssize_t len)
{
	ssize_t written;
	int fd;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	written = write(fd, buf, len);
	close(fd);

	return written;
}
5984092dbcSRoman Gushchin 
/*
 * Build the path "<root>/<name>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation failed.
 */
char *cg_name(const char *root, const char *name)
{
	/* One extra byte for the '/' separator and one for the NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
6984092dbcSRoman Gushchin 
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated string.
 *
 * The required size is measured with snprintf() instead of being guessed, so
 * the result is never truncated regardless of how many characters @index
 * needs.
 *
 * Returns the new string, which the caller must free(), or NULL on failure.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int len;

	/* First pass only computes the formatted length (without the NUL). */
	len = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (len < 0)
		return NULL;

	ret = malloc((size_t)len + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)len + 1, "%s/%s_%d", root, name, index);

	return ret;
}
7984092dbcSRoman Gushchin 
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation failed.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* One extra byte for the '/' separator and one for the NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
895313bfe4SRoman Gushchin 
/*
 * Read the file "<cgroup>/<control>" into @buf (capacity @len) as a
 * NUL-terminated string.
 *
 * Returns 0 on success and -1 if read_text() reported an error.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char path[PATH_MAX];
	ssize_t nread;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	nread = read_text(path, buf, len);

	return nread < 0 ? -1 : 0;
}
10184092dbcSRoman Gushchin 
/*
 * Compare the content of "<cgroup>/<control>" with @expected.
 *
 * The read buffer is sized to strlen(@expected) + 1, so no more than
 * strlen(@expected) bytes of the file are requested. Anything the file holds
 * beyond that is never looked at, and an empty @expected compares equal
 * whenever the read itself succeeds.
 *
 * Returns the strcmp() result (0 on match), or -1 if @expected is NULL, the
 * buffer cannot be allocated, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t size;
	char *buf;
	int ret = -1;

	/* A NULL reference string can never match. */
	if (!expected)
		return -1;

	size = strlen(expected) + 1;
	buf = malloc(size);
	if (!buf)
		return -1;

	if (!cg_read(cgroup, control, buf, size))
		ret = strcmp(expected, buf);

	free(buf);
	return ret;
}
12884092dbcSRoman Gushchin 
12984092dbcSRoman Gushchin int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
13084092dbcSRoman Gushchin {
13184092dbcSRoman Gushchin 	char buf[PAGE_SIZE];
13284092dbcSRoman Gushchin 
13384092dbcSRoman Gushchin 	if (cg_read(cgroup, control, buf, sizeof(buf)))
13484092dbcSRoman Gushchin 		return -1;
13584092dbcSRoman Gushchin 
13684092dbcSRoman Gushchin 	return strstr(buf, needle) ? 0 : -1;
13784092dbcSRoman Gushchin }
13884092dbcSRoman Gushchin 
/*
 * Read "<cgroup>/<control>" and parse its beginning as a decimal long.
 *
 * Returns the atol() result, or -1 if the file could not be read. A file
 * that legitimately holds -1 is indistinguishable from the error case.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	long value;

	if (cg_read(cgroup, control, text, sizeof(text)) != 0)
		return -1;

	value = atol(text);
	return value;
}
14884092dbcSRoman Gushchin 
14984092dbcSRoman Gushchin long cg_read_key_long(const char *cgroup, const char *control, const char *key)
15084092dbcSRoman Gushchin {
15184092dbcSRoman Gushchin 	char buf[PAGE_SIZE];
15284092dbcSRoman Gushchin 	char *ptr;
15384092dbcSRoman Gushchin 
15484092dbcSRoman Gushchin 	if (cg_read(cgroup, control, buf, sizeof(buf)))
15584092dbcSRoman Gushchin 		return -1;
15684092dbcSRoman Gushchin 
15784092dbcSRoman Gushchin 	ptr = strstr(buf, key);
15884092dbcSRoman Gushchin 	if (!ptr)
15984092dbcSRoman Gushchin 		return -1;
16084092dbcSRoman Gushchin 
16184092dbcSRoman Gushchin 	return atol(ptr + strlen(key));
16284092dbcSRoman Gushchin }
16384092dbcSRoman Gushchin 
16411318989SMichal Koutný long cg_read_lc(const char *cgroup, const char *control)
16511318989SMichal Koutný {
16611318989SMichal Koutný 	char buf[PAGE_SIZE];
16711318989SMichal Koutný 	const char delim[] = "\n";
16811318989SMichal Koutný 	char *line;
16911318989SMichal Koutný 	long cnt = 0;
17011318989SMichal Koutný 
17111318989SMichal Koutný 	if (cg_read(cgroup, control, buf, sizeof(buf)))
17211318989SMichal Koutný 		return -1;
17311318989SMichal Koutný 
17411318989SMichal Koutný 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
17511318989SMichal Koutný 		cnt++;
17611318989SMichal Koutný 
17711318989SMichal Koutný 	return cnt;
17811318989SMichal Koutný }
17911318989SMichal Koutný 
/*
 * Write the NUL-terminated string @buf to "<cgroup>/<control>".
 *
 * Returns 0 only if the whole string was written; -1 on error or on a short
 * write.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t len = strlen(buf);
	ssize_t written;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	written = write_text(path, buf, len);

	return written == len ? 0 : -1;
}
19284092dbcSRoman Gushchin 
/*
 * Write @value, formatted as a decimal number, to "<cgroup>/<control>".
 *
 * Returns 0 on success, a negative value if formatting failed, or -1 if the
 * write failed.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* @value is a signed long, so the conversion must be %ld, not %lu. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
204*6376b22cSDavid Vernet 
20584092dbcSRoman Gushchin int cg_find_unified_root(char *root, size_t len)
20684092dbcSRoman Gushchin {
20784092dbcSRoman Gushchin 	char buf[10 * PAGE_SIZE];
20884092dbcSRoman Gushchin 	char *fs, *mount, *type;
20984092dbcSRoman Gushchin 	const char delim[] = "\n\t ";
21084092dbcSRoman Gushchin 
21184092dbcSRoman Gushchin 	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
21284092dbcSRoman Gushchin 		return -1;
21384092dbcSRoman Gushchin 
21484092dbcSRoman Gushchin 	/*
21584092dbcSRoman Gushchin 	 * Example:
21684092dbcSRoman Gushchin 	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
21784092dbcSRoman Gushchin 	 */
21884092dbcSRoman Gushchin 	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
21984092dbcSRoman Gushchin 		mount = strtok(NULL, delim);
22084092dbcSRoman Gushchin 		type = strtok(NULL, delim);
22184092dbcSRoman Gushchin 		strtok(NULL, delim);
22284092dbcSRoman Gushchin 		strtok(NULL, delim);
22384092dbcSRoman Gushchin 		strtok(NULL, delim);
22484092dbcSRoman Gushchin 
225b59b1baaSChris Down 		if (strcmp(type, "cgroup2") == 0) {
22684092dbcSRoman Gushchin 			strncpy(root, mount, len);
22784092dbcSRoman Gushchin 			return 0;
22884092dbcSRoman Gushchin 		}
22984092dbcSRoman Gushchin 	}
23084092dbcSRoman Gushchin 
23184092dbcSRoman Gushchin 	return -1;
23284092dbcSRoman Gushchin }
23384092dbcSRoman Gushchin 
/*
 * Create the cgroup directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
23884092dbcSRoman Gushchin 
2395313bfe4SRoman Gushchin int cg_wait_for_proc_count(const char *cgroup, int count)
2405313bfe4SRoman Gushchin {
2415313bfe4SRoman Gushchin 	char buf[10 * PAGE_SIZE] = {0};
2425313bfe4SRoman Gushchin 	int attempts;
2435313bfe4SRoman Gushchin 	char *ptr;
2445313bfe4SRoman Gushchin 
2455313bfe4SRoman Gushchin 	for (attempts = 10; attempts >= 0; attempts--) {
2465313bfe4SRoman Gushchin 		int nr = 0;
2475313bfe4SRoman Gushchin 
2485313bfe4SRoman Gushchin 		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
2495313bfe4SRoman Gushchin 			break;
2505313bfe4SRoman Gushchin 
2515313bfe4SRoman Gushchin 		for (ptr = buf; *ptr; ptr++)
2525313bfe4SRoman Gushchin 			if (*ptr == '\n')
2535313bfe4SRoman Gushchin 				nr++;
2545313bfe4SRoman Gushchin 
2555313bfe4SRoman Gushchin 		if (nr >= count)
2565313bfe4SRoman Gushchin 			return 0;
2575313bfe4SRoman Gushchin 
2585313bfe4SRoman Gushchin 		usleep(100000);
2595313bfe4SRoman Gushchin 	}
2605313bfe4SRoman Gushchin 
2615313bfe4SRoman Gushchin 	return -1;
2625313bfe4SRoman Gushchin }
2635313bfe4SRoman Gushchin 
2645313bfe4SRoman Gushchin int cg_killall(const char *cgroup)
26584092dbcSRoman Gushchin {
26684092dbcSRoman Gushchin 	char buf[PAGE_SIZE];
26784092dbcSRoman Gushchin 	char *ptr = buf;
26884092dbcSRoman Gushchin 
2690de3103fSChristian Brauner 	/* If cgroup.kill exists use it. */
2700de3103fSChristian Brauner 	if (!cg_write(cgroup, "cgroup.kill", "1"))
2710de3103fSChristian Brauner 		return 0;
2720de3103fSChristian Brauner 
27384092dbcSRoman Gushchin 	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
27484092dbcSRoman Gushchin 		return -1;
27584092dbcSRoman Gushchin 
27684092dbcSRoman Gushchin 	while (ptr < buf + sizeof(buf)) {
27784092dbcSRoman Gushchin 		int pid = strtol(ptr, &ptr, 10);
27884092dbcSRoman Gushchin 
27984092dbcSRoman Gushchin 		if (pid == 0)
28084092dbcSRoman Gushchin 			break;
28184092dbcSRoman Gushchin 		if (*ptr)
28284092dbcSRoman Gushchin 			ptr++;
28384092dbcSRoman Gushchin 		else
28484092dbcSRoman Gushchin 			break;
28584092dbcSRoman Gushchin 		if (kill(pid, SIGKILL))
28684092dbcSRoman Gushchin 			return -1;
28784092dbcSRoman Gushchin 	}
28884092dbcSRoman Gushchin 
28984092dbcSRoman Gushchin 	return 0;
29084092dbcSRoman Gushchin }
29184092dbcSRoman Gushchin 
/*
 * Remove the cgroup directory @cgroup.
 *
 * While rmdir() fails with EBUSY the cgroup's processes are killed and the
 * removal is retried after a short sleep; there is no retry limit. A cgroup
 * that does not exist (ENOENT) counts as successfully destroyed.
 *
 * Returns 0 on success, otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	for (;;) {
		ret = rmdir(cgroup);
		if (!ret || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret && errno == ENOENT)
		ret = 0;

	return ret;
}
30984092dbcSRoman Gushchin 
/*
 * Move the process @pid into @cgroup by writing its pid to cgroup.procs.
 *
 * Returns 0 on success, -1 on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char text[64];
	int ret;

	snprintf(text, sizeof(text), "%d", pid);
	ret = cg_write(cgroup, "cgroup.procs", text);

	return ret;
}
3175313bfe4SRoman Gushchin 
/*
 * Write "0" to cgroup.procs of @cgroup.
 * Presumably "0" designates the calling process; the kernel interface is not
 * visible from this file.
 *
 * Returns 0 on success, -1 on failure.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
322d863cb03SClaudio 
/*
 * Write "0" to cgroup.threads of @cgroup.
 * Presumably "0" designates the calling thread; the kernel interface is not
 * visible from this file.
 *
 * Returns 0 on success, -1 on failure.
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
327d863cb03SClaudio 
/*
 * Run @fn(@cgroup, @arg) in a forked child placed inside @cgroup and wait
 * for it to finish.
 *
 * The child writes its own pid to cgroup.procs before calling @fn and exits
 * with @fn's return value (or EXIT_FAILURE if it could not enter the
 * cgroup).
 *
 * Returns the child's exit status if it exited normally, a negative value if
 * fork() failed, and -1 if waiting failed or the child did not exit
 * normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only written when waitpid() actually reaps the child,
	 * so a failed wait must not fall through to the status macros.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
35284092dbcSRoman Gushchin 
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using sys_clone3() with CLONE_INTO_CGROUP.
 *
 * Returns whatever sys_clone3() returned, except that missing support is
 * normalised: when CLONE_ARGS_SIZE_VER2 is not defined at build time, or the
 * call fails with ENOSYS or E2BIG, errno is set to ENOSYS and -ENOSYS is
 * returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(args));

	/*
	 * Anything other than these two errors is a genuine result:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
3809bd5910dSChristian Brauner 
3819bd5910dSChristian Brauner int clone_reap(pid_t pid, int options)
3829bd5910dSChristian Brauner {
3839bd5910dSChristian Brauner 	int ret;
3849bd5910dSChristian Brauner 	siginfo_t info = {
3859bd5910dSChristian Brauner 		.si_signo = 0,
3869bd5910dSChristian Brauner 	};
3879bd5910dSChristian Brauner 
3889bd5910dSChristian Brauner again:
3899bd5910dSChristian Brauner 	ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
3909bd5910dSChristian Brauner 	if (ret < 0) {
3919bd5910dSChristian Brauner 		if (errno == EINTR)
3929bd5910dSChristian Brauner 			goto again;
3939bd5910dSChristian Brauner 		return -1;
3949bd5910dSChristian Brauner 	}
3959bd5910dSChristian Brauner 
3969bd5910dSChristian Brauner 	if (options & WEXITED) {
3979bd5910dSChristian Brauner 		if (WIFEXITED(info.si_status))
3989bd5910dSChristian Brauner 			return WEXITSTATUS(info.si_status);
3999bd5910dSChristian Brauner 	}
4009bd5910dSChristian Brauner 
4019bd5910dSChristian Brauner 	if (options & WSTOPPED) {
4029bd5910dSChristian Brauner 		if (WIFSTOPPED(info.si_status))
4039bd5910dSChristian Brauner 			return WSTOPSIG(info.si_status);
4049bd5910dSChristian Brauner 	}
4059bd5910dSChristian Brauner 
4069bd5910dSChristian Brauner 	if (options & WCONTINUED) {
4079bd5910dSChristian Brauner 		if (WIFCONTINUED(info.si_status))
4089bd5910dSChristian Brauner 			return 0;
4099bd5910dSChristian Brauner 	}
4109bd5910dSChristian Brauner 
4119bd5910dSChristian Brauner 	return -1;
4129bd5910dSChristian Brauner }
4139bd5910dSChristian Brauner 
4149bd5910dSChristian Brauner int dirfd_open_opath(const char *dir)
4159bd5910dSChristian Brauner {
4169bd5910dSChristian Brauner 	return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH);
4179bd5910dSChristian Brauner }
4189bd5910dSChristian Brauner 
/*
 * Close @fd if it is a valid (non-negative) descriptor while leaving errno
 * exactly as it was, so that the error of a preceding call is not lost.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement (safe in
 * unbraced if/else), and @fd is evaluated only once.
 */
#define close_prot_errno(fd)                                                   \
	do {                                                                   \
		int _fd_ = (fd);                                               \
		if (_fd_ >= 0) {                                               \
			int _e_ = errno;                                       \
			close(_fd_);                                           \
			errno = _e_;                                           \
		}                                                              \
	} while (0)
4259bd5910dSChristian Brauner 
/*
 * Start @fn(@cgroup, @arg) in a child created directly inside @cgroup via
 * clone_into_cgroup(), without waiting for it.
 *
 * The child exits with @fn's return value. In the parent the result of
 * clone_into_cgroup() is returned, with errno preserved across closing the
 * cgroup descriptor; -1 is returned if the cgroup directory cannot be
 * opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd;

	cg_fd = dirfd_open_opath(cgroup);
	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	/* Both parent and child drop the descriptor; errno must survive. */
	close_prot_errno(cg_fd);

	if (child == 0)
		exit(fn(cgroup, arg));

	return child;
}
4449bd5910dSChristian Brauner 
/*
 * Start @fn(@cgroup, @arg) in a child process inside @cgroup and return
 * immediately.
 *
 * Cloning straight into the cgroup is attempted first. Only when that is
 * reported as unsupported (errno == ENOSYS) does the function fall back to
 * fork() with the child moving itself by writing its pid to cgroup.procs.
 *
 * Returns the child's pid in the parent, or -1 on failure.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char buf[64];
	int pid;

	pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (pid > 0)
		return pid;

	/* Genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid != 0)
		return pid;

	/* Child: enter the cgroup, then run the payload and exit with it. */
	snprintf(buf, sizeof(buf), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", buf))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
47184092dbcSRoman Gushchin 
47284092dbcSRoman Gushchin int get_temp_fd(void)
47384092dbcSRoman Gushchin {
47484092dbcSRoman Gushchin 	return open(".", O_TMPFILE | O_RDWR | O_EXCL);
47584092dbcSRoman Gushchin }
47684092dbcSRoman Gushchin 
47784092dbcSRoman Gushchin int alloc_pagecache(int fd, size_t size)
47884092dbcSRoman Gushchin {
47984092dbcSRoman Gushchin 	char buf[PAGE_SIZE];
48084092dbcSRoman Gushchin 	struct stat st;
48184092dbcSRoman Gushchin 	int i;
48284092dbcSRoman Gushchin 
48384092dbcSRoman Gushchin 	if (fstat(fd, &st))
48484092dbcSRoman Gushchin 		goto cleanup;
48584092dbcSRoman Gushchin 
48684092dbcSRoman Gushchin 	size += st.st_size;
48784092dbcSRoman Gushchin 
48884092dbcSRoman Gushchin 	if (ftruncate(fd, size))
48984092dbcSRoman Gushchin 		goto cleanup;
49084092dbcSRoman Gushchin 
49184092dbcSRoman Gushchin 	for (i = 0; i < size; i += sizeof(buf))
49284092dbcSRoman Gushchin 		read(fd, buf, sizeof(buf));
49384092dbcSRoman Gushchin 
49484092dbcSRoman Gushchin 	return 0;
49584092dbcSRoman Gushchin 
49684092dbcSRoman Gushchin cleanup:
49784092dbcSRoman Gushchin 	return -1;
49884092dbcSRoman Gushchin }
49984092dbcSRoman Gushchin 
50084092dbcSRoman Gushchin int alloc_anon(const char *cgroup, void *arg)
50184092dbcSRoman Gushchin {
50284092dbcSRoman Gushchin 	size_t size = (unsigned long)arg;
50384092dbcSRoman Gushchin 	char *buf, *ptr;
50484092dbcSRoman Gushchin 
50584092dbcSRoman Gushchin 	buf = malloc(size);
50684092dbcSRoman Gushchin 	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
50784092dbcSRoman Gushchin 		*ptr = 0;
50884092dbcSRoman Gushchin 
50984092dbcSRoman Gushchin 	free(buf);
51084092dbcSRoman Gushchin 	return 0;
51184092dbcSRoman Gushchin }
512478b2784SMike Rapoport 
513478b2784SMike Rapoport int is_swap_enabled(void)
514478b2784SMike Rapoport {
515478b2784SMike Rapoport 	char buf[PAGE_SIZE];
516478b2784SMike Rapoport 	const char delim[] = "\n";
517478b2784SMike Rapoport 	int cnt = 0;
518478b2784SMike Rapoport 	char *line;
519478b2784SMike Rapoport 
520478b2784SMike Rapoport 	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
521478b2784SMike Rapoport 		return -1;
522478b2784SMike Rapoport 
523478b2784SMike Rapoport 	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
524478b2784SMike Rapoport 		cnt++;
525478b2784SMike Rapoport 
526478b2784SMike Rapoport 	return cnt > 1;
527478b2784SMike Rapoport }
528a987785dSJay Kamat 
/*
 * Write @score to /proc/<pid>/oom_score_adj.
 *
 * Returns 0 on success, or the negative result of open() or dprintf() on
 * failure.
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, written;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	written = dprintf(fd, "%d", score);
	close(fd);

	return written < 0 ? written : 0;
}
5495313bfe4SRoman Gushchin 
/*
 * Read /proc/<pid>/<item> into @buf (capacity @size).
 *
 * A @pid of 0 selects the caller's own entry: /proc/thread-self when @thread
 * is true, /proc/self otherwise. @thread is ignored for a non-zero @pid.
 *
 * Returns the read_text() result: bytes read, or a negative value on error.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	return read_text(path, buf, size);
}
56211318989SMichal Koutný 
56311318989SMichal Koutný int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
56411318989SMichal Koutný {
56511318989SMichal Koutný 	char buf[PAGE_SIZE];
56611318989SMichal Koutný 
56711318989SMichal Koutný 	if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
56811318989SMichal Koutný 		return -1;
56911318989SMichal Koutný 
57011318989SMichal Koutný 	return strstr(buf, needle) ? 0 : -1;
57111318989SMichal Koutný }
5729bd5910dSChristian Brauner 
5739bd5910dSChristian Brauner int clone_into_cgroup_run_wait(const char *cgroup)
5749bd5910dSChristian Brauner {
5759bd5910dSChristian Brauner 	int cgroup_fd;
5769bd5910dSChristian Brauner 	pid_t pid;
5779bd5910dSChristian Brauner 
5789bd5910dSChristian Brauner 	cgroup_fd =  dirfd_open_opath(cgroup);
5799bd5910dSChristian Brauner 	if (cgroup_fd < 0)
5809bd5910dSChristian Brauner 		return -1;
5819bd5910dSChristian Brauner 
5829bd5910dSChristian Brauner 	pid = clone_into_cgroup(cgroup_fd);
5839bd5910dSChristian Brauner 	close_prot_errno(cgroup_fd);
5849bd5910dSChristian Brauner 	if (pid < 0)
5859bd5910dSChristian Brauner 		return -1;
5869bd5910dSChristian Brauner 
5879bd5910dSChristian Brauner 	if (pid == 0)
5889bd5910dSChristian Brauner 		exit(EXIT_SUCCESS);
5899bd5910dSChristian Brauner 
5909bd5910dSChristian Brauner 	/*
5919bd5910dSChristian Brauner 	 * We don't care whether this fails. We only care whether the initial
5929bd5910dSChristian Brauner 	 * clone succeeded.
5939bd5910dSChristian Brauner 	 */
5949bd5910dSChristian Brauner 	(void)clone_reap(pid, WEXITED);
5959bd5910dSChristian Brauner 	return 0;
5969bd5910dSChristian Brauner }
5978075e4f6SChristian Brauner 
/*
 * Create an inotify instance watching "<cgroup>/<filename>" for IN_MODIFY.
 *
 * Returns the inotify descriptor on success, -1 if the instance, the path,
 * or the watch could not be set up.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, wd;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	path = cg_control(cgroup, filename);
	if (!path) {
		close(fd);
		return -1;
	}

	wd = inotify_add_watch(fd, path, IN_MODIFY);
	/* The path is heap-allocated by cg_control() and only needed here. */
	free(path);

	if (wd == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
6148075e4f6SChristian Brauner 
/*
 * Set up an inotify watch on the "cgroup.events" file of @cgroup.
 *
 * Returns the inotify descriptor, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
6196323ec54SShakeel Butt 
/*
 * Set up an inotify watch on the "memory.events" file of @cgroup.
 *
 * Returns the inotify descriptor, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
6246323ec54SShakeel Butt 
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout and simply called again when it
 * times out, is interrupted by a signal, or reports something other than
 * POLLIN, so the function only returns on readability or on a poll() error.
 *
 * Returns 0 once @fd is readable, -1 if poll() failed for a reason other
 * than EINTR.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	for (;;) {
		ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return ret;
		}

		if (ret > 0 && (fds.revents & POLLIN))
			return 0;
	}
}
651