1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <linux/types.h>
8 #include <sched.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <syscall.h>
14 #include <sys/prctl.h>
15 #include <sys/wait.h>
16 #include <unistd.h>
17 #include <sys/socket.h>
18 #include <sys/stat.h>
19 #include <linux/kcmp.h>
20 
21 #include "pidfd.h"
22 #include "../clone3/clone3_selftests.h"
23 #include "../kselftest.h"
24 #include "../kselftest_harness.h"
25 
/*
 * Slot numbers for the namespaces exercised by this test. They index both
 * the ns_info[] table and the per-task namespace fd arrays in the fixture.
 */
enum {
	PIDFD_NS_USER = 0,	/* user namespace */
	PIDFD_NS_MNT,		/* mount namespace */
	PIDFD_NS_PID,		/* pid namespace */
	PIDFD_NS_UTS,		/* uts namespace */
	PIDFD_NS_IPC,		/* ipc namespace */
	PIDFD_NS_NET,		/* network namespace */
	PIDFD_NS_CGROUP,	/* cgroup namespace */
	PIDFD_NS_PIDCLD,	/* pid_for_children entry */
	PIDFD_NS_MAX		/* number of slots, not a namespace; keep last */
};
37 
38 const struct ns_info {
39 	const char *name;
40 	int flag;
41 } ns_info[] = {
42 	[PIDFD_NS_USER]   = { "user",             CLONE_NEWUSER,   },
43 	[PIDFD_NS_MNT]    = { "mnt",              CLONE_NEWNS,     },
44 	[PIDFD_NS_PID]    = { "pid",              CLONE_NEWPID,    },
45 	[PIDFD_NS_UTS]    = { "uts",              CLONE_NEWUTS,    },
46 	[PIDFD_NS_IPC]    = { "ipc",              CLONE_NEWIPC,    },
47 	[PIDFD_NS_NET]    = { "net",              CLONE_NEWNET,    },
48 	[PIDFD_NS_CGROUP] = { "cgroup",           CLONE_NEWCGROUP, },
49 	[PIDFD_NS_PIDCLD] = { "pid_for_children", 0,               },
50 };
51 
/*
 * Per-test state shared by all current_nsset tests. It is filled in by
 * FIXTURE_SETUP(current_nsset) and released by FIXTURE_TEARDOWN(current_nsset).
 * The namespace fd arrays are indexed by the PIDFD_NS_* constants; a negative
 * entry means that namespace file could not be opened.
 */
FIXTURE(current_nsset)
{
	/* The test process itself: its pid, a pidfd for it and its namespace fds. */
	pid_t pid;
	int pidfd;
	int nsfds[PIDFD_NS_MAX];

	/* Child that exits right after being created. */
	pid_t child_pid_exited;
	int child_pidfd_exited;

	/* First child blocked in pause(), with fds for its namespaces. */
	pid_t child_pid1;
	int child_pidfd1;
	int child_nsfds1[PIDFD_NS_MAX];

	/* Second child blocked in pause(), with fds for its namespaces. */
	pid_t child_pid2;
	int child_pidfd2;
	int child_nsfds2[PIDFD_NS_MAX];
};
69 
/*
 * Invoke the raw waitid syscall for @pid with the given id type @which and
 * wait @options. NULL is passed for the third and fifth syscall arguments,
 * so no status information is returned to the caller.
 *
 * Returns the syscall() result: 0 on success, -1 with errno set on failure.
 */
static int sys_waitid(int which, pid_t pid, int options)
{
	long rc;

	rc = syscall(__NR_waitid, which, pid, NULL, options, NULL);

	return rc;
}
74 
75 pid_t create_child(int *pidfd, unsigned flags)
76 {
77 	struct clone_args args = {
78 		.flags		= CLONE_PIDFD | flags,
79 		.exit_signal	= SIGCHLD,
80 		.pidfd		= ptr_to_u64(pidfd),
81 	};
82 
83 	return sys_clone3(&args, sizeof(struct clone_args));
84 }
85 
86 FIXTURE_SETUP(current_nsset)
87 {
88 	int i, proc_fd, ret;
89 
90 	for (i = 0; i < PIDFD_NS_MAX; i++) {
91 		self->nsfds[i]		= -EBADF;
92 		self->child_nsfds1[i]	= -EBADF;
93 		self->child_nsfds2[i]	= -EBADF;
94 	}
95 
96 	proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
97 	ASSERT_GE(proc_fd, 0) {
98 		TH_LOG("%m - Failed to open /proc/self/ns");
99 	}
100 
101 	self->pid = getpid();
102 	for (i = 0; i < PIDFD_NS_MAX; i++) {
103 		const struct ns_info *info = &ns_info[i];
104 		self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
105 		if (self->nsfds[i] < 0) {
106 			EXPECT_EQ(errno, ENOENT) {
107 				TH_LOG("%m - Failed to open %s namespace for process %d",
108 				       info->name, self->pid);
109 			}
110 		}
111 	}
112 
113 	self->pidfd = sys_pidfd_open(self->pid, 0);
114 	EXPECT_GT(self->pidfd, 0) {
115 		TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
116 	}
117 
118 	/* Create task that exits right away. */
119 	self->child_pid_exited = create_child(&self->child_pidfd_exited,
120 					      CLONE_NEWUSER | CLONE_NEWNET);
121 	EXPECT_GT(self->child_pid_exited, 0);
122 
123 	if (self->child_pid_exited == 0)
124 		_exit(EXIT_SUCCESS);
125 
126 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
127 
128 	self->pidfd = sys_pidfd_open(self->pid, 0);
129 	EXPECT_GE(self->pidfd, 0) {
130 		TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
131 	}
132 
133 	/* Create tasks that will be stopped. */
134 	self->child_pid1 = create_child(&self->child_pidfd1,
135 					CLONE_NEWUSER | CLONE_NEWNS |
136 					CLONE_NEWCGROUP | CLONE_NEWIPC |
137 					CLONE_NEWUTS | CLONE_NEWPID |
138 					CLONE_NEWNET);
139 	EXPECT_GE(self->child_pid1, 0);
140 
141 	if (self->child_pid1 == 0) {
142 		pause();
143 		_exit(EXIT_SUCCESS);
144 	}
145 
146 	self->child_pid2 = create_child(&self->child_pidfd2,
147 					CLONE_NEWUSER | CLONE_NEWNS |
148 					CLONE_NEWCGROUP | CLONE_NEWIPC |
149 					CLONE_NEWUTS | CLONE_NEWPID |
150 					CLONE_NEWNET);
151 	EXPECT_GE(self->child_pid2, 0);
152 
153 	if (self->child_pid2 == 0) {
154 		pause();
155 		_exit(EXIT_SUCCESS);
156 	}
157 
158 	for (i = 0; i < PIDFD_NS_MAX; i++) {
159 		char p[100];
160 
161 		const struct ns_info *info = &ns_info[i];
162 
163 		self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
164 		if (self->nsfds[i] < 0) {
165 			EXPECT_EQ(errno, ENOENT) {
166 				TH_LOG("%m - Failed to open %s namespace for process %d",
167 				       info->name, self->pid);
168 			}
169 		}
170 
171 		ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
172 			       self->child_pid1, info->name);
173 		EXPECT_GT(ret, 0);
174 		EXPECT_LT(ret, sizeof(p));
175 
176 		self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
177 		if (self->child_nsfds1[i] < 0) {
178 			EXPECT_EQ(errno, ENOENT) {
179 				TH_LOG("%m - Failed to open %s namespace for process %d",
180 				       info->name, self->child_pid1);
181 			}
182 		}
183 
184 		ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
185 			       self->child_pid2, info->name);
186 		EXPECT_GT(ret, 0);
187 		EXPECT_LT(ret, sizeof(p));
188 
189 		self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
190 		if (self->child_nsfds2[i] < 0) {
191 			EXPECT_EQ(errno, ENOENT) {
192 				TH_LOG("%m - Failed to open %s namespace for process %d",
193 				       info->name, self->child_pid1);
194 			}
195 		}
196 	}
197 
198 	close(proc_fd);
199 }
200 
201 FIXTURE_TEARDOWN(current_nsset)
202 {
203 	int i;
204 
205 	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
206 					SIGKILL, NULL, 0), 0);
207 	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
208 					SIGKILL, NULL, 0), 0);
209 
210 	for (i = 0; i < PIDFD_NS_MAX; i++) {
211 		if (self->nsfds[i] >= 0)
212 			close(self->nsfds[i]);
213 		if (self->child_nsfds1[i] >= 0)
214 			close(self->child_nsfds1[i]);
215 		if (self->child_nsfds2[i] >= 0)
216 			close(self->child_nsfds2[i]);
217 	}
218 
219 	if (self->child_pidfd1 >= 0)
220 		EXPECT_EQ(0, close(self->child_pidfd1));
221 	if (self->child_pidfd2 >= 0)
222 		EXPECT_EQ(0, close(self->child_pidfd2));
223 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
224 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
225 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
226 }
227 
228 static int preserve_ns(const int pid, const char *ns)
229 {
230 	int ret;
231 	char path[50];
232 
233 	ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);
234 	if (ret < 0 || (size_t)ret >= sizeof(path))
235 		return -EIO;
236 
237 	return open(path, O_RDONLY | O_CLOEXEC);
238 }
239 
/*
 * Check whether the namespace referred to by @ns_fd1 is the namespace of
 * type @ns that process @pid2 currently has under /proc/<pid2>/ns.
 *
 * Two namespace files are taken to denote the same namespace when fstat()
 * reports the same device and inode number for both.
 *
 * Returns 1 if they match, 0 if they differ and -1 if either file could not
 * be opened or stat'ed.
 */
static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
{
	struct stat st_ref, st_other;
	int other_fd;
	int err;

	if (fstat(ns_fd1, &st_ref) < 0)
		return -1;

	other_fd = preserve_ns(pid2, ns);
	if (other_fd < 0)
		return -1;

	/* The fd is only needed for the stat; drop it before checking. */
	err = fstat(other_fd, &st_other);
	close(other_fd);
	if (err < 0)
		return -1;

	if (st_ref.st_dev != st_other.st_dev)
		return 0;
	if (st_ref.st_ino != st_other.st_ino)
		return 0;

	return 1;
}
267 
268 /* Test that we can't pass garbage to the kernel. */
269 TEST_F(current_nsset, invalid_flags)
270 {
271 	ASSERT_NE(setns(self->pidfd, 0), 0);
272 	EXPECT_EQ(errno, EINVAL);
273 
274 	ASSERT_NE(setns(self->pidfd, -1), 0);
275 	EXPECT_EQ(errno, EINVAL);
276 
277 	ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
278 	EXPECT_EQ(errno, EINVAL);
279 
280 	ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
281 	EXPECT_EQ(errno, EINVAL);
282 }
283 
284 /* Test that we can't attach to a task that has already exited. */
285 TEST_F(current_nsset, pidfd_exited_child)
286 {
287 	int i;
288 	pid_t pid;
289 
290 	ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
291 		  0);
292 	EXPECT_EQ(errno, ESRCH);
293 
294 	pid = getpid();
295 	for (i = 0; i < PIDFD_NS_MAX; i++) {
296 		const struct ns_info *info = &ns_info[i];
297 		/* Verify that we haven't changed any namespaces. */
298 		if (self->nsfds[i] >= 0)
299 			ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
300 	}
301 }
302 
303 TEST_F(current_nsset, pidfd_incremental_setns)
304 {
305 	int i;
306 	pid_t pid;
307 
308 	pid = getpid();
309 	for (i = 0; i < PIDFD_NS_MAX; i++) {
310 		const struct ns_info *info = &ns_info[i];
311 		int nsfd;
312 
313 		if (self->child_nsfds1[i] < 0)
314 			continue;
315 
316 		if (info->flag) {
317 			ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
318 				TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
319 				       info->name, self->child_pid1,
320 				       self->child_pidfd1);
321 			}
322 		}
323 
324 		/* Verify that we have changed to the correct namespaces. */
325 		if (info->flag == CLONE_NEWPID)
326 			nsfd = self->nsfds[i];
327 		else
328 			nsfd = self->child_nsfds1[i];
329 		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
330 			TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
331 			       info->name, self->child_pid1,
332 			       self->child_pidfd1);
333 		}
334 		TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
335 		       info->name, self->child_pid1, self->child_pidfd1);
336 	}
337 }
338 
339 TEST_F(current_nsset, nsfd_incremental_setns)
340 {
341 	int i;
342 	pid_t pid;
343 
344 	pid = getpid();
345 	for (i = 0; i < PIDFD_NS_MAX; i++) {
346 		const struct ns_info *info = &ns_info[i];
347 		int nsfd;
348 
349 		if (self->child_nsfds1[i] < 0)
350 			continue;
351 
352 		if (info->flag) {
353 			ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
354 				TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
355 				       info->name, self->child_pid1,
356 				       self->child_nsfds1[i]);
357 			}
358 		}
359 
360 		/* Verify that we have changed to the correct namespaces. */
361 		if (info->flag == CLONE_NEWPID)
362 			nsfd = self->nsfds[i];
363 		else
364 			nsfd = self->child_nsfds1[i];
365 		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
366 			TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
367 			       info->name, self->child_pid1,
368 			       self->child_nsfds1[i]);
369 		}
370 		TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
371 		       info->name, self->child_pid1, self->child_nsfds1[i]);
372 	}
373 }
374 
375 TEST_F(current_nsset, pidfd_one_shot_setns)
376 {
377 	unsigned flags = 0;
378 	int i;
379 	pid_t pid;
380 
381 	for (i = 0; i < PIDFD_NS_MAX; i++) {
382 		const struct ns_info *info = &ns_info[i];
383 
384 		if (self->child_nsfds1[i] < 0)
385 			continue;
386 
387 		flags |= info->flag;
388 		TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
389 		       info->name, self->child_pid1);
390 	}
391 
392 	ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
393 		TH_LOG("%m - Failed to setns to namespaces of %d",
394 		       self->child_pid1);
395 	}
396 
397 	pid = getpid();
398 	for (i = 0; i < PIDFD_NS_MAX; i++) {
399 		const struct ns_info *info = &ns_info[i];
400 		int nsfd;
401 
402 		if (self->child_nsfds1[i] < 0)
403 			continue;
404 
405 		/* Verify that we have changed to the correct namespaces. */
406 		if (info->flag == CLONE_NEWPID)
407 			nsfd = self->nsfds[i];
408 		else
409 			nsfd = self->child_nsfds1[i];
410 		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
411 			TH_LOG("setns failed to place us correctly into %s namespace of %d",
412 			       info->name, self->child_pid1);
413 		}
414 		TH_LOG("Managed to correctly setns to %s namespace of %d",
415 		       info->name, self->child_pid1);
416 	}
417 }
418 
419 TEST_F(current_nsset, no_foul_play)
420 {
421 	unsigned flags = 0;
422 	int i;
423 
424 	for (i = 0; i < PIDFD_NS_MAX; i++) {
425 		const struct ns_info *info = &ns_info[i];
426 
427 		if (self->child_nsfds1[i] < 0)
428 			continue;
429 
430 		flags |= info->flag;
431 		if (info->flag) /* No use logging pid_for_children. */
432 			TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
433 			       info->name, self->child_pid1);
434 	}
435 
436 	ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
437 		TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
438 		       self->child_pid1, self->child_pidfd1);
439 	}
440 
441 	/*
442 	 * Can't setns to a user namespace outside of our hierarchy since we
443 	 * don't have caps in there and didn't create it. That means that under
444 	 * no circumstances should we be able to setns to any of the other
445 	 * ones since they aren't owned by our user namespace.
446 	 */
447 	for (i = 0; i < PIDFD_NS_MAX; i++) {
448 		const struct ns_info *info = &ns_info[i];
449 
450 		if (self->child_nsfds2[i] < 0 || !info->flag)
451 			continue;
452 
453 		ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
454 			TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
455 			       info->name, self->child_pid2,
456 			       self->child_pidfd2);
457 		}
458 		TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
459 		       info->name, self->child_pid2,
460 		       self->child_pidfd2);
461 
462 		ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
463 			TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
464 			       info->name, self->child_pid2,
465 			       self->child_nsfds2[i]);
466 		}
467 		TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
468 		       info->name, self->child_pid2,
469 		       self->child_nsfds2[i]);
470 	}
471 }
472 
473 TEST(setns_einval)
474 {
475 	int fd;
476 
477 	fd = sys_memfd_create("rostock", 0);
478 	EXPECT_GT(fd, 0);
479 
480 	ASSERT_NE(setns(fd, 0), 0);
481 	EXPECT_EQ(errno, EINVAL);
482 	close(fd);
483 }
484 
/*
 * Macro from ../kselftest_harness.h; presumably expands to the program's
 * main() that runs the tests registered above. TODO confirm against the
 * harness header.
 */
TEST_HARNESS_MAIN
486