1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <limits.h> 7 #include <linux/types.h> 8 #include <sched.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <syscall.h> 14 #include <sys/prctl.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 #include <sys/socket.h> 18 #include <sys/stat.h> 19 #include <linux/kcmp.h> 20 21 #include "pidfd.h" 22 #include "../clone3/clone3_selftests.h" 23 #include "../kselftest.h" 24 #include "../kselftest_harness.h" 25 26 enum { 27 PIDFD_NS_USER, 28 PIDFD_NS_MNT, 29 PIDFD_NS_PID, 30 PIDFD_NS_UTS, 31 PIDFD_NS_IPC, 32 PIDFD_NS_NET, 33 PIDFD_NS_CGROUP, 34 PIDFD_NS_PIDCLD, 35 PIDFD_NS_MAX 36 }; 37 38 const struct ns_info { 39 const char *name; 40 int flag; 41 } ns_info[] = { 42 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, 43 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, 44 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, 45 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, 46 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, 47 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, 48 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, 49 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, 50 }; 51 52 FIXTURE(current_nsset) 53 { 54 pid_t pid; 55 int pidfd; 56 int nsfds[PIDFD_NS_MAX]; 57 58 pid_t child_pid_exited; 59 int child_pidfd_exited; 60 61 pid_t child_pid1; 62 int child_pidfd1; 63 int child_nsfds1[PIDFD_NS_MAX]; 64 65 pid_t child_pid2; 66 int child_pidfd2; 67 int child_nsfds2[PIDFD_NS_MAX]; 68 }; 69 70 static int sys_waitid(int which, pid_t pid, int options) 71 { 72 return syscall(__NR_waitid, which, pid, NULL, options, NULL); 73 } 74 75 pid_t create_child(int *pidfd, unsigned flags) 76 { 77 struct clone_args args = { 78 .flags = CLONE_PIDFD | flags, 79 .exit_signal = SIGCHLD, 80 .pidfd = ptr_to_u64(pidfd), 81 }; 82 83 return sys_clone3(&args, sizeof(struct clone_args)); 84 } 85 86 FIXTURE_SETUP(current_nsset) 87 { 88 int i, proc_fd, ret; 89 90 for (i = 0; i < PIDFD_NS_MAX; i++) { 91 self->nsfds[i] = -EBADF; 92 self->child_nsfds1[i] = -EBADF; 93 self->child_nsfds2[i] = -EBADF; 94 } 95 96 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); 97 ASSERT_GE(proc_fd, 0) { 98 TH_LOG("%m - Failed to open /proc/self/ns"); 99 } 100 101 self->pid = getpid(); 102 for (i = 0; i < PIDFD_NS_MAX; i++) { 103 const struct ns_info *info = &ns_info[i]; 104 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 105 if (self->nsfds[i] < 0) { 106 EXPECT_EQ(errno, ENOENT) { 107 TH_LOG("%m - Failed to open %s namespace for process %d", 108 info->name, self->pid); 109 } 110 } 111 } 112 113 self->pidfd = sys_pidfd_open(self->pid, 0); 114 EXPECT_GT(self->pidfd, 0) { 115 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 116 } 117 118 /* Create task that exits right away. */ 119 self->child_pid_exited = create_child(&self->child_pidfd_exited, 120 CLONE_NEWUSER | CLONE_NEWNET); 121 EXPECT_GT(self->child_pid_exited, 0); 122 123 if (self->child_pid_exited == 0) 124 _exit(EXIT_SUCCESS); 125 126 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); 127 128 self->pidfd = sys_pidfd_open(self->pid, 0); 129 EXPECT_GE(self->pidfd, 0) { 130 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 131 } 132 133 /* Create tasks that will be stopped. */ 134 self->child_pid1 = create_child(&self->child_pidfd1, 135 CLONE_NEWUSER | CLONE_NEWNS | 136 CLONE_NEWCGROUP | CLONE_NEWIPC | 137 CLONE_NEWUTS | CLONE_NEWPID | 138 CLONE_NEWNET); 139 EXPECT_GE(self->child_pid1, 0); 140 141 if (self->child_pid1 == 0) { 142 pause(); 143 _exit(EXIT_SUCCESS); 144 } 145 146 self->child_pid2 = create_child(&self->child_pidfd2, 147 CLONE_NEWUSER | CLONE_NEWNS | 148 CLONE_NEWCGROUP | CLONE_NEWIPC | 149 CLONE_NEWUTS | CLONE_NEWPID | 150 CLONE_NEWNET); 151 EXPECT_GE(self->child_pid2, 0); 152 153 if (self->child_pid2 == 0) { 154 pause(); 155 _exit(EXIT_SUCCESS); 156 } 157 158 for (i = 0; i < PIDFD_NS_MAX; i++) { 159 char p[100]; 160 161 const struct ns_info *info = &ns_info[i]; 162 163 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 164 if (self->nsfds[i] < 0) { 165 EXPECT_EQ(errno, ENOENT) { 166 TH_LOG("%m - Failed to open %s namespace for process %d", 167 info->name, self->pid); 168 } 169 } 170 171 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 172 self->child_pid1, info->name); 173 EXPECT_GT(ret, 0); 174 EXPECT_LT(ret, sizeof(p)); 175 176 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC); 177 if (self->child_nsfds1[i] < 0) { 178 EXPECT_EQ(errno, ENOENT) { 179 TH_LOG("%m - Failed to open %s namespace for process %d", 180 info->name, self->child_pid1); 181 } 182 } 183 184 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 185 self->child_pid2, info->name); 186 EXPECT_GT(ret, 0); 187 EXPECT_LT(ret, sizeof(p)); 188 189 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC); 190 if (self->child_nsfds2[i] < 0) { 191 EXPECT_EQ(errno, ENOENT) { 192 TH_LOG("%m - Failed to open %s namespace for process %d", 193 info->name, self->child_pid1); 194 } 195 } 196 } 197 198 close(proc_fd); 199 } 200 201 FIXTURE_TEARDOWN(current_nsset) 202 { 203 int i; 204 205 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1, 206 SIGKILL, NULL, 0), 0); 207 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2, 208 SIGKILL, NULL, 0), 0); 209 210 for (i = 0; i < PIDFD_NS_MAX; i++) { 211 if (self->nsfds[i] >= 0) 212 close(self->nsfds[i]); 213 if (self->child_nsfds1[i] >= 0) 214 close(self->child_nsfds1[i]); 215 if (self->child_nsfds2[i] >= 0) 216 close(self->child_nsfds2[i]); 217 } 218 219 if (self->child_pidfd1 >= 0) 220 EXPECT_EQ(0, close(self->child_pidfd1)); 221 if (self->child_pidfd2 >= 0) 222 EXPECT_EQ(0, close(self->child_pidfd2)); 223 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); 224 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); 225 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); 226 } 227 228 static int preserve_ns(const int pid, const char *ns) 229 { 230 int ret; 231 char path[50]; 232 233 ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns); 234 if (ret < 0 || (size_t)ret >= sizeof(path)) 235 return -EIO; 236 237 return open(path, O_RDONLY | O_CLOEXEC); 238 } 239 240 static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns) 241 { 242 int ns_fd2 = -EBADF; 243 int ret = -1; 244 struct stat ns_st1, ns_st2; 245 246 ret = fstat(ns_fd1, &ns_st1); 247 if (ret < 0) 248 return -1; 249 250 ns_fd2 = preserve_ns(pid2, ns); 251 if (ns_fd2 < 0) 252 return -1; 253 254 ret = fstat(ns_fd2, &ns_st2); 255 close(ns_fd2); 256 if (ret < 0) 257 return -1; 258 259 /* processes are in the same namespace */ 260 if ((ns_st1.st_dev == ns_st2.st_dev) && 261 (ns_st1.st_ino == ns_st2.st_ino)) 262 return 1; 263 264 /* processes are in different namespaces */ 265 return 0; 266 } 267 268 /* Test that we can't pass garbage to the kernel. */ 269 TEST_F(current_nsset, invalid_flags) 270 { 271 ASSERT_NE(setns(self->pidfd, 0), 0); 272 EXPECT_EQ(errno, EINVAL); 273 274 ASSERT_NE(setns(self->pidfd, -1), 0); 275 EXPECT_EQ(errno, EINVAL); 276 277 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0); 278 EXPECT_EQ(errno, EINVAL); 279 280 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0); 281 EXPECT_EQ(errno, EINVAL); 282 } 283 284 /* Test that we can't attach to a task that has already exited. */ 285 TEST_F(current_nsset, pidfd_exited_child) 286 { 287 int i; 288 pid_t pid; 289 290 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), 291 0); 292 EXPECT_EQ(errno, ESRCH); 293 294 pid = getpid(); 295 for (i = 0; i < PIDFD_NS_MAX; i++) { 296 const struct ns_info *info = &ns_info[i]; 297 /* Verify that we haven't changed any namespaces. */ 298 if (self->nsfds[i] >= 0) 299 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1); 300 } 301 } 302 303 TEST_F(current_nsset, pidfd_incremental_setns) 304 { 305 int i; 306 pid_t pid; 307 308 pid = getpid(); 309 for (i = 0; i < PIDFD_NS_MAX; i++) { 310 const struct ns_info *info = &ns_info[i]; 311 int nsfd; 312 313 if (self->child_nsfds1[i] < 0) 314 continue; 315 316 if (info->flag) { 317 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) { 318 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d", 319 info->name, self->child_pid1, 320 self->child_pidfd1); 321 } 322 } 323 324 /* Verify that we have changed to the correct namespaces. */ 325 if (info->flag == CLONE_NEWPID) 326 nsfd = self->nsfds[i]; 327 else 328 nsfd = self->child_nsfds1[i]; 329 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 330 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d", 331 info->name, self->child_pid1, 332 self->child_pidfd1); 333 } 334 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d", 335 info->name, self->child_pid1, self->child_pidfd1); 336 } 337 } 338 339 TEST_F(current_nsset, nsfd_incremental_setns) 340 { 341 int i; 342 pid_t pid; 343 344 pid = getpid(); 345 for (i = 0; i < PIDFD_NS_MAX; i++) { 346 const struct ns_info *info = &ns_info[i]; 347 int nsfd; 348 349 if (self->child_nsfds1[i] < 0) 350 continue; 351 352 if (info->flag) { 353 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) { 354 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", 355 info->name, self->child_pid1, 356 self->child_nsfds1[i]); 357 } 358 } 359 360 /* Verify that we have changed to the correct namespaces. */ 361 if (info->flag == CLONE_NEWPID) 362 nsfd = self->nsfds[i]; 363 else 364 nsfd = self->child_nsfds1[i]; 365 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 366 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", 367 info->name, self->child_pid1, 368 self->child_nsfds1[i]); 369 } 370 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", 371 info->name, self->child_pid1, self->child_nsfds1[i]); 372 } 373 } 374 375 TEST_F(current_nsset, pidfd_one_shot_setns) 376 { 377 unsigned flags = 0; 378 int i; 379 pid_t pid; 380 381 for (i = 0; i < PIDFD_NS_MAX; i++) { 382 const struct ns_info *info = &ns_info[i]; 383 384 if (self->child_nsfds1[i] < 0) 385 continue; 386 387 flags |= info->flag; 388 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 389 info->name, self->child_pid1); 390 } 391 392 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 393 TH_LOG("%m - Failed to setns to namespaces of %d", 394 self->child_pid1); 395 } 396 397 pid = getpid(); 398 for (i = 0; i < PIDFD_NS_MAX; i++) { 399 const struct ns_info *info = &ns_info[i]; 400 int nsfd; 401 402 if (self->child_nsfds1[i] < 0) 403 continue; 404 405 /* Verify that we have changed to the correct namespaces. */ 406 if (info->flag == CLONE_NEWPID) 407 nsfd = self->nsfds[i]; 408 else 409 nsfd = self->child_nsfds1[i]; 410 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 411 TH_LOG("setns failed to place us correctly into %s namespace of %d", 412 info->name, self->child_pid1); 413 } 414 TH_LOG("Managed to correctly setns to %s namespace of %d", 415 info->name, self->child_pid1); 416 } 417 } 418 419 TEST_F(current_nsset, no_foul_play) 420 { 421 unsigned flags = 0; 422 int i; 423 424 for (i = 0; i < PIDFD_NS_MAX; i++) { 425 const struct ns_info *info = &ns_info[i]; 426 427 if (self->child_nsfds1[i] < 0) 428 continue; 429 430 flags |= info->flag; 431 if (info->flag) /* No use logging pid_for_children. */ 432 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 433 info->name, self->child_pid1); 434 } 435 436 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 437 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d", 438 self->child_pid1, self->child_pidfd1); 439 } 440 441 /* 442 * Can't setns to a user namespace outside of our hierarchy since we 443 * don't have caps in there and didn't create it. That means that under 444 * no circumstances should we be able to setns to any of the other 445 * ones since they aren't owned by our user namespace. 446 */ 447 for (i = 0; i < PIDFD_NS_MAX; i++) { 448 const struct ns_info *info = &ns_info[i]; 449 450 if (self->child_nsfds2[i] < 0 || !info->flag) 451 continue; 452 453 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) { 454 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d", 455 info->name, self->child_pid2, 456 self->child_pidfd2); 457 } 458 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d", 459 info->name, self->child_pid2, 460 self->child_pidfd2); 461 462 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) { 463 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", 464 info->name, self->child_pid2, 465 self->child_nsfds2[i]); 466 } 467 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 468 info->name, self->child_pid2, 469 self->child_nsfds2[i]); 470 } 471 } 472 473 TEST_HARNESS_MAIN 474