1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <limits.h> 7 #include <linux/types.h> 8 #include <sched.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <syscall.h> 14 #include <sys/prctl.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 #include <sys/socket.h> 18 #include <sys/stat.h> 19 #include <linux/kcmp.h> 20 21 #include "pidfd.h" 22 #include "../clone3/clone3_selftests.h" 23 #include "../kselftest.h" 24 #include "../kselftest_harness.h" 25 26 enum { 27 PIDFD_NS_USER, 28 PIDFD_NS_MNT, 29 PIDFD_NS_PID, 30 PIDFD_NS_UTS, 31 PIDFD_NS_IPC, 32 PIDFD_NS_NET, 33 PIDFD_NS_CGROUP, 34 PIDFD_NS_PIDCLD, 35 PIDFD_NS_TIME, 36 PIDFD_NS_MAX 37 }; 38 39 const struct ns_info { 40 const char *name; 41 int flag; 42 } ns_info[] = { 43 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, 44 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, 45 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, 46 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, 47 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, 48 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, 49 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, 50 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, 51 [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, 52 }; 53 54 FIXTURE(current_nsset) 55 { 56 pid_t pid; 57 int pidfd; 58 int nsfds[PIDFD_NS_MAX]; 59 60 pid_t child_pid_exited; 61 int child_pidfd_exited; 62 63 pid_t child_pid1; 64 int child_pidfd1; 65 int child_nsfds1[PIDFD_NS_MAX]; 66 67 pid_t child_pid2; 68 int child_pidfd2; 69 int child_nsfds2[PIDFD_NS_MAX]; 70 }; 71 72 static int sys_waitid(int which, pid_t pid, int options) 73 { 74 return syscall(__NR_waitid, which, pid, NULL, options, NULL); 75 } 76 77 pid_t create_child(int *pidfd, unsigned flags) 78 { 79 struct clone_args args = { 80 .flags = CLONE_PIDFD | flags, 81 .exit_signal = SIGCHLD, 82 .pidfd = ptr_to_u64(pidfd), 83 }; 84 85 return sys_clone3(&args, sizeof(struct clone_args)); 86 } 87 88 static bool switch_timens(void) 89 { 90 int fd, ret; 91 92 if (unshare(CLONE_NEWTIME)) 93 return false; 94 95 fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC); 96 if (fd < 0) 97 return false; 98 99 ret = setns(fd, CLONE_NEWTIME); 100 close(fd); 101 return ret == 0; 102 } 103 104 static ssize_t read_nointr(int fd, void *buf, size_t count) 105 { 106 ssize_t ret; 107 108 do { 109 ret = read(fd, buf, count); 110 } while (ret < 0 && errno == EINTR); 111 112 return ret; 113 } 114 115 static ssize_t write_nointr(int fd, const void *buf, size_t count) 116 { 117 ssize_t ret; 118 119 do { 120 ret = write(fd, buf, count); 121 } while (ret < 0 && errno == EINTR); 122 123 return ret; 124 } 125 126 FIXTURE_SETUP(current_nsset) 127 { 128 int i, proc_fd, ret; 129 int ipc_sockets[2]; 130 char c; 131 132 for (i = 0; i < PIDFD_NS_MAX; i++) { 133 self->nsfds[i] = -EBADF; 134 self->child_nsfds1[i] = -EBADF; 135 self->child_nsfds2[i] = -EBADF; 136 } 137 138 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); 139 ASSERT_GE(proc_fd, 0) { 140 TH_LOG("%m - Failed to open /proc/self/ns"); 141 } 142 143 self->pid = getpid(); 144 for (i = 0; i < PIDFD_NS_MAX; i++) { 145 const struct ns_info *info = &ns_info[i]; 146 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 147 if (self->nsfds[i] < 0) { 148 EXPECT_EQ(errno, ENOENT) { 149 TH_LOG("%m - Failed to open %s namespace for process %d", 150 info->name, self->pid); 151 } 152 } 153 } 154 155 self->pidfd = sys_pidfd_open(self->pid, 0); 156 EXPECT_GT(self->pidfd, 0) { 157 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 158 } 159 160 /* Create task that exits right away. */ 161 self->child_pid_exited = create_child(&self->child_pidfd_exited, 162 CLONE_NEWUSER | CLONE_NEWNET); 163 EXPECT_GT(self->child_pid_exited, 0); 164 165 if (self->child_pid_exited == 0) 166 _exit(EXIT_SUCCESS); 167 168 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); 169 170 self->pidfd = sys_pidfd_open(self->pid, 0); 171 EXPECT_GE(self->pidfd, 0) { 172 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 173 } 174 175 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 176 EXPECT_EQ(ret, 0); 177 178 /* Create tasks that will be stopped. */ 179 self->child_pid1 = create_child(&self->child_pidfd1, 180 CLONE_NEWUSER | CLONE_NEWNS | 181 CLONE_NEWCGROUP | CLONE_NEWIPC | 182 CLONE_NEWUTS | CLONE_NEWPID | 183 CLONE_NEWNET); 184 EXPECT_GE(self->child_pid1, 0); 185 186 if (self->child_pid1 == 0) { 187 close(ipc_sockets[0]); 188 189 if (!switch_timens()) 190 _exit(EXIT_FAILURE); 191 192 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 193 _exit(EXIT_FAILURE); 194 195 close(ipc_sockets[1]); 196 197 pause(); 198 _exit(EXIT_SUCCESS); 199 } 200 201 close(ipc_sockets[1]); 202 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); 203 close(ipc_sockets[0]); 204 205 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 206 EXPECT_EQ(ret, 0); 207 208 self->child_pid2 = create_child(&self->child_pidfd2, 209 CLONE_NEWUSER | CLONE_NEWNS | 210 CLONE_NEWCGROUP | CLONE_NEWIPC | 211 CLONE_NEWUTS | CLONE_NEWPID | 212 CLONE_NEWNET); 213 EXPECT_GE(self->child_pid2, 0); 214 215 if (self->child_pid2 == 0) { 216 close(ipc_sockets[0]); 217 218 if (!switch_timens()) 219 _exit(EXIT_FAILURE); 220 221 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 222 _exit(EXIT_FAILURE); 223 224 close(ipc_sockets[1]); 225 226 pause(); 227 _exit(EXIT_SUCCESS); 228 } 229 230 close(ipc_sockets[1]); 231 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); 232 close(ipc_sockets[0]); 233 234 for (i = 0; i < PIDFD_NS_MAX; i++) { 235 char p[100]; 236 237 const struct ns_info *info = &ns_info[i]; 238 239 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 240 if (self->nsfds[i] < 0) { 241 EXPECT_EQ(errno, ENOENT) { 242 TH_LOG("%m - Failed to open %s namespace for process %d", 243 info->name, self->pid); 244 } 245 } 246 247 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 248 self->child_pid1, info->name); 249 EXPECT_GT(ret, 0); 250 EXPECT_LT(ret, sizeof(p)); 251 252 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC); 253 if (self->child_nsfds1[i] < 0) { 254 EXPECT_EQ(errno, ENOENT) { 255 TH_LOG("%m - Failed to open %s namespace for process %d", 256 info->name, self->child_pid1); 257 } 258 } 259 260 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 261 self->child_pid2, info->name); 262 EXPECT_GT(ret, 0); 263 EXPECT_LT(ret, sizeof(p)); 264 265 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC); 266 if (self->child_nsfds2[i] < 0) { 267 EXPECT_EQ(errno, ENOENT) { 268 TH_LOG("%m - Failed to open %s namespace for process %d", 269 info->name, self->child_pid1); 270 } 271 } 272 } 273 274 close(proc_fd); 275 } 276 277 FIXTURE_TEARDOWN(current_nsset) 278 { 279 int i; 280 281 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1, 282 SIGKILL, NULL, 0), 0); 283 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2, 284 SIGKILL, NULL, 0), 0); 285 286 for (i = 0; i < PIDFD_NS_MAX; i++) { 287 if (self->nsfds[i] >= 0) 288 close(self->nsfds[i]); 289 if (self->child_nsfds1[i] >= 0) 290 close(self->child_nsfds1[i]); 291 if (self->child_nsfds2[i] >= 0) 292 close(self->child_nsfds2[i]); 293 } 294 295 if (self->child_pidfd1 >= 0) 296 EXPECT_EQ(0, close(self->child_pidfd1)); 297 if (self->child_pidfd2 >= 0) 298 EXPECT_EQ(0, close(self->child_pidfd2)); 299 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); 300 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); 301 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); 302 } 303 304 static int preserve_ns(const int pid, const char *ns) 305 { 306 int ret; 307 char path[50]; 308 309 ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns); 310 if (ret < 0 || (size_t)ret >= sizeof(path)) 311 return -EIO; 312 313 return open(path, O_RDONLY | O_CLOEXEC); 314 } 315 316 static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns) 317 { 318 int ns_fd2 = -EBADF; 319 int ret = -1; 320 struct stat ns_st1, ns_st2; 321 322 ret = fstat(ns_fd1, &ns_st1); 323 if (ret < 0) 324 return -1; 325 326 ns_fd2 = preserve_ns(pid2, ns); 327 if (ns_fd2 < 0) 328 return -1; 329 330 ret = fstat(ns_fd2, &ns_st2); 331 close(ns_fd2); 332 if (ret < 0) 333 return -1; 334 335 /* processes are in the same namespace */ 336 if ((ns_st1.st_dev == ns_st2.st_dev) && 337 (ns_st1.st_ino == ns_st2.st_ino)) 338 return 1; 339 340 /* processes are in different namespaces */ 341 return 0; 342 } 343 344 /* Test that we can't pass garbage to the kernel. */ 345 TEST_F(current_nsset, invalid_flags) 346 { 347 ASSERT_NE(setns(self->pidfd, 0), 0); 348 EXPECT_EQ(errno, EINVAL); 349 350 ASSERT_NE(setns(self->pidfd, -1), 0); 351 EXPECT_EQ(errno, EINVAL); 352 353 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0); 354 EXPECT_EQ(errno, EINVAL); 355 356 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0); 357 EXPECT_EQ(errno, EINVAL); 358 } 359 360 /* Test that we can't attach to a task that has already exited. */ 361 TEST_F(current_nsset, pidfd_exited_child) 362 { 363 int i; 364 pid_t pid; 365 366 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), 367 0); 368 EXPECT_EQ(errno, ESRCH); 369 370 pid = getpid(); 371 for (i = 0; i < PIDFD_NS_MAX; i++) { 372 const struct ns_info *info = &ns_info[i]; 373 /* Verify that we haven't changed any namespaces. */ 374 if (self->nsfds[i] >= 0) 375 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1); 376 } 377 } 378 379 TEST_F(current_nsset, pidfd_incremental_setns) 380 { 381 int i; 382 pid_t pid; 383 384 pid = getpid(); 385 for (i = 0; i < PIDFD_NS_MAX; i++) { 386 const struct ns_info *info = &ns_info[i]; 387 int nsfd; 388 389 if (self->child_nsfds1[i] < 0) 390 continue; 391 392 if (info->flag) { 393 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) { 394 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d", 395 info->name, self->child_pid1, 396 self->child_pidfd1); 397 } 398 } 399 400 /* Verify that we have changed to the correct namespaces. */ 401 if (info->flag == CLONE_NEWPID) 402 nsfd = self->nsfds[i]; 403 else 404 nsfd = self->child_nsfds1[i]; 405 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 406 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d", 407 info->name, self->child_pid1, 408 self->child_pidfd1); 409 } 410 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d", 411 info->name, self->child_pid1, self->child_pidfd1); 412 } 413 } 414 415 TEST_F(current_nsset, nsfd_incremental_setns) 416 { 417 int i; 418 pid_t pid; 419 420 pid = getpid(); 421 for (i = 0; i < PIDFD_NS_MAX; i++) { 422 const struct ns_info *info = &ns_info[i]; 423 int nsfd; 424 425 if (self->child_nsfds1[i] < 0) 426 continue; 427 428 if (info->flag) { 429 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) { 430 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", 431 info->name, self->child_pid1, 432 self->child_nsfds1[i]); 433 } 434 } 435 436 /* Verify that we have changed to the correct namespaces. */ 437 if (info->flag == CLONE_NEWPID) 438 nsfd = self->nsfds[i]; 439 else 440 nsfd = self->child_nsfds1[i]; 441 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 442 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", 443 info->name, self->child_pid1, 444 self->child_nsfds1[i]); 445 } 446 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", 447 info->name, self->child_pid1, self->child_nsfds1[i]); 448 } 449 } 450 451 TEST_F(current_nsset, pidfd_one_shot_setns) 452 { 453 unsigned flags = 0; 454 int i; 455 pid_t pid; 456 457 for (i = 0; i < PIDFD_NS_MAX; i++) { 458 const struct ns_info *info = &ns_info[i]; 459 460 if (self->child_nsfds1[i] < 0) 461 continue; 462 463 flags |= info->flag; 464 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 465 info->name, self->child_pid1); 466 } 467 468 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 469 TH_LOG("%m - Failed to setns to namespaces of %d", 470 self->child_pid1); 471 } 472 473 pid = getpid(); 474 for (i = 0; i < PIDFD_NS_MAX; i++) { 475 const struct ns_info *info = &ns_info[i]; 476 int nsfd; 477 478 if (self->child_nsfds1[i] < 0) 479 continue; 480 481 /* Verify that we have changed to the correct namespaces. */ 482 if (info->flag == CLONE_NEWPID) 483 nsfd = self->nsfds[i]; 484 else 485 nsfd = self->child_nsfds1[i]; 486 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 487 TH_LOG("setns failed to place us correctly into %s namespace of %d", 488 info->name, self->child_pid1); 489 } 490 TH_LOG("Managed to correctly setns to %s namespace of %d", 491 info->name, self->child_pid1); 492 } 493 } 494 495 TEST_F(current_nsset, no_foul_play) 496 { 497 unsigned flags = 0; 498 int i; 499 500 for (i = 0; i < PIDFD_NS_MAX; i++) { 501 const struct ns_info *info = &ns_info[i]; 502 503 if (self->child_nsfds1[i] < 0) 504 continue; 505 506 flags |= info->flag; 507 if (info->flag) /* No use logging pid_for_children. */ 508 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 509 info->name, self->child_pid1); 510 } 511 512 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 513 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d", 514 self->child_pid1, self->child_pidfd1); 515 } 516 517 /* 518 * Can't setns to a user namespace outside of our hierarchy since we 519 * don't have caps in there and didn't create it. That means that under 520 * no circumstances should we be able to setns to any of the other 521 * ones since they aren't owned by our user namespace. 522 */ 523 for (i = 0; i < PIDFD_NS_MAX; i++) { 524 const struct ns_info *info = &ns_info[i]; 525 526 if (self->child_nsfds2[i] < 0 || !info->flag) 527 continue; 528 529 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) { 530 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d", 531 info->name, self->child_pid2, 532 self->child_pidfd2); 533 } 534 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d", 535 info->name, self->child_pid2, 536 self->child_pidfd2); 537 538 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) { 539 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", 540 info->name, self->child_pid2, 541 self->child_nsfds2[i]); 542 } 543 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 544 info->name, self->child_pid2, 545 self->child_nsfds2[i]); 546 } 547 } 548 549 TEST(setns_einval) 550 { 551 int fd; 552 553 fd = sys_memfd_create("rostock", 0); 554 EXPECT_GT(fd, 0); 555 556 ASSERT_NE(setns(fd, 0), 0); 557 EXPECT_EQ(errno, EINVAL); 558 close(fd); 559 } 560 561 TEST_HARNESS_MAIN 562