1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <limits.h> 7 #include <linux/types.h> 8 #include <sched.h> 9 #include <signal.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <syscall.h> 14 #include <sys/prctl.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 #include <sys/socket.h> 18 #include <sys/stat.h> 19 #include <linux/kcmp.h> 20 21 #include "pidfd.h" 22 #include "../clone3/clone3_selftests.h" 23 #include "../kselftest_harness.h" 24 25 enum { 26 PIDFD_NS_USER, 27 PIDFD_NS_MNT, 28 PIDFD_NS_PID, 29 PIDFD_NS_UTS, 30 PIDFD_NS_IPC, 31 PIDFD_NS_NET, 32 PIDFD_NS_CGROUP, 33 PIDFD_NS_PIDCLD, 34 PIDFD_NS_TIME, 35 PIDFD_NS_MAX 36 }; 37 38 const struct ns_info { 39 const char *name; 40 int flag; 41 } ns_info[] = { 42 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, 43 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, 44 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, 45 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, 46 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, 47 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, 48 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, 49 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, 50 [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, 51 }; 52 53 FIXTURE(current_nsset) 54 { 55 pid_t pid; 56 int pidfd; 57 int nsfds[PIDFD_NS_MAX]; 58 59 pid_t child_pid_exited; 60 int child_pidfd_exited; 61 62 pid_t child_pid1; 63 int child_pidfd1; 64 int child_nsfds1[PIDFD_NS_MAX]; 65 66 pid_t child_pid2; 67 int child_pidfd2; 68 int child_nsfds2[PIDFD_NS_MAX]; 69 }; 70 71 static int sys_waitid(int which, pid_t pid, int options) 72 { 73 return syscall(__NR_waitid, which, pid, NULL, options, NULL); 74 } 75 76 pid_t create_child(int *pidfd, unsigned flags) 77 { 78 struct clone_args args = { 79 .flags = CLONE_PIDFD | flags, 80 .exit_signal = SIGCHLD, 81 .pidfd = ptr_to_u64(pidfd), 82 }; 83 84 return sys_clone3(&args, sizeof(struct clone_args)); 85 } 86 87 static bool switch_timens(void) 88 { 89 int fd, ret; 90 91 if (unshare(CLONE_NEWTIME)) 92 return false; 93 94 fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC); 95 if (fd < 0) 96 return false; 97 98 ret = setns(fd, CLONE_NEWTIME); 99 close(fd); 100 return ret == 0; 101 } 102 103 static ssize_t read_nointr(int fd, void *buf, size_t count) 104 { 105 ssize_t ret; 106 107 do { 108 ret = read(fd, buf, count); 109 } while (ret < 0 && errno == EINTR); 110 111 return ret; 112 } 113 114 static ssize_t write_nointr(int fd, const void *buf, size_t count) 115 { 116 ssize_t ret; 117 118 do { 119 ret = write(fd, buf, count); 120 } while (ret < 0 && errno == EINTR); 121 122 return ret; 123 } 124 125 FIXTURE_SETUP(current_nsset) 126 { 127 int i, proc_fd, ret; 128 int ipc_sockets[2]; 129 char c; 130 131 for (i = 0; i < PIDFD_NS_MAX; i++) { 132 self->nsfds[i] = -EBADF; 133 self->child_nsfds1[i] = -EBADF; 134 self->child_nsfds2[i] = -EBADF; 135 } 136 137 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); 138 ASSERT_GE(proc_fd, 0) { 139 TH_LOG("%m - Failed to open /proc/self/ns"); 140 } 141 142 self->pid = getpid(); 143 for (i = 0; i < PIDFD_NS_MAX; i++) { 144 const struct ns_info *info = &ns_info[i]; 145 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 146 if (self->nsfds[i] < 0) { 147 EXPECT_EQ(errno, ENOENT) { 148 TH_LOG("%m - Failed to open %s namespace for process %d", 149 info->name, self->pid); 150 } 151 } 152 } 153 154 self->pidfd = sys_pidfd_open(self->pid, 0); 155 EXPECT_GT(self->pidfd, 0) { 156 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 157 } 158 159 /* Create task that exits right away. */ 160 self->child_pid_exited = create_child(&self->child_pidfd_exited, 161 CLONE_NEWUSER | CLONE_NEWNET); 162 EXPECT_GT(self->child_pid_exited, 0); 163 164 if (self->child_pid_exited == 0) 165 _exit(EXIT_SUCCESS); 166 167 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); 168 169 self->pidfd = sys_pidfd_open(self->pid, 0); 170 EXPECT_GE(self->pidfd, 0) { 171 TH_LOG("%m - Failed to open pidfd for process %d", self->pid); 172 } 173 174 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 175 EXPECT_EQ(ret, 0); 176 177 /* Create tasks that will be stopped. */ 178 self->child_pid1 = create_child(&self->child_pidfd1, 179 CLONE_NEWUSER | CLONE_NEWNS | 180 CLONE_NEWCGROUP | CLONE_NEWIPC | 181 CLONE_NEWUTS | CLONE_NEWPID | 182 CLONE_NEWNET); 183 EXPECT_GE(self->child_pid1, 0); 184 185 if (self->child_pid1 == 0) { 186 close(ipc_sockets[0]); 187 188 if (!switch_timens()) 189 _exit(EXIT_FAILURE); 190 191 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 192 _exit(EXIT_FAILURE); 193 194 close(ipc_sockets[1]); 195 196 pause(); 197 _exit(EXIT_SUCCESS); 198 } 199 200 close(ipc_sockets[1]); 201 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); 202 close(ipc_sockets[0]); 203 204 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 205 EXPECT_EQ(ret, 0); 206 207 self->child_pid2 = create_child(&self->child_pidfd2, 208 CLONE_NEWUSER | CLONE_NEWNS | 209 CLONE_NEWCGROUP | CLONE_NEWIPC | 210 CLONE_NEWUTS | CLONE_NEWPID | 211 CLONE_NEWNET); 212 EXPECT_GE(self->child_pid2, 0); 213 214 if (self->child_pid2 == 0) { 215 close(ipc_sockets[0]); 216 217 if (!switch_timens()) 218 _exit(EXIT_FAILURE); 219 220 if (write_nointr(ipc_sockets[1], "1", 1) < 0) 221 _exit(EXIT_FAILURE); 222 223 close(ipc_sockets[1]); 224 225 pause(); 226 _exit(EXIT_SUCCESS); 227 } 228 229 close(ipc_sockets[1]); 230 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); 231 close(ipc_sockets[0]); 232 233 for (i = 0; i < PIDFD_NS_MAX; i++) { 234 char p[100]; 235 236 const struct ns_info *info = &ns_info[i]; 237 238 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); 239 if (self->nsfds[i] < 0) { 240 EXPECT_EQ(errno, ENOENT) { 241 TH_LOG("%m - Failed to open %s namespace for process %d", 242 info->name, self->pid); 243 } 244 } 245 246 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 247 self->child_pid1, info->name); 248 EXPECT_GT(ret, 0); 249 EXPECT_LT(ret, sizeof(p)); 250 251 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC); 252 if (self->child_nsfds1[i] < 0) { 253 EXPECT_EQ(errno, ENOENT) { 254 TH_LOG("%m - Failed to open %s namespace for process %d", 255 info->name, self->child_pid1); 256 } 257 } 258 259 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", 260 self->child_pid2, info->name); 261 EXPECT_GT(ret, 0); 262 EXPECT_LT(ret, sizeof(p)); 263 264 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC); 265 if (self->child_nsfds2[i] < 0) { 266 EXPECT_EQ(errno, ENOENT) { 267 TH_LOG("%m - Failed to open %s namespace for process %d", 268 info->name, self->child_pid1); 269 } 270 } 271 } 272 273 close(proc_fd); 274 } 275 276 FIXTURE_TEARDOWN(current_nsset) 277 { 278 int i; 279 280 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1, 281 SIGKILL, NULL, 0), 0); 282 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2, 283 SIGKILL, NULL, 0), 0); 284 285 for (i = 0; i < PIDFD_NS_MAX; i++) { 286 if (self->nsfds[i] >= 0) 287 close(self->nsfds[i]); 288 if (self->child_nsfds1[i] >= 0) 289 close(self->child_nsfds1[i]); 290 if (self->child_nsfds2[i] >= 0) 291 close(self->child_nsfds2[i]); 292 } 293 294 if (self->child_pidfd1 >= 0) 295 EXPECT_EQ(0, close(self->child_pidfd1)); 296 if (self->child_pidfd2 >= 0) 297 EXPECT_EQ(0, close(self->child_pidfd2)); 298 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); 299 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); 300 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); 301 } 302 303 static int preserve_ns(const int pid, const char *ns) 304 { 305 int ret; 306 char path[50]; 307 308 ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns); 309 if (ret < 0 || (size_t)ret >= sizeof(path)) 310 return -EIO; 311 312 return open(path, O_RDONLY | O_CLOEXEC); 313 } 314 315 static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns) 316 { 317 int ns_fd2 = -EBADF; 318 int ret = -1; 319 struct stat ns_st1, ns_st2; 320 321 ret = fstat(ns_fd1, &ns_st1); 322 if (ret < 0) 323 return -1; 324 325 ns_fd2 = preserve_ns(pid2, ns); 326 if (ns_fd2 < 0) 327 return -1; 328 329 ret = fstat(ns_fd2, &ns_st2); 330 close(ns_fd2); 331 if (ret < 0) 332 return -1; 333 334 /* processes are in the same namespace */ 335 if ((ns_st1.st_dev == ns_st2.st_dev) && 336 (ns_st1.st_ino == ns_st2.st_ino)) 337 return 1; 338 339 /* processes are in different namespaces */ 340 return 0; 341 } 342 343 /* Test that we can't pass garbage to the kernel. */ 344 TEST_F(current_nsset, invalid_flags) 345 { 346 ASSERT_NE(setns(self->pidfd, 0), 0); 347 EXPECT_EQ(errno, EINVAL); 348 349 ASSERT_NE(setns(self->pidfd, -1), 0); 350 EXPECT_EQ(errno, EINVAL); 351 352 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0); 353 EXPECT_EQ(errno, EINVAL); 354 355 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0); 356 EXPECT_EQ(errno, EINVAL); 357 } 358 359 /* Test that we can't attach to a task that has already exited. */ 360 TEST_F(current_nsset, pidfd_exited_child) 361 { 362 int i; 363 pid_t pid; 364 365 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), 366 0); 367 EXPECT_EQ(errno, ESRCH); 368 369 pid = getpid(); 370 for (i = 0; i < PIDFD_NS_MAX; i++) { 371 const struct ns_info *info = &ns_info[i]; 372 /* Verify that we haven't changed any namespaces. */ 373 if (self->nsfds[i] >= 0) 374 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1); 375 } 376 } 377 378 TEST_F(current_nsset, pidfd_incremental_setns) 379 { 380 int i; 381 pid_t pid; 382 383 pid = getpid(); 384 for (i = 0; i < PIDFD_NS_MAX; i++) { 385 const struct ns_info *info = &ns_info[i]; 386 int nsfd; 387 388 if (self->child_nsfds1[i] < 0) 389 continue; 390 391 if (info->flag) { 392 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) { 393 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d", 394 info->name, self->child_pid1, 395 self->child_pidfd1); 396 } 397 } 398 399 /* Verify that we have changed to the correct namespaces. */ 400 if (info->flag == CLONE_NEWPID) 401 nsfd = self->nsfds[i]; 402 else 403 nsfd = self->child_nsfds1[i]; 404 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 405 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d", 406 info->name, self->child_pid1, 407 self->child_pidfd1); 408 } 409 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d", 410 info->name, self->child_pid1, self->child_pidfd1); 411 } 412 } 413 414 TEST_F(current_nsset, nsfd_incremental_setns) 415 { 416 int i; 417 pid_t pid; 418 419 pid = getpid(); 420 for (i = 0; i < PIDFD_NS_MAX; i++) { 421 const struct ns_info *info = &ns_info[i]; 422 int nsfd; 423 424 if (self->child_nsfds1[i] < 0) 425 continue; 426 427 if (info->flag) { 428 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) { 429 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", 430 info->name, self->child_pid1, 431 self->child_nsfds1[i]); 432 } 433 } 434 435 /* Verify that we have changed to the correct namespaces. */ 436 if (info->flag == CLONE_NEWPID) 437 nsfd = self->nsfds[i]; 438 else 439 nsfd = self->child_nsfds1[i]; 440 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 441 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", 442 info->name, self->child_pid1, 443 self->child_nsfds1[i]); 444 } 445 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", 446 info->name, self->child_pid1, self->child_nsfds1[i]); 447 } 448 } 449 450 TEST_F(current_nsset, pidfd_one_shot_setns) 451 { 452 unsigned flags = 0; 453 int i; 454 pid_t pid; 455 456 for (i = 0; i < PIDFD_NS_MAX; i++) { 457 const struct ns_info *info = &ns_info[i]; 458 459 if (self->child_nsfds1[i] < 0) 460 continue; 461 462 flags |= info->flag; 463 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 464 info->name, self->child_pid1); 465 } 466 467 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 468 TH_LOG("%m - Failed to setns to namespaces of %d", 469 self->child_pid1); 470 } 471 472 pid = getpid(); 473 for (i = 0; i < PIDFD_NS_MAX; i++) { 474 const struct ns_info *info = &ns_info[i]; 475 int nsfd; 476 477 if (self->child_nsfds1[i] < 0) 478 continue; 479 480 /* Verify that we have changed to the correct namespaces. */ 481 if (info->flag == CLONE_NEWPID) 482 nsfd = self->nsfds[i]; 483 else 484 nsfd = self->child_nsfds1[i]; 485 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { 486 TH_LOG("setns failed to place us correctly into %s namespace of %d", 487 info->name, self->child_pid1); 488 } 489 TH_LOG("Managed to correctly setns to %s namespace of %d", 490 info->name, self->child_pid1); 491 } 492 } 493 494 TEST_F(current_nsset, no_foul_play) 495 { 496 unsigned flags = 0; 497 int i; 498 499 for (i = 0; i < PIDFD_NS_MAX; i++) { 500 const struct ns_info *info = &ns_info[i]; 501 502 if (self->child_nsfds1[i] < 0) 503 continue; 504 505 flags |= info->flag; 506 if (info->flag) /* No use logging pid_for_children. */ 507 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", 508 info->name, self->child_pid1); 509 } 510 511 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { 512 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d", 513 self->child_pid1, self->child_pidfd1); 514 } 515 516 /* 517 * Can't setns to a user namespace outside of our hierarchy since we 518 * don't have caps in there and didn't create it. That means that under 519 * no circumstances should we be able to setns to any of the other 520 * ones since they aren't owned by our user namespace. 521 */ 522 for (i = 0; i < PIDFD_NS_MAX; i++) { 523 const struct ns_info *info = &ns_info[i]; 524 525 if (self->child_nsfds2[i] < 0 || !info->flag) 526 continue; 527 528 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) { 529 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d", 530 info->name, self->child_pid2, 531 self->child_pidfd2); 532 } 533 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d", 534 info->name, self->child_pid2, 535 self->child_pidfd2); 536 537 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) { 538 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", 539 info->name, self->child_pid2, 540 self->child_nsfds2[i]); 541 } 542 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", 543 info->name, self->child_pid2, 544 self->child_nsfds2[i]); 545 } 546 } 547 548 TEST(setns_einval) 549 { 550 int fd; 551 552 fd = sys_memfd_create("rostock", 0); 553 EXPECT_GT(fd, 0); 554 555 ASSERT_NE(setns(fd, 0), 0); 556 EXPECT_EQ(errno, EINVAL); 557 close(fd); 558 } 559 560 TEST_HARNESS_MAIN 561