1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4 * 5 * Test code for seccomp bpf. 6 */ 7 8 #define _GNU_SOURCE 9 #include <sys/types.h> 10 11 /* 12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, 13 * we need to use the kernel's siginfo.h file and trick glibc 14 * into accepting it. 15 */ 16 #if !__GLIBC_PREREQ(2, 26) 17 # include <asm/siginfo.h> 18 # define __have_siginfo_t 1 19 # define __have_sigval_t 1 20 # define __have_sigevent_t 1 21 #endif 22 23 #include <errno.h> 24 #include <linux/filter.h> 25 #include <sys/prctl.h> 26 #include <sys/ptrace.h> 27 #include <sys/user.h> 28 #include <linux/prctl.h> 29 #include <linux/ptrace.h> 30 #include <linux/seccomp.h> 31 #include <pthread.h> 32 #include <semaphore.h> 33 #include <signal.h> 34 #include <stddef.h> 35 #include <stdbool.h> 36 #include <string.h> 37 #include <time.h> 38 #include <limits.h> 39 #include <linux/elf.h> 40 #include <sys/uio.h> 41 #include <sys/utsname.h> 42 #include <sys/fcntl.h> 43 #include <sys/mman.h> 44 #include <sys/times.h> 45 #include <sys/socket.h> 46 #include <sys/ioctl.h> 47 #include <linux/kcmp.h> 48 #include <sys/resource.h> 49 50 #include <unistd.h> 51 #include <sys/syscall.h> 52 #include <poll.h> 53 54 #include "../kselftest_harness.h" 55 #include "../clone3/clone3_selftests.h" 56 57 /* Attempt to de-conflict with the selftests tree. */ 58 #ifndef SKIP 59 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) 60 #endif 61 62 #ifndef PR_SET_PTRACER 63 # define PR_SET_PTRACER 0x59616d61 64 #endif 65 66 #ifndef PR_SET_NO_NEW_PRIVS 67 #define PR_SET_NO_NEW_PRIVS 38 68 #define PR_GET_NO_NEW_PRIVS 39 69 #endif 70 71 #ifndef PR_SECCOMP_EXT 72 #define PR_SECCOMP_EXT 43 73 #endif 74 75 #ifndef SECCOMP_EXT_ACT 76 #define SECCOMP_EXT_ACT 1 77 #endif 78 79 #ifndef SECCOMP_EXT_ACT_TSYNC 80 #define SECCOMP_EXT_ACT_TSYNC 1 81 #endif 82 83 #ifndef SECCOMP_MODE_STRICT 84 #define SECCOMP_MODE_STRICT 1 85 #endif 86 87 #ifndef SECCOMP_MODE_FILTER 88 #define SECCOMP_MODE_FILTER 2 89 #endif 90 91 #ifndef SECCOMP_RET_ALLOW 92 struct seccomp_data { 93 int nr; 94 __u32 arch; 95 __u64 instruction_pointer; 96 __u64 args[6]; 97 }; 98 #endif 99 100 #ifndef SECCOMP_RET_KILL_PROCESS 101 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ 102 #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ 103 #endif 104 #ifndef SECCOMP_RET_KILL 105 #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD 106 #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ 107 #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ 108 #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ 109 #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ 110 #endif 111 #ifndef SECCOMP_RET_LOG 112 #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ 113 #endif 114 115 #ifndef __NR_seccomp 116 # if defined(__i386__) 117 # define __NR_seccomp 354 118 # elif defined(__x86_64__) 119 # define __NR_seccomp 317 120 # elif defined(__arm__) 121 # define __NR_seccomp 383 122 # elif defined(__aarch64__) 123 # define __NR_seccomp 277 124 # elif defined(__riscv) 125 # define __NR_seccomp 277 126 # elif defined(__csky__) 127 # define __NR_seccomp 277 128 # elif defined(__hppa__) 129 # define __NR_seccomp 338 130 # elif defined(__powerpc__) 131 # define __NR_seccomp 358 132 # elif defined(__s390__) 133 # define __NR_seccomp 348 134 # elif defined(__xtensa__) 135 # define __NR_seccomp 337 136 # else 137 # warning "seccomp syscall number unknown for this architecture" 138 # define __NR_seccomp 0xffff 139 # endif 140 #endif 141 142 #ifndef SECCOMP_SET_MODE_STRICT 143 #define SECCOMP_SET_MODE_STRICT 0 144 #endif 145 146 #ifndef SECCOMP_SET_MODE_FILTER 147 #define SECCOMP_SET_MODE_FILTER 1 148 #endif 149 150 #ifndef SECCOMP_GET_ACTION_AVAIL 151 #define SECCOMP_GET_ACTION_AVAIL 2 152 #endif 153 154 #ifndef SECCOMP_GET_NOTIF_SIZES 155 #define SECCOMP_GET_NOTIF_SIZES 3 156 #endif 157 158 #ifndef SECCOMP_FILTER_FLAG_TSYNC 159 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 160 #endif 161 162 #ifndef SECCOMP_FILTER_FLAG_LOG 163 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 164 #endif 165 166 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW 167 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 168 #endif 169 170 #ifndef PTRACE_SECCOMP_GET_METADATA 171 #define PTRACE_SECCOMP_GET_METADATA 0x420d 172 173 struct seccomp_metadata { 174 __u64 filter_off; /* Input: which filter */ 175 __u64 flags; /* Output: filter's flags */ 176 }; 177 #endif 178 179 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER 180 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 181 #endif 182 183 #ifndef SECCOMP_RET_USER_NOTIF 184 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U 185 186 #define SECCOMP_IOC_MAGIC '!' 187 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 188 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 189 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 190 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 191 192 /* Flags for seccomp notification fd ioctl. */ 193 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 194 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 195 struct seccomp_notif_resp) 196 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) 197 198 struct seccomp_notif { 199 __u64 id; 200 __u32 pid; 201 __u32 flags; 202 struct seccomp_data data; 203 }; 204 205 struct seccomp_notif_resp { 206 __u64 id; 207 __s64 val; 208 __s32 error; 209 __u32 flags; 210 }; 211 212 struct seccomp_notif_sizes { 213 __u16 seccomp_notif; 214 __u16 seccomp_notif_resp; 215 __u16 seccomp_data; 216 }; 217 #endif 218 219 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD 220 /* On success, the return value is the remote process's added fd number */ 221 #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ 222 struct seccomp_notif_addfd) 223 224 /* valid flags for seccomp_notif_addfd */ 225 #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ 226 227 struct seccomp_notif_addfd { 228 __u64 id; 229 __u32 flags; 230 __u32 srcfd; 231 __u32 newfd; 232 __u32 newfd_flags; 233 }; 234 #endif 235 236 struct seccomp_notif_addfd_small { 237 __u64 id; 238 char weird[4]; 239 }; 240 #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ 241 SECCOMP_IOW(3, struct seccomp_notif_addfd_small) 242 243 struct seccomp_notif_addfd_big { 244 union { 245 struct seccomp_notif_addfd addfd; 246 char buf[sizeof(struct seccomp_notif_addfd) + 8]; 247 }; 248 }; 249 #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ 250 SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) 251 252 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY 253 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 254 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 255 #endif 256 257 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE 258 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 259 #endif 260 261 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH 262 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) 263 #endif 264 265 #ifndef seccomp 266 int seccomp(unsigned int op, unsigned int flags, void *args) 267 { 268 errno = 0; 269 return syscall(__NR_seccomp, op, flags, args); 270 } 271 #endif 272 273 #if __BYTE_ORDER == __LITTLE_ENDIAN 274 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) 275 #elif __BYTE_ORDER == __BIG_ENDIAN 276 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) 277 #else 278 #error "wut? Unknown __BYTE_ORDER?!" 279 #endif 280 281 #define SIBLING_EXIT_UNKILLED 0xbadbeef 282 #define SIBLING_EXIT_FAILURE 0xbadface 283 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed 284 285 static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) 286 { 287 #ifdef __NR_kcmp 288 errno = 0; 289 return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); 290 #else 291 errno = ENOSYS; 292 return -1; 293 #endif 294 } 295 296 /* Have TH_LOG report actual location filecmp() is used. */ 297 #define filecmp(pid1, pid2, fd1, fd2) ({ \ 298 int _ret; \ 299 \ 300 _ret = __filecmp(pid1, pid2, fd1, fd2); \ 301 if (_ret != 0) { \ 302 if (_ret < 0 && errno == ENOSYS) { \ 303 TH_LOG("kcmp() syscall missing (test is less accurate)");\ 304 _ret = 0; \ 305 } \ 306 } \ 307 _ret; }) 308 309 TEST(kcmp) 310 { 311 int ret; 312 313 ret = __filecmp(getpid(), getpid(), 1, 1); 314 EXPECT_EQ(ret, 0); 315 if (ret != 0 && errno == ENOSYS) 316 SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)"); 317 } 318 319 TEST(mode_strict_support) 320 { 321 long ret; 322 323 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 324 ASSERT_EQ(0, ret) { 325 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 326 } 327 syscall(__NR_exit, 0); 328 } 329 330 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 331 { 332 long ret; 333 334 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 335 ASSERT_EQ(0, ret) { 336 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 337 } 338 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 339 NULL, NULL, NULL); 340 EXPECT_FALSE(true) { 341 TH_LOG("Unreachable!"); 342 } 343 } 344 345 /* Note! This doesn't test no new privs behavior */ 346 TEST(no_new_privs_support) 347 { 348 long ret; 349 350 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 351 EXPECT_EQ(0, ret) { 352 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 353 } 354 } 355 356 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */ 357 TEST(mode_filter_support) 358 { 359 long ret; 360 361 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 362 ASSERT_EQ(0, ret) { 363 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 364 } 365 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 366 EXPECT_EQ(-1, ret); 367 EXPECT_EQ(EFAULT, errno) { 368 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 369 } 370 } 371 372 TEST(mode_filter_without_nnp) 373 { 374 struct sock_filter filter[] = { 375 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 376 }; 377 struct sock_fprog prog = { 378 .len = (unsigned short)ARRAY_SIZE(filter), 379 .filter = filter, 380 }; 381 long ret; 382 383 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 384 ASSERT_LE(0, ret) { 385 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 386 } 387 errno = 0; 388 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 389 /* Succeeds with CAP_SYS_ADMIN, fails without */ 390 /* TODO(wad) check caps not euid */ 391 if (geteuid()) { 392 EXPECT_EQ(-1, ret); 393 EXPECT_EQ(EACCES, errno); 394 } else { 395 EXPECT_EQ(0, ret); 396 } 397 } 398 399 #define MAX_INSNS_PER_PATH 32768 400 401 TEST(filter_size_limits) 402 { 403 int i; 404 int count = BPF_MAXINSNS + 1; 405 struct sock_filter allow[] = { 406 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 407 }; 408 struct sock_filter *filter; 409 struct sock_fprog prog = { }; 410 long ret; 411 412 filter = calloc(count, sizeof(*filter)); 413 ASSERT_NE(NULL, filter); 414 415 for (i = 0; i < count; i++) 416 filter[i] = allow[0]; 417 418 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 419 ASSERT_EQ(0, ret); 420 421 prog.filter = filter; 422 prog.len = count; 423 424 /* Too many filter instructions in a single filter. */ 425 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 426 ASSERT_NE(0, ret) { 427 TH_LOG("Installing %d insn filter was allowed", prog.len); 428 } 429 430 /* One less is okay, though. */ 431 prog.len -= 1; 432 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 433 ASSERT_EQ(0, ret) { 434 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 435 } 436 } 437 438 TEST(filter_chain_limits) 439 { 440 int i; 441 int count = BPF_MAXINSNS; 442 struct sock_filter allow[] = { 443 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 444 }; 445 struct sock_filter *filter; 446 struct sock_fprog prog = { }; 447 long ret; 448 449 filter = calloc(count, sizeof(*filter)); 450 ASSERT_NE(NULL, filter); 451 452 for (i = 0; i < count; i++) 453 filter[i] = allow[0]; 454 455 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 456 ASSERT_EQ(0, ret); 457 458 prog.filter = filter; 459 prog.len = 1; 460 461 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 462 ASSERT_EQ(0, ret); 463 464 prog.len = count; 465 466 /* Too many total filter instructions. */ 467 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 468 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 469 if (ret != 0) 470 break; 471 } 472 ASSERT_NE(0, ret) { 473 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 474 i, count, i * (count + 4)); 475 } 476 } 477 478 TEST(mode_filter_cannot_move_to_strict) 479 { 480 struct sock_filter filter[] = { 481 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 482 }; 483 struct sock_fprog prog = { 484 .len = (unsigned short)ARRAY_SIZE(filter), 485 .filter = filter, 486 }; 487 long ret; 488 489 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 490 ASSERT_EQ(0, ret); 491 492 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 493 ASSERT_EQ(0, ret); 494 495 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 496 EXPECT_EQ(-1, ret); 497 EXPECT_EQ(EINVAL, errno); 498 } 499 500 501 TEST(mode_filter_get_seccomp) 502 { 503 struct sock_filter filter[] = { 504 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 505 }; 506 struct sock_fprog prog = { 507 .len = (unsigned short)ARRAY_SIZE(filter), 508 .filter = filter, 509 }; 510 long ret; 511 512 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 513 ASSERT_EQ(0, ret); 514 515 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 516 EXPECT_EQ(0, ret); 517 518 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 519 ASSERT_EQ(0, ret); 520 521 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 522 EXPECT_EQ(2, ret); 523 } 524 525 526 TEST(ALLOW_all) 527 { 528 struct sock_filter filter[] = { 529 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 530 }; 531 struct sock_fprog prog = { 532 .len = (unsigned short)ARRAY_SIZE(filter), 533 .filter = filter, 534 }; 535 long ret; 536 537 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 538 ASSERT_EQ(0, ret); 539 540 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 541 ASSERT_EQ(0, ret); 542 } 543 544 TEST(empty_prog) 545 { 546 struct sock_filter filter[] = { 547 }; 548 struct sock_fprog prog = { 549 .len = (unsigned short)ARRAY_SIZE(filter), 550 .filter = filter, 551 }; 552 long ret; 553 554 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 555 ASSERT_EQ(0, ret); 556 557 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 558 EXPECT_EQ(-1, ret); 559 EXPECT_EQ(EINVAL, errno); 560 } 561 562 TEST(log_all) 563 { 564 struct sock_filter filter[] = { 565 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 566 }; 567 struct sock_fprog prog = { 568 .len = (unsigned short)ARRAY_SIZE(filter), 569 .filter = filter, 570 }; 571 long ret; 572 pid_t parent = getppid(); 573 574 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 575 ASSERT_EQ(0, ret); 576 577 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 578 ASSERT_EQ(0, ret); 579 580 /* getppid() should succeed and be logged (no check for logging) */ 581 EXPECT_EQ(parent, syscall(__NR_getppid)); 582 } 583 584 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 585 { 586 struct sock_filter filter[] = { 587 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 588 }; 589 struct sock_fprog prog = { 590 .len = (unsigned short)ARRAY_SIZE(filter), 591 .filter = filter, 592 }; 593 long ret; 594 595 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 596 ASSERT_EQ(0, ret); 597 598 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 599 ASSERT_EQ(0, ret); 600 EXPECT_EQ(0, syscall(__NR_getpid)) { 601 TH_LOG("getpid() shouldn't ever return"); 602 } 603 } 604 605 /* return code >= 0x80000000 is unused. */ 606 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) 607 { 608 struct sock_filter filter[] = { 609 BPF_STMT(BPF_RET|BPF_K, 0x90000000U), 610 }; 611 struct sock_fprog prog = { 612 .len = (unsigned short)ARRAY_SIZE(filter), 613 .filter = filter, 614 }; 615 long ret; 616 617 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 618 ASSERT_EQ(0, ret); 619 620 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 621 ASSERT_EQ(0, ret); 622 EXPECT_EQ(0, syscall(__NR_getpid)) { 623 TH_LOG("getpid() shouldn't ever return"); 624 } 625 } 626 627 TEST_SIGNAL(KILL_all, SIGSYS) 628 { 629 struct sock_filter filter[] = { 630 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 631 }; 632 struct sock_fprog prog = { 633 .len = (unsigned short)ARRAY_SIZE(filter), 634 .filter = filter, 635 }; 636 long ret; 637 638 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 639 ASSERT_EQ(0, ret); 640 641 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 642 ASSERT_EQ(0, ret); 643 } 644 645 TEST_SIGNAL(KILL_one, SIGSYS) 646 { 647 struct sock_filter filter[] = { 648 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 649 offsetof(struct seccomp_data, nr)), 650 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 651 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 652 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 653 }; 654 struct sock_fprog prog = { 655 .len = (unsigned short)ARRAY_SIZE(filter), 656 .filter = filter, 657 }; 658 long ret; 659 pid_t parent = getppid(); 660 661 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 662 ASSERT_EQ(0, ret); 663 664 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 665 ASSERT_EQ(0, ret); 666 667 EXPECT_EQ(parent, syscall(__NR_getppid)); 668 /* getpid() should never return. */ 669 EXPECT_EQ(0, syscall(__NR_getpid)); 670 } 671 672 TEST_SIGNAL(KILL_one_arg_one, SIGSYS) 673 { 674 void *fatal_address; 675 struct sock_filter filter[] = { 676 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 677 offsetof(struct seccomp_data, nr)), 678 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), 679 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 680 /* Only both with lower 32-bit for now. */ 681 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), 682 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 683 (unsigned long)&fatal_address, 0, 1), 684 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 685 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 686 }; 687 struct sock_fprog prog = { 688 .len = (unsigned short)ARRAY_SIZE(filter), 689 .filter = filter, 690 }; 691 long ret; 692 pid_t parent = getppid(); 693 struct tms timebuf; 694 clock_t clock = times(&timebuf); 695 696 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 697 ASSERT_EQ(0, ret); 698 699 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 700 ASSERT_EQ(0, ret); 701 702 EXPECT_EQ(parent, syscall(__NR_getppid)); 703 EXPECT_LE(clock, syscall(__NR_times, &timebuf)); 704 /* times() should never return. */ 705 EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); 706 } 707 708 TEST_SIGNAL(KILL_one_arg_six, SIGSYS) 709 { 710 #ifndef __NR_mmap2 711 int sysno = __NR_mmap; 712 #else 713 int sysno = __NR_mmap2; 714 #endif 715 struct sock_filter filter[] = { 716 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 717 offsetof(struct seccomp_data, nr)), 718 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), 719 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 720 /* Only both with lower 32-bit for now. */ 721 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), 722 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), 723 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 724 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 725 }; 726 struct sock_fprog prog = { 727 .len = (unsigned short)ARRAY_SIZE(filter), 728 .filter = filter, 729 }; 730 long ret; 731 pid_t parent = getppid(); 732 int fd; 733 void *map1, *map2; 734 int page_size = sysconf(_SC_PAGESIZE); 735 736 ASSERT_LT(0, page_size); 737 738 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 739 ASSERT_EQ(0, ret); 740 741 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 742 ASSERT_EQ(0, ret); 743 744 fd = open("/dev/zero", O_RDONLY); 745 ASSERT_NE(-1, fd); 746 747 EXPECT_EQ(parent, syscall(__NR_getppid)); 748 map1 = (void *)syscall(sysno, 749 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); 750 EXPECT_NE(MAP_FAILED, map1); 751 /* mmap2() should never return. */ 752 map2 = (void *)syscall(sysno, 753 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); 754 EXPECT_EQ(MAP_FAILED, map2); 755 756 /* The test failed, so clean up the resources. */ 757 munmap(map1, page_size); 758 munmap(map2, page_size); 759 close(fd); 760 } 761 762 /* This is a thread task to die via seccomp filter violation. */ 763 void *kill_thread(void *data) 764 { 765 bool die = (bool)data; 766 767 if (die) { 768 prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 769 return (void *)SIBLING_EXIT_FAILURE; 770 } 771 772 return (void *)SIBLING_EXIT_UNKILLED; 773 } 774 775 /* Prepare a thread that will kill itself or both of us. */ 776 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) 777 { 778 pthread_t thread; 779 void *status; 780 /* Kill only when calling __NR_prctl. */ 781 struct sock_filter filter_thread[] = { 782 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 783 offsetof(struct seccomp_data, nr)), 784 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 785 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 786 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 787 }; 788 struct sock_fprog prog_thread = { 789 .len = (unsigned short)ARRAY_SIZE(filter_thread), 790 .filter = filter_thread, 791 }; 792 struct sock_filter filter_process[] = { 793 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 794 offsetof(struct seccomp_data, nr)), 795 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 796 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), 797 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 798 }; 799 struct sock_fprog prog_process = { 800 .len = (unsigned short)ARRAY_SIZE(filter_process), 801 .filter = filter_process, 802 }; 803 804 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 805 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 806 } 807 808 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, 809 kill_process ? &prog_process : &prog_thread)); 810 811 /* 812 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS 813 * flag cannot be downgraded by a new filter. 814 */ 815 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 816 817 /* Start a thread that will exit immediately. */ 818 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 819 ASSERT_EQ(0, pthread_join(thread, &status)); 820 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 821 822 /* Start a thread that will die immediately. */ 823 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 824 ASSERT_EQ(0, pthread_join(thread, &status)); 825 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 826 827 /* 828 * If we get here, only the spawned thread died. Let the parent know 829 * the whole process didn't die (i.e. this thread, the spawner, 830 * stayed running). 831 */ 832 exit(42); 833 } 834 835 TEST(KILL_thread) 836 { 837 int status; 838 pid_t child_pid; 839 840 child_pid = fork(); 841 ASSERT_LE(0, child_pid); 842 if (child_pid == 0) { 843 kill_thread_or_group(_metadata, false); 844 _exit(38); 845 } 846 847 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 848 849 /* If only the thread was killed, we'll see exit 42. */ 850 ASSERT_TRUE(WIFEXITED(status)); 851 ASSERT_EQ(42, WEXITSTATUS(status)); 852 } 853 854 TEST(KILL_process) 855 { 856 int status; 857 pid_t child_pid; 858 859 child_pid = fork(); 860 ASSERT_LE(0, child_pid); 861 if (child_pid == 0) { 862 kill_thread_or_group(_metadata, true); 863 _exit(38); 864 } 865 866 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 867 868 /* If the entire process was killed, we'll see SIGSYS. */ 869 ASSERT_TRUE(WIFSIGNALED(status)); 870 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 871 } 872 873 /* TODO(wad) add 64-bit versus 32-bit arg tests. */ 874 TEST(arg_out_of_range) 875 { 876 struct sock_filter filter[] = { 877 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 878 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 879 }; 880 struct sock_fprog prog = { 881 .len = (unsigned short)ARRAY_SIZE(filter), 882 .filter = filter, 883 }; 884 long ret; 885 886 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 887 ASSERT_EQ(0, ret); 888 889 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 890 EXPECT_EQ(-1, ret); 891 EXPECT_EQ(EINVAL, errno); 892 } 893 894 #define ERRNO_FILTER(name, errno) \ 895 struct sock_filter _read_filter_##name[] = { \ 896 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 897 offsetof(struct seccomp_data, nr)), \ 898 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 899 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 900 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 901 }; \ 902 struct sock_fprog prog_##name = { \ 903 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 904 .filter = _read_filter_##name, \ 905 } 906 907 /* Make sure basic errno values are correctly passed through a filter. */ 908 TEST(ERRNO_valid) 909 { 910 ERRNO_FILTER(valid, E2BIG); 911 long ret; 912 pid_t parent = getppid(); 913 914 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 915 ASSERT_EQ(0, ret); 916 917 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 918 ASSERT_EQ(0, ret); 919 920 EXPECT_EQ(parent, syscall(__NR_getppid)); 921 EXPECT_EQ(-1, read(0, NULL, 0)); 922 EXPECT_EQ(E2BIG, errno); 923 } 924 925 /* Make sure an errno of zero is correctly handled by the arch code. */ 926 TEST(ERRNO_zero) 927 { 928 ERRNO_FILTER(zero, 0); 929 long ret; 930 pid_t parent = getppid(); 931 932 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 933 ASSERT_EQ(0, ret); 934 935 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 936 ASSERT_EQ(0, ret); 937 938 EXPECT_EQ(parent, syscall(__NR_getppid)); 939 /* "errno" of 0 is ok. */ 940 EXPECT_EQ(0, read(0, NULL, 0)); 941 } 942 943 /* 944 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 945 * This tests that the errno value gets capped correctly, fixed by 946 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 947 */ 948 TEST(ERRNO_capped) 949 { 950 ERRNO_FILTER(capped, 4096); 951 long ret; 952 pid_t parent = getppid(); 953 954 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 955 ASSERT_EQ(0, ret); 956 957 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 958 ASSERT_EQ(0, ret); 959 960 EXPECT_EQ(parent, syscall(__NR_getppid)); 961 EXPECT_EQ(-1, read(0, NULL, 0)); 962 EXPECT_EQ(4095, errno); 963 } 964 965 /* 966 * Filters are processed in reverse order: last applied is executed first. 967 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 968 * SECCOMP_RET_DATA mask results will follow the most recently applied 969 * matching filter return (and not the lowest or highest value). 970 */ 971 TEST(ERRNO_order) 972 { 973 ERRNO_FILTER(first, 11); 974 ERRNO_FILTER(second, 13); 975 ERRNO_FILTER(third, 12); 976 long ret; 977 pid_t parent = getppid(); 978 979 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 980 ASSERT_EQ(0, ret); 981 982 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 983 ASSERT_EQ(0, ret); 984 985 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 986 ASSERT_EQ(0, ret); 987 988 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 989 ASSERT_EQ(0, ret); 990 991 EXPECT_EQ(parent, syscall(__NR_getppid)); 992 EXPECT_EQ(-1, read(0, NULL, 0)); 993 EXPECT_EQ(12, errno); 994 } 995 996 FIXTURE(TRAP) { 997 struct sock_fprog prog; 998 }; 999 1000 FIXTURE_SETUP(TRAP) 1001 { 1002 struct sock_filter filter[] = { 1003 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1004 offsetof(struct seccomp_data, nr)), 1005 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1006 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1007 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1008 }; 1009 1010 memset(&self->prog, 0, sizeof(self->prog)); 1011 self->prog.filter = malloc(sizeof(filter)); 1012 ASSERT_NE(NULL, self->prog.filter); 1013 memcpy(self->prog.filter, filter, sizeof(filter)); 1014 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1015 } 1016 1017 FIXTURE_TEARDOWN(TRAP) 1018 { 1019 if (self->prog.filter) 1020 free(self->prog.filter); 1021 } 1022 1023 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 1024 { 1025 long ret; 1026 1027 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1028 ASSERT_EQ(0, ret); 1029 1030 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1031 ASSERT_EQ(0, ret); 1032 syscall(__NR_getpid); 1033 } 1034 1035 /* Ensure that SIGSYS overrides SIG_IGN */ 1036 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 1037 { 1038 long ret; 1039 1040 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1041 ASSERT_EQ(0, ret); 1042 1043 signal(SIGSYS, SIG_IGN); 1044 1045 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1046 ASSERT_EQ(0, ret); 1047 syscall(__NR_getpid); 1048 } 1049 1050 static siginfo_t TRAP_info; 1051 static volatile int TRAP_nr; 1052 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 1053 { 1054 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 1055 TRAP_nr = nr; 1056 } 1057 1058 TEST_F(TRAP, handler) 1059 { 1060 int ret, test; 1061 struct sigaction act; 1062 sigset_t mask; 1063 1064 memset(&act, 0, sizeof(act)); 1065 sigemptyset(&mask); 1066 sigaddset(&mask, SIGSYS); 1067 1068 act.sa_sigaction = &TRAP_action; 1069 act.sa_flags = SA_SIGINFO; 1070 ret = sigaction(SIGSYS, &act, NULL); 1071 ASSERT_EQ(0, ret) { 1072 TH_LOG("sigaction failed"); 1073 } 1074 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 1075 ASSERT_EQ(0, ret) { 1076 TH_LOG("sigprocmask failed"); 1077 } 1078 1079 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1080 ASSERT_EQ(0, ret); 1081 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1082 ASSERT_EQ(0, ret); 1083 TRAP_nr = 0; 1084 memset(&TRAP_info, 0, sizeof(TRAP_info)); 1085 /* Expect the registers to be rolled back. (nr = error) may vary 1086 * based on arch. */ 1087 ret = syscall(__NR_getpid); 1088 /* Silence gcc warning about volatile. */ 1089 test = TRAP_nr; 1090 EXPECT_EQ(SIGSYS, test); 1091 struct local_sigsys { 1092 void *_call_addr; /* calling user insn */ 1093 int _syscall; /* triggering system call number */ 1094 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1095 } *sigsys = (struct local_sigsys *) 1096 #ifdef si_syscall 1097 &(TRAP_info.si_call_addr); 1098 #else 1099 &TRAP_info.si_pid; 1100 #endif 1101 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1102 /* Make sure arch is non-zero. */ 1103 EXPECT_NE(0, sigsys->_arch); 1104 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1105 } 1106 1107 FIXTURE(precedence) { 1108 struct sock_fprog allow; 1109 struct sock_fprog log; 1110 struct sock_fprog trace; 1111 struct sock_fprog error; 1112 struct sock_fprog trap; 1113 struct sock_fprog kill; 1114 }; 1115 1116 FIXTURE_SETUP(precedence) 1117 { 1118 struct sock_filter allow_insns[] = { 1119 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1120 }; 1121 struct sock_filter log_insns[] = { 1122 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1123 offsetof(struct seccomp_data, nr)), 1124 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1125 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1126 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1127 }; 1128 struct sock_filter trace_insns[] = { 1129 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1130 offsetof(struct seccomp_data, nr)), 1131 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1132 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1133 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1134 }; 1135 struct sock_filter error_insns[] = { 1136 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1137 offsetof(struct seccomp_data, nr)), 1138 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1139 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1140 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1141 }; 1142 struct sock_filter trap_insns[] = { 1143 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1144 offsetof(struct seccomp_data, nr)), 1145 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1146 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1147 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1148 }; 1149 struct sock_filter kill_insns[] = { 1150 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1151 offsetof(struct seccomp_data, nr)), 1152 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1153 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1154 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1155 }; 1156 1157 memset(self, 0, sizeof(*self)); 1158 #define FILTER_ALLOC(_x) \ 1159 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1160 ASSERT_NE(NULL, self->_x.filter); \ 1161 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1162 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1163 FILTER_ALLOC(allow); 1164 FILTER_ALLOC(log); 1165 FILTER_ALLOC(trace); 1166 FILTER_ALLOC(error); 1167 FILTER_ALLOC(trap); 1168 FILTER_ALLOC(kill); 1169 } 1170 1171 FIXTURE_TEARDOWN(precedence) 1172 { 1173 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1174 FILTER_FREE(allow); 1175 FILTER_FREE(log); 1176 FILTER_FREE(trace); 1177 FILTER_FREE(error); 1178 FILTER_FREE(trap); 1179 FILTER_FREE(kill); 1180 } 1181 1182 TEST_F(precedence, allow_ok) 1183 { 1184 pid_t parent, res = 0; 1185 long ret; 1186 1187 parent = getppid(); 1188 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1189 ASSERT_EQ(0, ret); 1190 1191 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1192 ASSERT_EQ(0, ret); 1193 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1194 ASSERT_EQ(0, ret); 1195 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1196 ASSERT_EQ(0, ret); 1197 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1198 ASSERT_EQ(0, ret); 1199 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1200 ASSERT_EQ(0, ret); 1201 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1202 ASSERT_EQ(0, ret); 1203 /* Should work just fine. */ 1204 res = syscall(__NR_getppid); 1205 EXPECT_EQ(parent, res); 1206 } 1207 1208 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1209 { 1210 pid_t parent, res = 0; 1211 long ret; 1212 1213 parent = getppid(); 1214 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1215 ASSERT_EQ(0, ret); 1216 1217 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1218 ASSERT_EQ(0, ret); 1219 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1220 ASSERT_EQ(0, ret); 1221 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1222 ASSERT_EQ(0, ret); 1223 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1224 ASSERT_EQ(0, ret); 1225 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1226 ASSERT_EQ(0, ret); 1227 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1228 ASSERT_EQ(0, ret); 1229 /* Should work just fine. */ 1230 res = syscall(__NR_getppid); 1231 EXPECT_EQ(parent, res); 1232 /* getpid() should never return. */ 1233 res = syscall(__NR_getpid); 1234 EXPECT_EQ(0, res); 1235 } 1236 1237 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1238 { 1239 pid_t parent; 1240 long ret; 1241 1242 parent = getppid(); 1243 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1244 ASSERT_EQ(0, ret); 1245 1246 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1247 ASSERT_EQ(0, ret); 1248 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1249 ASSERT_EQ(0, ret); 1250 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1251 ASSERT_EQ(0, ret); 1252 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1253 ASSERT_EQ(0, ret); 1254 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1255 ASSERT_EQ(0, ret); 1256 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1257 ASSERT_EQ(0, ret); 1258 /* Should work just fine. */ 1259 EXPECT_EQ(parent, syscall(__NR_getppid)); 1260 /* getpid() should never return. */ 1261 EXPECT_EQ(0, syscall(__NR_getpid)); 1262 } 1263 1264 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1265 { 1266 pid_t parent; 1267 long ret; 1268 1269 parent = getppid(); 1270 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1271 ASSERT_EQ(0, ret); 1272 1273 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1274 ASSERT_EQ(0, ret); 1275 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1276 ASSERT_EQ(0, ret); 1277 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1278 ASSERT_EQ(0, ret); 1279 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1280 ASSERT_EQ(0, ret); 1281 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1282 ASSERT_EQ(0, ret); 1283 /* Should work just fine. */ 1284 EXPECT_EQ(parent, syscall(__NR_getppid)); 1285 /* getpid() should never return. */ 1286 EXPECT_EQ(0, syscall(__NR_getpid)); 1287 } 1288 1289 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1290 { 1291 pid_t parent; 1292 long ret; 1293 1294 parent = getppid(); 1295 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1296 ASSERT_EQ(0, ret); 1297 1298 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1299 ASSERT_EQ(0, ret); 1300 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1301 ASSERT_EQ(0, ret); 1302 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1303 ASSERT_EQ(0, ret); 1304 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1305 ASSERT_EQ(0, ret); 1306 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1307 ASSERT_EQ(0, ret); 1308 /* Should work just fine. */ 1309 EXPECT_EQ(parent, syscall(__NR_getppid)); 1310 /* getpid() should never return. */ 1311 EXPECT_EQ(0, syscall(__NR_getpid)); 1312 } 1313 1314 TEST_F(precedence, errno_is_third) 1315 { 1316 pid_t parent; 1317 long ret; 1318 1319 parent = getppid(); 1320 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1321 ASSERT_EQ(0, ret); 1322 1323 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1324 ASSERT_EQ(0, ret); 1325 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1326 ASSERT_EQ(0, ret); 1327 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1328 ASSERT_EQ(0, ret); 1329 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1330 ASSERT_EQ(0, ret); 1331 /* Should work just fine. */ 1332 EXPECT_EQ(parent, syscall(__NR_getppid)); 1333 EXPECT_EQ(0, syscall(__NR_getpid)); 1334 } 1335 1336 TEST_F(precedence, errno_is_third_in_any_order) 1337 { 1338 pid_t parent; 1339 long ret; 1340 1341 parent = getppid(); 1342 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1343 ASSERT_EQ(0, ret); 1344 1345 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1346 ASSERT_EQ(0, ret); 1347 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1348 ASSERT_EQ(0, ret); 1349 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1350 ASSERT_EQ(0, ret); 1351 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1352 ASSERT_EQ(0, ret); 1353 /* Should work just fine. */ 1354 EXPECT_EQ(parent, syscall(__NR_getppid)); 1355 EXPECT_EQ(0, syscall(__NR_getpid)); 1356 } 1357 1358 TEST_F(precedence, trace_is_fourth) 1359 { 1360 pid_t parent; 1361 long ret; 1362 1363 parent = getppid(); 1364 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1365 ASSERT_EQ(0, ret); 1366 1367 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1368 ASSERT_EQ(0, ret); 1369 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1370 ASSERT_EQ(0, ret); 1371 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1372 ASSERT_EQ(0, ret); 1373 /* Should work just fine. */ 1374 EXPECT_EQ(parent, syscall(__NR_getppid)); 1375 /* No ptracer */ 1376 EXPECT_EQ(-1, syscall(__NR_getpid)); 1377 } 1378 1379 TEST_F(precedence, trace_is_fourth_in_any_order) 1380 { 1381 pid_t parent; 1382 long ret; 1383 1384 parent = getppid(); 1385 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1386 ASSERT_EQ(0, ret); 1387 1388 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1389 ASSERT_EQ(0, ret); 1390 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1391 ASSERT_EQ(0, ret); 1392 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1393 ASSERT_EQ(0, ret); 1394 /* Should work just fine. */ 1395 EXPECT_EQ(parent, syscall(__NR_getppid)); 1396 /* No ptracer */ 1397 EXPECT_EQ(-1, syscall(__NR_getpid)); 1398 } 1399 1400 TEST_F(precedence, log_is_fifth) 1401 { 1402 pid_t mypid, parent; 1403 long ret; 1404 1405 mypid = getpid(); 1406 parent = getppid(); 1407 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1408 ASSERT_EQ(0, ret); 1409 1410 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1411 ASSERT_EQ(0, ret); 1412 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1413 ASSERT_EQ(0, ret); 1414 /* Should work just fine. */ 1415 EXPECT_EQ(parent, syscall(__NR_getppid)); 1416 /* Should also work just fine */ 1417 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1418 } 1419 1420 TEST_F(precedence, log_is_fifth_in_any_order) 1421 { 1422 pid_t mypid, parent; 1423 long ret; 1424 1425 mypid = getpid(); 1426 parent = getppid(); 1427 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1428 ASSERT_EQ(0, ret); 1429 1430 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1431 ASSERT_EQ(0, ret); 1432 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1433 ASSERT_EQ(0, ret); 1434 /* Should work just fine. */ 1435 EXPECT_EQ(parent, syscall(__NR_getppid)); 1436 /* Should also work just fine */ 1437 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1438 } 1439 1440 #ifndef PTRACE_O_TRACESECCOMP 1441 #define PTRACE_O_TRACESECCOMP 0x00000080 1442 #endif 1443 1444 /* Catch the Ubuntu 12.04 value error. */ 1445 #if PTRACE_EVENT_SECCOMP != 7 1446 #undef PTRACE_EVENT_SECCOMP 1447 #endif 1448 1449 #ifndef PTRACE_EVENT_SECCOMP 1450 #define PTRACE_EVENT_SECCOMP 7 1451 #endif 1452 1453 #define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) 1454 bool tracer_running; 1455 void tracer_stop(int sig) 1456 { 1457 tracer_running = false; 1458 } 1459 1460 typedef void tracer_func_t(struct __test_metadata *_metadata, 1461 pid_t tracee, int status, void *args); 1462 1463 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1464 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1465 { 1466 int ret = -1; 1467 struct sigaction action = { 1468 .sa_handler = tracer_stop, 1469 }; 1470 1471 /* Allow external shutdown. */ 1472 tracer_running = true; 1473 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1474 1475 errno = 0; 1476 while (ret == -1 && errno != EINVAL) 1477 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1478 ASSERT_EQ(0, ret) { 1479 kill(tracee, SIGKILL); 1480 } 1481 /* Wait for attach stop */ 1482 wait(NULL); 1483 1484 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1485 PTRACE_O_TRACESYSGOOD : 1486 PTRACE_O_TRACESECCOMP); 1487 ASSERT_EQ(0, ret) { 1488 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1489 kill(tracee, SIGKILL); 1490 } 1491 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1492 tracee, NULL, 0); 1493 ASSERT_EQ(0, ret); 1494 1495 /* Unblock the tracee */ 1496 ASSERT_EQ(1, write(fd, "A", 1)); 1497 ASSERT_EQ(0, close(fd)); 1498 1499 /* Run until we're shut down. Must assert to stop execution. */ 1500 while (tracer_running) { 1501 int status; 1502 1503 if (wait(&status) != tracee) 1504 continue; 1505 if (WIFSIGNALED(status) || WIFEXITED(status)) 1506 /* Child is dead. Time to go. */ 1507 return; 1508 1509 /* Check if this is a seccomp event. */ 1510 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status)); 1511 1512 tracer_func(_metadata, tracee, status, args); 1513 1514 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1515 tracee, NULL, 0); 1516 ASSERT_EQ(0, ret); 1517 } 1518 /* Directly report the status of our test harness results. */ 1519 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); 1520 } 1521 1522 /* Common tracer setup/teardown functions. */ 1523 void cont_handler(int num) 1524 { } 1525 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1526 tracer_func_t func, void *args, bool ptrace_syscall) 1527 { 1528 char sync; 1529 int pipefd[2]; 1530 pid_t tracer_pid; 1531 pid_t tracee = getpid(); 1532 1533 /* Setup a pipe for clean synchronization. */ 1534 ASSERT_EQ(0, pipe(pipefd)); 1535 1536 /* Fork a child which we'll promote to tracer */ 1537 tracer_pid = fork(); 1538 ASSERT_LE(0, tracer_pid); 1539 signal(SIGALRM, cont_handler); 1540 if (tracer_pid == 0) { 1541 close(pipefd[0]); 1542 start_tracer(_metadata, pipefd[1], tracee, func, args, 1543 ptrace_syscall); 1544 syscall(__NR_exit, 0); 1545 } 1546 close(pipefd[1]); 1547 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1548 read(pipefd[0], &sync, 1); 1549 close(pipefd[0]); 1550 1551 return tracer_pid; 1552 } 1553 1554 void teardown_trace_fixture(struct __test_metadata *_metadata, 1555 pid_t tracer) 1556 { 1557 if (tracer) { 1558 int status; 1559 /* 1560 * Extract the exit code from the other process and 1561 * adopt it for ourselves in case its asserts failed. 1562 */ 1563 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1564 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1565 if (WEXITSTATUS(status)) 1566 _metadata->passed = 0; 1567 } 1568 } 1569 1570 /* "poke" tracer arguments and function. */ 1571 struct tracer_args_poke_t { 1572 unsigned long poke_addr; 1573 }; 1574 1575 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1576 void *args) 1577 { 1578 int ret; 1579 unsigned long msg; 1580 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1581 1582 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1583 EXPECT_EQ(0, ret); 1584 /* If this fails, don't try to recover. */ 1585 ASSERT_EQ(0x1001, msg) { 1586 kill(tracee, SIGKILL); 1587 } 1588 /* 1589 * Poke in the message. 1590 * Registers are not touched to try to keep this relatively arch 1591 * agnostic. 1592 */ 1593 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1594 EXPECT_EQ(0, ret); 1595 } 1596 1597 FIXTURE(TRACE_poke) { 1598 struct sock_fprog prog; 1599 pid_t tracer; 1600 long poked; 1601 struct tracer_args_poke_t tracer_args; 1602 }; 1603 1604 FIXTURE_SETUP(TRACE_poke) 1605 { 1606 struct sock_filter filter[] = { 1607 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1608 offsetof(struct seccomp_data, nr)), 1609 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1610 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1611 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1612 }; 1613 1614 self->poked = 0; 1615 memset(&self->prog, 0, sizeof(self->prog)); 1616 self->prog.filter = malloc(sizeof(filter)); 1617 ASSERT_NE(NULL, self->prog.filter); 1618 memcpy(self->prog.filter, filter, sizeof(filter)); 1619 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1620 1621 /* Set up tracer args. */ 1622 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1623 1624 /* Launch tracer. */ 1625 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1626 &self->tracer_args, false); 1627 } 1628 1629 FIXTURE_TEARDOWN(TRACE_poke) 1630 { 1631 teardown_trace_fixture(_metadata, self->tracer); 1632 if (self->prog.filter) 1633 free(self->prog.filter); 1634 } 1635 1636 TEST_F(TRACE_poke, read_has_side_effects) 1637 { 1638 ssize_t ret; 1639 1640 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1641 ASSERT_EQ(0, ret); 1642 1643 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1644 ASSERT_EQ(0, ret); 1645 1646 EXPECT_EQ(0, self->poked); 1647 ret = read(-1, NULL, 0); 1648 EXPECT_EQ(-1, ret); 1649 EXPECT_EQ(0x1001, self->poked); 1650 } 1651 1652 TEST_F(TRACE_poke, getpid_runs_normally) 1653 { 1654 long ret; 1655 1656 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1657 ASSERT_EQ(0, ret); 1658 1659 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1660 ASSERT_EQ(0, ret); 1661 1662 EXPECT_EQ(0, self->poked); 1663 EXPECT_NE(0, syscall(__NR_getpid)); 1664 EXPECT_EQ(0, self->poked); 1665 } 1666 1667 #if defined(__x86_64__) 1668 # define ARCH_REGS struct user_regs_struct 1669 # define SYSCALL_NUM orig_rax 1670 # define SYSCALL_RET rax 1671 #elif defined(__i386__) 1672 # define ARCH_REGS struct user_regs_struct 1673 # define SYSCALL_NUM orig_eax 1674 # define SYSCALL_RET eax 1675 #elif defined(__arm__) 1676 # define ARCH_REGS struct pt_regs 1677 # define SYSCALL_NUM ARM_r7 1678 # define SYSCALL_RET ARM_r0 1679 #elif defined(__aarch64__) 1680 # define ARCH_REGS struct user_pt_regs 1681 # define SYSCALL_NUM regs[8] 1682 # define SYSCALL_RET regs[0] 1683 #elif defined(__riscv) && __riscv_xlen == 64 1684 # define ARCH_REGS struct user_regs_struct 1685 # define SYSCALL_NUM a7 1686 # define SYSCALL_RET a0 1687 #elif defined(__csky__) 1688 # define ARCH_REGS struct pt_regs 1689 #if defined(__CSKYABIV2__) 1690 # define SYSCALL_NUM regs[3] 1691 #else 1692 # define SYSCALL_NUM regs[9] 1693 #endif 1694 # define SYSCALL_RET a0 1695 #elif defined(__hppa__) 1696 # define ARCH_REGS struct user_regs_struct 1697 # define SYSCALL_NUM gr[20] 1698 # define SYSCALL_RET gr[28] 1699 #elif defined(__powerpc__) 1700 # define ARCH_REGS struct pt_regs 1701 # define SYSCALL_NUM gpr[0] 1702 # define SYSCALL_RET gpr[3] 1703 #elif defined(__s390__) 1704 # define ARCH_REGS s390_regs 1705 # define SYSCALL_NUM gprs[2] 1706 # define SYSCALL_RET gprs[2] 1707 # define SYSCALL_NUM_RET_SHARE_REG 1708 #elif defined(__mips__) 1709 # define ARCH_REGS struct pt_regs 1710 # define SYSCALL_NUM regs[2] 1711 # define SYSCALL_SYSCALL_NUM regs[4] 1712 # define SYSCALL_RET regs[2] 1713 # define SYSCALL_NUM_RET_SHARE_REG 1714 #elif defined(__xtensa__) 1715 # define ARCH_REGS struct user_pt_regs 1716 # define SYSCALL_NUM syscall 1717 /* 1718 * On xtensa syscall return value is in the register 1719 * a2 of the current window which is not fixed. 1720 */ 1721 #define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2] 1722 #else 1723 # error "Do not know how to find your architecture's registers and syscalls" 1724 #endif 1725 1726 /* When the syscall return can't be changed, stub out the tests for it. */ 1727 #ifdef SYSCALL_NUM_RET_SHARE_REG 1728 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1729 #else 1730 # define EXPECT_SYSCALL_RETURN(val, action) \ 1731 do { \ 1732 errno = 0; \ 1733 if (val < 0) { \ 1734 EXPECT_EQ(-1, action); \ 1735 EXPECT_EQ(-(val), errno); \ 1736 } else { \ 1737 EXPECT_EQ(val, action); \ 1738 } \ 1739 } while (0) 1740 #endif 1741 1742 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1743 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1744 */ 1745 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) 1746 #define HAVE_GETREGS 1747 #endif 1748 1749 /* Architecture-specific syscall fetching routine. */ 1750 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1751 { 1752 ARCH_REGS regs; 1753 #ifdef HAVE_GETREGS 1754 EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { 1755 TH_LOG("PTRACE_GETREGS failed"); 1756 return -1; 1757 } 1758 #else 1759 struct iovec iov; 1760 1761 iov.iov_base = ®s; 1762 iov.iov_len = sizeof(regs); 1763 EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { 1764 TH_LOG("PTRACE_GETREGSET failed"); 1765 return -1; 1766 } 1767 #endif 1768 1769 #if defined(__mips__) 1770 if (regs.SYSCALL_NUM == __NR_O32_Linux) 1771 return regs.SYSCALL_SYSCALL_NUM; 1772 #endif 1773 return regs.SYSCALL_NUM; 1774 } 1775 1776 /* Architecture-specific syscall changing routine. */ 1777 void change_syscall(struct __test_metadata *_metadata, 1778 pid_t tracee, int syscall, int result) 1779 { 1780 int ret; 1781 ARCH_REGS regs; 1782 #ifdef HAVE_GETREGS 1783 ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); 1784 #else 1785 struct iovec iov; 1786 iov.iov_base = ®s; 1787 iov.iov_len = sizeof(regs); 1788 ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); 1789 #endif 1790 EXPECT_EQ(0, ret) {} 1791 1792 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ 1793 defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ 1794 defined(__xtensa__) || defined(__csky__) 1795 { 1796 regs.SYSCALL_NUM = syscall; 1797 } 1798 #elif defined(__mips__) 1799 { 1800 if (regs.SYSCALL_NUM == __NR_O32_Linux) 1801 regs.SYSCALL_SYSCALL_NUM = syscall; 1802 else 1803 regs.SYSCALL_NUM = syscall; 1804 } 1805 1806 #elif defined(__arm__) 1807 # ifndef PTRACE_SET_SYSCALL 1808 # define PTRACE_SET_SYSCALL 23 1809 # endif 1810 { 1811 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); 1812 EXPECT_EQ(0, ret); 1813 } 1814 1815 #elif defined(__aarch64__) 1816 # ifndef NT_ARM_SYSTEM_CALL 1817 # define NT_ARM_SYSTEM_CALL 0x404 1818 # endif 1819 { 1820 iov.iov_base = &syscall; 1821 iov.iov_len = sizeof(syscall); 1822 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, 1823 &iov); 1824 EXPECT_EQ(0, ret); 1825 } 1826 1827 #else 1828 ASSERT_EQ(1, 0) { 1829 TH_LOG("How is the syscall changed on this architecture?"); 1830 } 1831 #endif 1832 1833 /* If syscall is skipped, change return value. */ 1834 if (syscall == -1) 1835 #ifdef SYSCALL_NUM_RET_SHARE_REG 1836 TH_LOG("Can't modify syscall return on this architecture"); 1837 1838 #elif defined(__xtensa__) 1839 regs.SYSCALL_RET(regs) = result; 1840 #else 1841 regs.SYSCALL_RET = result; 1842 #endif 1843 1844 #ifdef HAVE_GETREGS 1845 ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); 1846 #else 1847 iov.iov_base = ®s; 1848 iov.iov_len = sizeof(regs); 1849 ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); 1850 #endif 1851 EXPECT_EQ(0, ret); 1852 } 1853 1854 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, 1855 int status, void *args) 1856 { 1857 int ret; 1858 unsigned long msg; 1859 1860 /* Make sure we got the right message. */ 1861 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1862 EXPECT_EQ(0, ret); 1863 1864 /* Validate and take action on expected syscalls. */ 1865 switch (msg) { 1866 case 0x1002: 1867 /* change getpid to getppid. */ 1868 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); 1869 change_syscall(_metadata, tracee, __NR_getppid, 0); 1870 break; 1871 case 0x1003: 1872 /* skip gettid with valid return code. */ 1873 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 1874 change_syscall(_metadata, tracee, -1, 45000); 1875 break; 1876 case 0x1004: 1877 /* skip openat with error. */ 1878 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 1879 change_syscall(_metadata, tracee, -1, -ESRCH); 1880 break; 1881 case 0x1005: 1882 /* do nothing (allow getppid) */ 1883 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 1884 break; 1885 default: 1886 EXPECT_EQ(0, msg) { 1887 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 1888 kill(tracee, SIGKILL); 1889 } 1890 } 1891 1892 } 1893 1894 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 1895 int status, void *args) 1896 { 1897 int ret, nr; 1898 unsigned long msg; 1899 static bool entry; 1900 1901 /* 1902 * The traditional way to tell PTRACE_SYSCALL entry/exit 1903 * is by counting. 1904 */ 1905 entry = !entry; 1906 1907 /* Make sure we got an appropriate message. */ 1908 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1909 EXPECT_EQ(0, ret); 1910 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 1911 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 1912 1913 if (!entry) 1914 return; 1915 1916 nr = get_syscall(_metadata, tracee); 1917 1918 if (nr == __NR_getpid) 1919 change_syscall(_metadata, tracee, __NR_getppid, 0); 1920 if (nr == __NR_gettid) 1921 change_syscall(_metadata, tracee, -1, 45000); 1922 if (nr == __NR_openat) 1923 change_syscall(_metadata, tracee, -1, -ESRCH); 1924 } 1925 1926 FIXTURE(TRACE_syscall) { 1927 struct sock_fprog prog; 1928 pid_t tracer, mytid, mypid, parent; 1929 }; 1930 1931 FIXTURE_VARIANT(TRACE_syscall) { 1932 /* 1933 * All of the SECCOMP_RET_TRACE behaviors can be tested with either 1934 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. 1935 * This indicates if we should use SECCOMP_RET_TRACE (false), or 1936 * ptrace (true). 1937 */ 1938 bool use_ptrace; 1939 }; 1940 1941 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { 1942 .use_ptrace = true, 1943 }; 1944 1945 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { 1946 .use_ptrace = false, 1947 }; 1948 1949 FIXTURE_SETUP(TRACE_syscall) 1950 { 1951 struct sock_filter filter[] = { 1952 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1953 offsetof(struct seccomp_data, nr)), 1954 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1955 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 1956 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 1957 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 1958 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 1959 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 1960 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 1961 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 1962 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1963 }; 1964 struct sock_fprog prog = { 1965 .len = (unsigned short)ARRAY_SIZE(filter), 1966 .filter = filter, 1967 }; 1968 long ret; 1969 1970 /* Prepare some testable syscall results. */ 1971 self->mytid = syscall(__NR_gettid); 1972 ASSERT_GT(self->mytid, 0); 1973 ASSERT_NE(self->mytid, 1) { 1974 TH_LOG("Running this test as init is not supported. :)"); 1975 } 1976 1977 self->mypid = getpid(); 1978 ASSERT_GT(self->mypid, 0); 1979 ASSERT_EQ(self->mytid, self->mypid); 1980 1981 self->parent = getppid(); 1982 ASSERT_GT(self->parent, 0); 1983 ASSERT_NE(self->parent, self->mypid); 1984 1985 /* Launch tracer. */ 1986 self->tracer = setup_trace_fixture(_metadata, 1987 variant->use_ptrace ? tracer_ptrace 1988 : tracer_seccomp, 1989 NULL, variant->use_ptrace); 1990 1991 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1992 ASSERT_EQ(0, ret); 1993 1994 if (variant->use_ptrace) 1995 return; 1996 1997 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 1998 ASSERT_EQ(0, ret); 1999 } 2000 2001 FIXTURE_TEARDOWN(TRACE_syscall) 2002 { 2003 teardown_trace_fixture(_metadata, self->tracer); 2004 } 2005 2006 TEST(negative_ENOSYS) 2007 { 2008 /* 2009 * There should be no difference between an "internal" skip 2010 * and userspace asking for syscall "-1". 2011 */ 2012 errno = 0; 2013 EXPECT_EQ(-1, syscall(-1)); 2014 EXPECT_EQ(errno, ENOSYS); 2015 /* And no difference for "still not valid but not -1". */ 2016 errno = 0; 2017 EXPECT_EQ(-1, syscall(-101)); 2018 EXPECT_EQ(errno, ENOSYS); 2019 } 2020 2021 TEST_F(TRACE_syscall, negative_ENOSYS) 2022 { 2023 negative_ENOSYS(_metadata); 2024 } 2025 2026 TEST_F(TRACE_syscall, syscall_allowed) 2027 { 2028 /* getppid works as expected (no changes). */ 2029 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 2030 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 2031 } 2032 2033 TEST_F(TRACE_syscall, syscall_redirected) 2034 { 2035 /* getpid has been redirected to getppid as expected. */ 2036 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 2037 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2038 } 2039 2040 TEST_F(TRACE_syscall, syscall_errno) 2041 { 2042 /* Tracer should skip the open syscall, resulting in ESRCH. */ 2043 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 2044 } 2045 2046 TEST_F(TRACE_syscall, syscall_faked) 2047 { 2048 /* Tracer skips the gettid syscall and store altered return value. */ 2049 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 2050 } 2051 2052 TEST_F(TRACE_syscall, skip_after) 2053 { 2054 struct sock_filter filter[] = { 2055 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2056 offsetof(struct seccomp_data, nr)), 2057 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2058 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2059 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2060 }; 2061 struct sock_fprog prog = { 2062 .len = (unsigned short)ARRAY_SIZE(filter), 2063 .filter = filter, 2064 }; 2065 long ret; 2066 2067 /* Install additional "errno on getppid" filter. */ 2068 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2069 ASSERT_EQ(0, ret); 2070 2071 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2072 errno = 0; 2073 EXPECT_EQ(-1, syscall(__NR_getpid)); 2074 EXPECT_EQ(EPERM, errno); 2075 } 2076 2077 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) 2078 { 2079 struct sock_filter filter[] = { 2080 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2081 offsetof(struct seccomp_data, nr)), 2082 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2083 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2084 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2085 }; 2086 struct sock_fprog prog = { 2087 .len = (unsigned short)ARRAY_SIZE(filter), 2088 .filter = filter, 2089 }; 2090 long ret; 2091 2092 /* Install additional "death on getppid" filter. */ 2093 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2094 ASSERT_EQ(0, ret); 2095 2096 /* Tracer will redirect getpid to getppid, and we should die. */ 2097 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2098 } 2099 2100 TEST(seccomp_syscall) 2101 { 2102 struct sock_filter filter[] = { 2103 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2104 }; 2105 struct sock_fprog prog = { 2106 .len = (unsigned short)ARRAY_SIZE(filter), 2107 .filter = filter, 2108 }; 2109 long ret; 2110 2111 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2112 ASSERT_EQ(0, ret) { 2113 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2114 } 2115 2116 /* Reject insane operation. */ 2117 ret = seccomp(-1, 0, &prog); 2118 ASSERT_NE(ENOSYS, errno) { 2119 TH_LOG("Kernel does not support seccomp syscall!"); 2120 } 2121 EXPECT_EQ(EINVAL, errno) { 2122 TH_LOG("Did not reject crazy op value!"); 2123 } 2124 2125 /* Reject strict with flags or pointer. */ 2126 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2127 EXPECT_EQ(EINVAL, errno) { 2128 TH_LOG("Did not reject mode strict with flags!"); 2129 } 2130 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2131 EXPECT_EQ(EINVAL, errno) { 2132 TH_LOG("Did not reject mode strict with uargs!"); 2133 } 2134 2135 /* Reject insane args for filter. */ 2136 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2137 EXPECT_EQ(EINVAL, errno) { 2138 TH_LOG("Did not reject crazy filter flags!"); 2139 } 2140 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2141 EXPECT_EQ(EFAULT, errno) { 2142 TH_LOG("Did not reject NULL filter!"); 2143 } 2144 2145 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2146 EXPECT_EQ(0, errno) { 2147 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2148 strerror(errno)); 2149 } 2150 } 2151 2152 TEST(seccomp_syscall_mode_lock) 2153 { 2154 struct sock_filter filter[] = { 2155 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2156 }; 2157 struct sock_fprog prog = { 2158 .len = (unsigned short)ARRAY_SIZE(filter), 2159 .filter = filter, 2160 }; 2161 long ret; 2162 2163 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2164 ASSERT_EQ(0, ret) { 2165 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2166 } 2167 2168 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2169 ASSERT_NE(ENOSYS, errno) { 2170 TH_LOG("Kernel does not support seccomp syscall!"); 2171 } 2172 EXPECT_EQ(0, ret) { 2173 TH_LOG("Could not install filter!"); 2174 } 2175 2176 /* Make sure neither entry point will switch to strict. */ 2177 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2178 EXPECT_EQ(EINVAL, errno) { 2179 TH_LOG("Switched to mode strict!"); 2180 } 2181 2182 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2183 EXPECT_EQ(EINVAL, errno) { 2184 TH_LOG("Switched to mode strict!"); 2185 } 2186 } 2187 2188 /* 2189 * Test detection of known and unknown filter flags. Userspace needs to be able 2190 * to check if a filter flag is supported by the current kernel and a good way 2191 * of doing that is by attempting to enter filter mode, with the flag bit in 2192 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2193 * that the flag is valid and EINVAL indicates that the flag is invalid. 2194 */ 2195 TEST(detect_seccomp_filter_flags) 2196 { 2197 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2198 SECCOMP_FILTER_FLAG_LOG, 2199 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2200 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2201 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2202 unsigned int exclusive[] = { 2203 SECCOMP_FILTER_FLAG_TSYNC, 2204 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2205 unsigned int flag, all_flags, exclusive_mask; 2206 int i; 2207 long ret; 2208 2209 /* Test detection of individual known-good filter flags */ 2210 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2211 int bits = 0; 2212 2213 flag = flags[i]; 2214 /* Make sure the flag is a single bit! */ 2215 while (flag) { 2216 if (flag & 0x1) 2217 bits ++; 2218 flag >>= 1; 2219 } 2220 ASSERT_EQ(1, bits); 2221 flag = flags[i]; 2222 2223 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2224 ASSERT_NE(ENOSYS, errno) { 2225 TH_LOG("Kernel does not support seccomp syscall!"); 2226 } 2227 EXPECT_EQ(-1, ret); 2228 EXPECT_EQ(EFAULT, errno) { 2229 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2230 flag); 2231 } 2232 2233 all_flags |= flag; 2234 } 2235 2236 /* 2237 * Test detection of all known-good filter flags combined. But 2238 * for the exclusive flags we need to mask them out and try them 2239 * individually for the "all flags" testing. 2240 */ 2241 exclusive_mask = 0; 2242 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2243 exclusive_mask |= exclusive[i]; 2244 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2245 flag = all_flags & ~exclusive_mask; 2246 flag |= exclusive[i]; 2247 2248 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2249 EXPECT_EQ(-1, ret); 2250 EXPECT_EQ(EFAULT, errno) { 2251 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2252 flag); 2253 } 2254 } 2255 2256 /* Test detection of an unknown filter flags, without exclusives. */ 2257 flag = -1; 2258 flag &= ~exclusive_mask; 2259 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2260 EXPECT_EQ(-1, ret); 2261 EXPECT_EQ(EINVAL, errno) { 2262 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2263 flag); 2264 } 2265 2266 /* 2267 * Test detection of an unknown filter flag that may simply need to be 2268 * added to this test 2269 */ 2270 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2271 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2272 EXPECT_EQ(-1, ret); 2273 EXPECT_EQ(EINVAL, errno) { 2274 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", 2275 flag); 2276 } 2277 } 2278 2279 TEST(TSYNC_first) 2280 { 2281 struct sock_filter filter[] = { 2282 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2283 }; 2284 struct sock_fprog prog = { 2285 .len = (unsigned short)ARRAY_SIZE(filter), 2286 .filter = filter, 2287 }; 2288 long ret; 2289 2290 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2291 ASSERT_EQ(0, ret) { 2292 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2293 } 2294 2295 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2296 &prog); 2297 ASSERT_NE(ENOSYS, errno) { 2298 TH_LOG("Kernel does not support seccomp syscall!"); 2299 } 2300 EXPECT_EQ(0, ret) { 2301 TH_LOG("Could not install initial filter with TSYNC!"); 2302 } 2303 } 2304 2305 #define TSYNC_SIBLINGS 2 2306 struct tsync_sibling { 2307 pthread_t tid; 2308 pid_t system_tid; 2309 sem_t *started; 2310 pthread_cond_t *cond; 2311 pthread_mutex_t *mutex; 2312 int diverge; 2313 int num_waits; 2314 struct sock_fprog *prog; 2315 struct __test_metadata *metadata; 2316 }; 2317 2318 /* 2319 * To avoid joining joined threads (which is not allowed by Bionic), 2320 * make sure we both successfully join and clear the tid to skip a 2321 * later join attempt during fixture teardown. Any remaining threads 2322 * will be directly killed during teardown. 2323 */ 2324 #define PTHREAD_JOIN(tid, status) \ 2325 do { \ 2326 int _rc = pthread_join(tid, status); \ 2327 if (_rc) { \ 2328 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2329 (unsigned int)tid, _rc); \ 2330 } else { \ 2331 tid = 0; \ 2332 } \ 2333 } while (0) 2334 2335 FIXTURE(TSYNC) { 2336 struct sock_fprog root_prog, apply_prog; 2337 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2338 sem_t started; 2339 pthread_cond_t cond; 2340 pthread_mutex_t mutex; 2341 int sibling_count; 2342 }; 2343 2344 FIXTURE_SETUP(TSYNC) 2345 { 2346 struct sock_filter root_filter[] = { 2347 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2348 }; 2349 struct sock_filter apply_filter[] = { 2350 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2351 offsetof(struct seccomp_data, nr)), 2352 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2353 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2354 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2355 }; 2356 2357 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2358 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2359 memset(&self->sibling, 0, sizeof(self->sibling)); 2360 self->root_prog.filter = malloc(sizeof(root_filter)); 2361 ASSERT_NE(NULL, self->root_prog.filter); 2362 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2363 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2364 2365 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2366 ASSERT_NE(NULL, self->apply_prog.filter); 2367 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2368 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2369 2370 self->sibling_count = 0; 2371 pthread_mutex_init(&self->mutex, NULL); 2372 pthread_cond_init(&self->cond, NULL); 2373 sem_init(&self->started, 0, 0); 2374 self->sibling[0].tid = 0; 2375 self->sibling[0].cond = &self->cond; 2376 self->sibling[0].started = &self->started; 2377 self->sibling[0].mutex = &self->mutex; 2378 self->sibling[0].diverge = 0; 2379 self->sibling[0].num_waits = 1; 2380 self->sibling[0].prog = &self->root_prog; 2381 self->sibling[0].metadata = _metadata; 2382 self->sibling[1].tid = 0; 2383 self->sibling[1].cond = &self->cond; 2384 self->sibling[1].started = &self->started; 2385 self->sibling[1].mutex = &self->mutex; 2386 self->sibling[1].diverge = 0; 2387 self->sibling[1].prog = &self->root_prog; 2388 self->sibling[1].num_waits = 1; 2389 self->sibling[1].metadata = _metadata; 2390 } 2391 2392 FIXTURE_TEARDOWN(TSYNC) 2393 { 2394 int sib = 0; 2395 2396 if (self->root_prog.filter) 2397 free(self->root_prog.filter); 2398 if (self->apply_prog.filter) 2399 free(self->apply_prog.filter); 2400 2401 for ( ; sib < self->sibling_count; ++sib) { 2402 struct tsync_sibling *s = &self->sibling[sib]; 2403 2404 if (!s->tid) 2405 continue; 2406 /* 2407 * If a thread is still running, it may be stuck, so hit 2408 * it over the head really hard. 2409 */ 2410 pthread_kill(s->tid, 9); 2411 } 2412 pthread_mutex_destroy(&self->mutex); 2413 pthread_cond_destroy(&self->cond); 2414 sem_destroy(&self->started); 2415 } 2416 2417 void *tsync_sibling(void *data) 2418 { 2419 long ret = 0; 2420 struct tsync_sibling *me = data; 2421 2422 me->system_tid = syscall(__NR_gettid); 2423 2424 pthread_mutex_lock(me->mutex); 2425 if (me->diverge) { 2426 /* Just re-apply the root prog to fork the tree */ 2427 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2428 me->prog, 0, 0); 2429 } 2430 sem_post(me->started); 2431 /* Return outside of started so parent notices failures. */ 2432 if (ret) { 2433 pthread_mutex_unlock(me->mutex); 2434 return (void *)SIBLING_EXIT_FAILURE; 2435 } 2436 do { 2437 pthread_cond_wait(me->cond, me->mutex); 2438 me->num_waits = me->num_waits - 1; 2439 } while (me->num_waits); 2440 pthread_mutex_unlock(me->mutex); 2441 2442 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2443 if (!ret) 2444 return (void *)SIBLING_EXIT_NEWPRIVS; 2445 read(0, NULL, 0); 2446 return (void *)SIBLING_EXIT_UNKILLED; 2447 } 2448 2449 void tsync_start_sibling(struct tsync_sibling *sibling) 2450 { 2451 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2452 } 2453 2454 TEST_F(TSYNC, siblings_fail_prctl) 2455 { 2456 long ret; 2457 void *status; 2458 struct sock_filter filter[] = { 2459 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2460 offsetof(struct seccomp_data, nr)), 2461 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2462 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2463 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2464 }; 2465 struct sock_fprog prog = { 2466 .len = (unsigned short)ARRAY_SIZE(filter), 2467 .filter = filter, 2468 }; 2469 2470 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2471 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2472 } 2473 2474 /* Check prctl failure detection by requesting sib 0 diverge. */ 2475 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2476 ASSERT_NE(ENOSYS, errno) { 2477 TH_LOG("Kernel does not support seccomp syscall!"); 2478 } 2479 ASSERT_EQ(0, ret) { 2480 TH_LOG("setting filter failed"); 2481 } 2482 2483 self->sibling[0].diverge = 1; 2484 tsync_start_sibling(&self->sibling[0]); 2485 tsync_start_sibling(&self->sibling[1]); 2486 2487 while (self->sibling_count < TSYNC_SIBLINGS) { 2488 sem_wait(&self->started); 2489 self->sibling_count++; 2490 } 2491 2492 /* Signal the threads to clean up*/ 2493 pthread_mutex_lock(&self->mutex); 2494 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2495 TH_LOG("cond broadcast non-zero"); 2496 } 2497 pthread_mutex_unlock(&self->mutex); 2498 2499 /* Ensure diverging sibling failed to call prctl. */ 2500 PTHREAD_JOIN(self->sibling[0].tid, &status); 2501 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2502 PTHREAD_JOIN(self->sibling[1].tid, &status); 2503 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2504 } 2505 2506 TEST_F(TSYNC, two_siblings_with_ancestor) 2507 { 2508 long ret; 2509 void *status; 2510 2511 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2512 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2513 } 2514 2515 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2516 ASSERT_NE(ENOSYS, errno) { 2517 TH_LOG("Kernel does not support seccomp syscall!"); 2518 } 2519 ASSERT_EQ(0, ret) { 2520 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2521 } 2522 tsync_start_sibling(&self->sibling[0]); 2523 tsync_start_sibling(&self->sibling[1]); 2524 2525 while (self->sibling_count < TSYNC_SIBLINGS) { 2526 sem_wait(&self->started); 2527 self->sibling_count++; 2528 } 2529 2530 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2531 &self->apply_prog); 2532 ASSERT_EQ(0, ret) { 2533 TH_LOG("Could install filter on all threads!"); 2534 } 2535 /* Tell the siblings to test the policy */ 2536 pthread_mutex_lock(&self->mutex); 2537 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2538 TH_LOG("cond broadcast non-zero"); 2539 } 2540 pthread_mutex_unlock(&self->mutex); 2541 /* Ensure they are both killed and don't exit cleanly. */ 2542 PTHREAD_JOIN(self->sibling[0].tid, &status); 2543 EXPECT_EQ(0x0, (long)status); 2544 PTHREAD_JOIN(self->sibling[1].tid, &status); 2545 EXPECT_EQ(0x0, (long)status); 2546 } 2547 2548 TEST_F(TSYNC, two_sibling_want_nnp) 2549 { 2550 void *status; 2551 2552 /* start siblings before any prctl() operations */ 2553 tsync_start_sibling(&self->sibling[0]); 2554 tsync_start_sibling(&self->sibling[1]); 2555 while (self->sibling_count < TSYNC_SIBLINGS) { 2556 sem_wait(&self->started); 2557 self->sibling_count++; 2558 } 2559 2560 /* Tell the siblings to test no policy */ 2561 pthread_mutex_lock(&self->mutex); 2562 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2563 TH_LOG("cond broadcast non-zero"); 2564 } 2565 pthread_mutex_unlock(&self->mutex); 2566 2567 /* Ensure they are both upset about lacking nnp. */ 2568 PTHREAD_JOIN(self->sibling[0].tid, &status); 2569 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2570 PTHREAD_JOIN(self->sibling[1].tid, &status); 2571 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2572 } 2573 2574 TEST_F(TSYNC, two_siblings_with_no_filter) 2575 { 2576 long ret; 2577 void *status; 2578 2579 /* start siblings before any prctl() operations */ 2580 tsync_start_sibling(&self->sibling[0]); 2581 tsync_start_sibling(&self->sibling[1]); 2582 while (self->sibling_count < TSYNC_SIBLINGS) { 2583 sem_wait(&self->started); 2584 self->sibling_count++; 2585 } 2586 2587 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2588 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2589 } 2590 2591 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2592 &self->apply_prog); 2593 ASSERT_NE(ENOSYS, errno) { 2594 TH_LOG("Kernel does not support seccomp syscall!"); 2595 } 2596 ASSERT_EQ(0, ret) { 2597 TH_LOG("Could install filter on all threads!"); 2598 } 2599 2600 /* Tell the siblings to test the policy */ 2601 pthread_mutex_lock(&self->mutex); 2602 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2603 TH_LOG("cond broadcast non-zero"); 2604 } 2605 pthread_mutex_unlock(&self->mutex); 2606 2607 /* Ensure they are both killed and don't exit cleanly. */ 2608 PTHREAD_JOIN(self->sibling[0].tid, &status); 2609 EXPECT_EQ(0x0, (long)status); 2610 PTHREAD_JOIN(self->sibling[1].tid, &status); 2611 EXPECT_EQ(0x0, (long)status); 2612 } 2613 2614 TEST_F(TSYNC, two_siblings_with_one_divergence) 2615 { 2616 long ret; 2617 void *status; 2618 2619 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2620 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2621 } 2622 2623 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2624 ASSERT_NE(ENOSYS, errno) { 2625 TH_LOG("Kernel does not support seccomp syscall!"); 2626 } 2627 ASSERT_EQ(0, ret) { 2628 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2629 } 2630 self->sibling[0].diverge = 1; 2631 tsync_start_sibling(&self->sibling[0]); 2632 tsync_start_sibling(&self->sibling[1]); 2633 2634 while (self->sibling_count < TSYNC_SIBLINGS) { 2635 sem_wait(&self->started); 2636 self->sibling_count++; 2637 } 2638 2639 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2640 &self->apply_prog); 2641 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2642 TH_LOG("Did not fail on diverged sibling."); 2643 } 2644 2645 /* Wake the threads */ 2646 pthread_mutex_lock(&self->mutex); 2647 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2648 TH_LOG("cond broadcast non-zero"); 2649 } 2650 pthread_mutex_unlock(&self->mutex); 2651 2652 /* Ensure they are both unkilled. */ 2653 PTHREAD_JOIN(self->sibling[0].tid, &status); 2654 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2655 PTHREAD_JOIN(self->sibling[1].tid, &status); 2656 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2657 } 2658 2659 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) 2660 { 2661 long ret, flags; 2662 void *status; 2663 2664 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2665 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2666 } 2667 2668 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2669 ASSERT_NE(ENOSYS, errno) { 2670 TH_LOG("Kernel does not support seccomp syscall!"); 2671 } 2672 ASSERT_EQ(0, ret) { 2673 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2674 } 2675 self->sibling[0].diverge = 1; 2676 tsync_start_sibling(&self->sibling[0]); 2677 tsync_start_sibling(&self->sibling[1]); 2678 2679 while (self->sibling_count < TSYNC_SIBLINGS) { 2680 sem_wait(&self->started); 2681 self->sibling_count++; 2682 } 2683 2684 flags = SECCOMP_FILTER_FLAG_TSYNC | \ 2685 SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 2686 ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); 2687 ASSERT_EQ(ESRCH, errno) { 2688 TH_LOG("Did not return ESRCH for diverged sibling."); 2689 } 2690 ASSERT_EQ(-1, ret) { 2691 TH_LOG("Did not fail on diverged sibling."); 2692 } 2693 2694 /* Wake the threads */ 2695 pthread_mutex_lock(&self->mutex); 2696 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2697 TH_LOG("cond broadcast non-zero"); 2698 } 2699 pthread_mutex_unlock(&self->mutex); 2700 2701 /* Ensure they are both unkilled. */ 2702 PTHREAD_JOIN(self->sibling[0].tid, &status); 2703 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2704 PTHREAD_JOIN(self->sibling[1].tid, &status); 2705 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2706 } 2707 2708 TEST_F(TSYNC, two_siblings_not_under_filter) 2709 { 2710 long ret, sib; 2711 void *status; 2712 struct timespec delay = { .tv_nsec = 100000000 }; 2713 2714 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2715 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2716 } 2717 2718 /* 2719 * Sibling 0 will have its own seccomp policy 2720 * and Sibling 1 will not be under seccomp at 2721 * all. Sibling 1 will enter seccomp and 0 2722 * will cause failure. 2723 */ 2724 self->sibling[0].diverge = 1; 2725 tsync_start_sibling(&self->sibling[0]); 2726 tsync_start_sibling(&self->sibling[1]); 2727 2728 while (self->sibling_count < TSYNC_SIBLINGS) { 2729 sem_wait(&self->started); 2730 self->sibling_count++; 2731 } 2732 2733 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2734 ASSERT_NE(ENOSYS, errno) { 2735 TH_LOG("Kernel does not support seccomp syscall!"); 2736 } 2737 ASSERT_EQ(0, ret) { 2738 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2739 } 2740 2741 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2742 &self->apply_prog); 2743 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2744 TH_LOG("Did not fail on diverged sibling."); 2745 } 2746 sib = 1; 2747 if (ret == self->sibling[0].system_tid) 2748 sib = 0; 2749 2750 pthread_mutex_lock(&self->mutex); 2751 2752 /* Increment the other siblings num_waits so we can clean up 2753 * the one we just saw. 2754 */ 2755 self->sibling[!sib].num_waits += 1; 2756 2757 /* Signal the thread to clean up*/ 2758 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2759 TH_LOG("cond broadcast non-zero"); 2760 } 2761 pthread_mutex_unlock(&self->mutex); 2762 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2763 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2764 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2765 while (!kill(self->sibling[sib].system_tid, 0)) 2766 nanosleep(&delay, NULL); 2767 /* Switch to the remaining sibling */ 2768 sib = !sib; 2769 2770 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2771 &self->apply_prog); 2772 ASSERT_EQ(0, ret) { 2773 TH_LOG("Expected the remaining sibling to sync"); 2774 }; 2775 2776 pthread_mutex_lock(&self->mutex); 2777 2778 /* If remaining sibling didn't have a chance to wake up during 2779 * the first broadcast, manually reduce the num_waits now. 2780 */ 2781 if (self->sibling[sib].num_waits > 1) 2782 self->sibling[sib].num_waits = 1; 2783 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2784 TH_LOG("cond broadcast non-zero"); 2785 } 2786 pthread_mutex_unlock(&self->mutex); 2787 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2788 EXPECT_EQ(0, (long)status); 2789 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2790 while (!kill(self->sibling[sib].system_tid, 0)) 2791 nanosleep(&delay, NULL); 2792 2793 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2794 &self->apply_prog); 2795 ASSERT_EQ(0, ret); /* just us chickens */ 2796 } 2797 2798 /* Make sure restarted syscalls are seen directly as "restart_syscall". */ 2799 TEST(syscall_restart) 2800 { 2801 long ret; 2802 unsigned long msg; 2803 pid_t child_pid; 2804 int pipefd[2]; 2805 int status; 2806 siginfo_t info = { }; 2807 struct sock_filter filter[] = { 2808 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2809 offsetof(struct seccomp_data, nr)), 2810 2811 #ifdef __NR_sigreturn 2812 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), 2813 #endif 2814 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), 2815 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), 2816 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), 2817 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), 2818 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), 2819 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 2820 2821 /* Allow __NR_write for easy logging. */ 2822 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 2823 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2824 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2825 /* The nanosleep jump target. */ 2826 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 2827 /* The restart_syscall jump target. */ 2828 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 2829 }; 2830 struct sock_fprog prog = { 2831 .len = (unsigned short)ARRAY_SIZE(filter), 2832 .filter = filter, 2833 }; 2834 #if defined(__arm__) 2835 struct utsname utsbuf; 2836 #endif 2837 2838 ASSERT_EQ(0, pipe(pipefd)); 2839 2840 child_pid = fork(); 2841 ASSERT_LE(0, child_pid); 2842 if (child_pid == 0) { 2843 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 2844 char buf = ' '; 2845 struct timespec timeout = { }; 2846 2847 /* Attach parent as tracer and stop. */ 2848 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 2849 EXPECT_EQ(0, raise(SIGSTOP)); 2850 2851 EXPECT_EQ(0, close(pipefd[1])); 2852 2853 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2854 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2855 } 2856 2857 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2858 EXPECT_EQ(0, ret) { 2859 TH_LOG("Failed to install filter!"); 2860 } 2861 2862 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2863 TH_LOG("Failed to read() sync from parent"); 2864 } 2865 EXPECT_EQ('.', buf) { 2866 TH_LOG("Failed to get sync data from read()"); 2867 } 2868 2869 /* Start nanosleep to be interrupted. */ 2870 timeout.tv_sec = 1; 2871 errno = 0; 2872 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 2873 TH_LOG("Call to nanosleep() failed (errno %d)", errno); 2874 } 2875 2876 /* Read final sync from parent. */ 2877 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2878 TH_LOG("Failed final read() from parent"); 2879 } 2880 EXPECT_EQ('!', buf) { 2881 TH_LOG("Failed to get final data from read()"); 2882 } 2883 2884 /* Directly report the status of our test harness results. */ 2885 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS 2886 : EXIT_FAILURE); 2887 } 2888 EXPECT_EQ(0, close(pipefd[0])); 2889 2890 /* Attach to child, setup options, and release. */ 2891 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2892 ASSERT_EQ(true, WIFSTOPPED(status)); 2893 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 2894 PTRACE_O_TRACESECCOMP)); 2895 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2896 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 2897 2898 /* Wait for nanosleep() to start. */ 2899 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2900 ASSERT_EQ(true, WIFSTOPPED(status)); 2901 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2902 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2903 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2904 ASSERT_EQ(0x100, msg); 2905 ret = get_syscall(_metadata, child_pid); 2906 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); 2907 2908 /* Might as well check siginfo for sanity while we're here. */ 2909 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2910 ASSERT_EQ(SIGTRAP, info.si_signo); 2911 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 2912 EXPECT_EQ(0, info.si_errno); 2913 EXPECT_EQ(getuid(), info.si_uid); 2914 /* Verify signal delivery came from child (seccomp-triggered). */ 2915 EXPECT_EQ(child_pid, info.si_pid); 2916 2917 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 2918 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 2919 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2920 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2921 ASSERT_EQ(true, WIFSTOPPED(status)); 2922 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 2923 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2924 /* 2925 * There is no siginfo on SIGSTOP any more, so we can't verify 2926 * signal delivery came from parent now (getpid() == info.si_pid). 2927 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 2928 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 2929 */ 2930 EXPECT_EQ(SIGSTOP, info.si_signo); 2931 2932 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ 2933 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 2934 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2935 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2936 ASSERT_EQ(true, WIFSTOPPED(status)); 2937 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 2938 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2939 2940 /* Wait for restart_syscall() to start. */ 2941 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2942 ASSERT_EQ(true, WIFSTOPPED(status)); 2943 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2944 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2945 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2946 2947 ASSERT_EQ(0x200, msg); 2948 ret = get_syscall(_metadata, child_pid); 2949 #if defined(__arm__) 2950 /* 2951 * FIXME: 2952 * - native ARM registers do NOT expose true syscall. 2953 * - compat ARM registers on ARM64 DO expose true syscall. 2954 */ 2955 ASSERT_EQ(0, uname(&utsbuf)); 2956 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 2957 EXPECT_EQ(__NR_nanosleep, ret); 2958 } else 2959 #endif 2960 { 2961 EXPECT_EQ(__NR_restart_syscall, ret); 2962 } 2963 2964 /* Write again to end test. */ 2965 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2966 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 2967 EXPECT_EQ(0, close(pipefd[1])); 2968 2969 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2970 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 2971 _metadata->passed = 0; 2972 } 2973 2974 TEST_SIGNAL(filter_flag_log, SIGSYS) 2975 { 2976 struct sock_filter allow_filter[] = { 2977 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2978 }; 2979 struct sock_filter kill_filter[] = { 2980 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2981 offsetof(struct seccomp_data, nr)), 2982 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2983 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2984 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2985 }; 2986 struct sock_fprog allow_prog = { 2987 .len = (unsigned short)ARRAY_SIZE(allow_filter), 2988 .filter = allow_filter, 2989 }; 2990 struct sock_fprog kill_prog = { 2991 .len = (unsigned short)ARRAY_SIZE(kill_filter), 2992 .filter = kill_filter, 2993 }; 2994 long ret; 2995 pid_t parent = getppid(); 2996 2997 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2998 ASSERT_EQ(0, ret); 2999 3000 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 3001 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 3002 &allow_prog); 3003 ASSERT_NE(ENOSYS, errno) { 3004 TH_LOG("Kernel does not support seccomp syscall!"); 3005 } 3006 EXPECT_NE(0, ret) { 3007 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 3008 } 3009 EXPECT_EQ(EINVAL, errno) { 3010 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 3011 } 3012 3013 /* Verify that a simple, permissive filter can be added with no flags */ 3014 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 3015 EXPECT_EQ(0, ret); 3016 3017 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 3018 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3019 &allow_prog); 3020 ASSERT_NE(EINVAL, errno) { 3021 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 3022 } 3023 EXPECT_EQ(0, ret); 3024 3025 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 3026 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3027 &kill_prog); 3028 EXPECT_EQ(0, ret); 3029 3030 EXPECT_EQ(parent, syscall(__NR_getppid)); 3031 /* getpid() should never return. */ 3032 EXPECT_EQ(0, syscall(__NR_getpid)); 3033 } 3034 3035 TEST(get_action_avail) 3036 { 3037 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 3038 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 3039 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 3040 __u32 unknown_action = 0x10000000U; 3041 int i; 3042 long ret; 3043 3044 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 3045 ASSERT_NE(ENOSYS, errno) { 3046 TH_LOG("Kernel does not support seccomp syscall!"); 3047 } 3048 ASSERT_NE(EINVAL, errno) { 3049 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 3050 } 3051 EXPECT_EQ(ret, 0); 3052 3053 for (i = 0; i < ARRAY_SIZE(actions); i++) { 3054 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 3055 EXPECT_EQ(ret, 0) { 3056 TH_LOG("Expected action (0x%X) not available!", 3057 actions[i]); 3058 } 3059 } 3060 3061 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 3062 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 3063 EXPECT_EQ(ret, -1); 3064 EXPECT_EQ(errno, EOPNOTSUPP); 3065 } 3066 3067 TEST(get_metadata) 3068 { 3069 pid_t pid; 3070 int pipefd[2]; 3071 char buf; 3072 struct seccomp_metadata md; 3073 long ret; 3074 3075 /* Only real root can get metadata. */ 3076 if (geteuid()) { 3077 SKIP(return, "get_metadata requires real root"); 3078 return; 3079 } 3080 3081 ASSERT_EQ(0, pipe(pipefd)); 3082 3083 pid = fork(); 3084 ASSERT_GE(pid, 0); 3085 if (pid == 0) { 3086 struct sock_filter filter[] = { 3087 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3088 }; 3089 struct sock_fprog prog = { 3090 .len = (unsigned short)ARRAY_SIZE(filter), 3091 .filter = filter, 3092 }; 3093 3094 /* one with log, one without */ 3095 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3096 SECCOMP_FILTER_FLAG_LOG, &prog)); 3097 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3098 3099 EXPECT_EQ(0, close(pipefd[0])); 3100 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3101 ASSERT_EQ(0, close(pipefd[1])); 3102 3103 while (1) 3104 sleep(100); 3105 } 3106 3107 ASSERT_EQ(0, close(pipefd[1])); 3108 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3109 3110 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3111 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3112 3113 /* Past here must not use ASSERT or child process is never killed. */ 3114 3115 md.filter_off = 0; 3116 errno = 0; 3117 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3118 EXPECT_EQ(sizeof(md), ret) { 3119 if (errno == EINVAL) 3120 SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3121 } 3122 3123 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3124 EXPECT_EQ(md.filter_off, 0); 3125 3126 md.filter_off = 1; 3127 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3128 EXPECT_EQ(sizeof(md), ret); 3129 EXPECT_EQ(md.flags, 0); 3130 EXPECT_EQ(md.filter_off, 1); 3131 3132 skip: 3133 ASSERT_EQ(0, kill(pid, SIGKILL)); 3134 } 3135 3136 static int user_notif_syscall(int nr, unsigned int flags) 3137 { 3138 struct sock_filter filter[] = { 3139 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 3140 offsetof(struct seccomp_data, nr)), 3141 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), 3142 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), 3143 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), 3144 }; 3145 3146 struct sock_fprog prog = { 3147 .len = (unsigned short)ARRAY_SIZE(filter), 3148 .filter = filter, 3149 }; 3150 3151 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3152 } 3153 3154 #define USER_NOTIF_MAGIC INT_MAX 3155 TEST(user_notification_basic) 3156 { 3157 pid_t pid; 3158 long ret; 3159 int status, listener; 3160 struct seccomp_notif req = {}; 3161 struct seccomp_notif_resp resp = {}; 3162 struct pollfd pollfd; 3163 3164 struct sock_filter filter[] = { 3165 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3166 }; 3167 struct sock_fprog prog = { 3168 .len = (unsigned short)ARRAY_SIZE(filter), 3169 .filter = filter, 3170 }; 3171 3172 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3173 ASSERT_EQ(0, ret) { 3174 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3175 } 3176 3177 pid = fork(); 3178 ASSERT_GE(pid, 0); 3179 3180 /* Check that we get -ENOSYS with no listener attached */ 3181 if (pid == 0) { 3182 if (user_notif_syscall(__NR_getppid, 0) < 0) 3183 exit(1); 3184 ret = syscall(__NR_getppid); 3185 exit(ret >= 0 || errno != ENOSYS); 3186 } 3187 3188 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3189 EXPECT_EQ(true, WIFEXITED(status)); 3190 EXPECT_EQ(0, WEXITSTATUS(status)); 3191 3192 /* Add some no-op filters for grins. */ 3193 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3194 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3195 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3196 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3197 3198 /* Check that the basic notification machinery works */ 3199 listener = user_notif_syscall(__NR_getppid, 3200 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3201 ASSERT_GE(listener, 0); 3202 3203 /* Installing a second listener in the chain should EBUSY */ 3204 EXPECT_EQ(user_notif_syscall(__NR_getppid, 3205 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3206 -1); 3207 EXPECT_EQ(errno, EBUSY); 3208 3209 pid = fork(); 3210 ASSERT_GE(pid, 0); 3211 3212 if (pid == 0) { 3213 ret = syscall(__NR_getppid); 3214 exit(ret != USER_NOTIF_MAGIC); 3215 } 3216 3217 pollfd.fd = listener; 3218 pollfd.events = POLLIN | POLLOUT; 3219 3220 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3221 EXPECT_EQ(pollfd.revents, POLLIN); 3222 3223 /* Test that we can't pass garbage to the kernel. */ 3224 memset(&req, 0, sizeof(req)); 3225 req.pid = -1; 3226 errno = 0; 3227 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 3228 EXPECT_EQ(-1, ret); 3229 EXPECT_EQ(EINVAL, errno); 3230 3231 if (ret) { 3232 req.pid = 0; 3233 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3234 } 3235 3236 pollfd.fd = listener; 3237 pollfd.events = POLLIN | POLLOUT; 3238 3239 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3240 EXPECT_EQ(pollfd.revents, POLLOUT); 3241 3242 EXPECT_EQ(req.data.nr, __NR_getppid); 3243 3244 resp.id = req.id; 3245 resp.error = 0; 3246 resp.val = USER_NOTIF_MAGIC; 3247 3248 /* check that we make sure flags == 0 */ 3249 resp.flags = 1; 3250 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3251 EXPECT_EQ(errno, EINVAL); 3252 3253 resp.flags = 0; 3254 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3255 3256 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3257 EXPECT_EQ(true, WIFEXITED(status)); 3258 EXPECT_EQ(0, WEXITSTATUS(status)); 3259 } 3260 3261 TEST(user_notification_with_tsync) 3262 { 3263 int ret; 3264 unsigned int flags; 3265 3266 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3267 ASSERT_EQ(0, ret) { 3268 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3269 } 3270 3271 /* these were exclusive */ 3272 flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | 3273 SECCOMP_FILTER_FLAG_TSYNC; 3274 ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); 3275 ASSERT_EQ(EINVAL, errno); 3276 3277 /* but now they're not */ 3278 flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 3279 ret = user_notif_syscall(__NR_getppid, flags); 3280 close(ret); 3281 ASSERT_LE(0, ret); 3282 } 3283 3284 TEST(user_notification_kill_in_middle) 3285 { 3286 pid_t pid; 3287 long ret; 3288 int listener; 3289 struct seccomp_notif req = {}; 3290 struct seccomp_notif_resp resp = {}; 3291 3292 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3293 ASSERT_EQ(0, ret) { 3294 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3295 } 3296 3297 listener = user_notif_syscall(__NR_getppid, 3298 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3299 ASSERT_GE(listener, 0); 3300 3301 /* 3302 * Check that nothing bad happens when we kill the task in the middle 3303 * of a syscall. 3304 */ 3305 pid = fork(); 3306 ASSERT_GE(pid, 0); 3307 3308 if (pid == 0) { 3309 ret = syscall(__NR_getppid); 3310 exit(ret != USER_NOTIF_MAGIC); 3311 } 3312 3313 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3314 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3315 3316 EXPECT_EQ(kill(pid, SIGKILL), 0); 3317 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3318 3319 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3320 3321 resp.id = req.id; 3322 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3323 EXPECT_EQ(ret, -1); 3324 EXPECT_EQ(errno, ENOENT); 3325 } 3326 3327 static int handled = -1; 3328 3329 static void signal_handler(int signal) 3330 { 3331 if (write(handled, "c", 1) != 1) 3332 perror("write from signal"); 3333 } 3334 3335 TEST(user_notification_signal) 3336 { 3337 pid_t pid; 3338 long ret; 3339 int status, listener, sk_pair[2]; 3340 struct seccomp_notif req = {}; 3341 struct seccomp_notif_resp resp = {}; 3342 char c; 3343 3344 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3345 ASSERT_EQ(0, ret) { 3346 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3347 } 3348 3349 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3350 3351 listener = user_notif_syscall(__NR_gettid, 3352 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3353 ASSERT_GE(listener, 0); 3354 3355 pid = fork(); 3356 ASSERT_GE(pid, 0); 3357 3358 if (pid == 0) { 3359 close(sk_pair[0]); 3360 handled = sk_pair[1]; 3361 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3362 perror("signal"); 3363 exit(1); 3364 } 3365 /* 3366 * ERESTARTSYS behavior is a bit hard to test, because we need 3367 * to rely on a signal that has not yet been handled. Let's at 3368 * least check that the error code gets propagated through, and 3369 * hope that it doesn't break when there is actually a signal :) 3370 */ 3371 ret = syscall(__NR_gettid); 3372 exit(!(ret == -1 && errno == 512)); 3373 } 3374 3375 close(sk_pair[1]); 3376 3377 memset(&req, 0, sizeof(req)); 3378 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3379 3380 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3381 3382 /* 3383 * Make sure the signal really is delivered, which means we're not 3384 * stuck in the user notification code any more and the notification 3385 * should be dead. 3386 */ 3387 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3388 3389 resp.id = req.id; 3390 resp.error = -EPERM; 3391 resp.val = 0; 3392 3393 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3394 EXPECT_EQ(errno, ENOENT); 3395 3396 memset(&req, 0, sizeof(req)); 3397 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3398 3399 resp.id = req.id; 3400 resp.error = -512; /* -ERESTARTSYS */ 3401 resp.val = 0; 3402 3403 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3404 3405 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3406 EXPECT_EQ(true, WIFEXITED(status)); 3407 EXPECT_EQ(0, WEXITSTATUS(status)); 3408 } 3409 3410 TEST(user_notification_closed_listener) 3411 { 3412 pid_t pid; 3413 long ret; 3414 int status, listener; 3415 3416 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3417 ASSERT_EQ(0, ret) { 3418 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3419 } 3420 3421 listener = user_notif_syscall(__NR_getppid, 3422 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3423 ASSERT_GE(listener, 0); 3424 3425 /* 3426 * Check that we get an ENOSYS when the listener is closed. 3427 */ 3428 pid = fork(); 3429 ASSERT_GE(pid, 0); 3430 if (pid == 0) { 3431 close(listener); 3432 ret = syscall(__NR_getppid); 3433 exit(ret != -1 && errno != ENOSYS); 3434 } 3435 3436 close(listener); 3437 3438 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3439 EXPECT_EQ(true, WIFEXITED(status)); 3440 EXPECT_EQ(0, WEXITSTATUS(status)); 3441 } 3442 3443 /* 3444 * Check that a pid in a child namespace still shows up as valid in ours. 3445 */ 3446 TEST(user_notification_child_pid_ns) 3447 { 3448 pid_t pid; 3449 int status, listener; 3450 struct seccomp_notif req = {}; 3451 struct seccomp_notif_resp resp = {}; 3452 3453 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { 3454 if (errno == EINVAL) 3455 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3456 }; 3457 3458 listener = user_notif_syscall(__NR_getppid, 3459 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3460 ASSERT_GE(listener, 0); 3461 3462 pid = fork(); 3463 ASSERT_GE(pid, 0); 3464 3465 if (pid == 0) 3466 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3467 3468 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3469 EXPECT_EQ(req.pid, pid); 3470 3471 resp.id = req.id; 3472 resp.error = 0; 3473 resp.val = USER_NOTIF_MAGIC; 3474 3475 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3476 3477 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3478 EXPECT_EQ(true, WIFEXITED(status)); 3479 EXPECT_EQ(0, WEXITSTATUS(status)); 3480 close(listener); 3481 } 3482 3483 /* 3484 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3485 * invalid. 3486 */ 3487 TEST(user_notification_sibling_pid_ns) 3488 { 3489 pid_t pid, pid2; 3490 int status, listener; 3491 struct seccomp_notif req = {}; 3492 struct seccomp_notif_resp resp = {}; 3493 3494 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3495 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3496 } 3497 3498 listener = user_notif_syscall(__NR_getppid, 3499 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3500 ASSERT_GE(listener, 0); 3501 3502 pid = fork(); 3503 ASSERT_GE(pid, 0); 3504 3505 if (pid == 0) { 3506 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3507 3508 pid2 = fork(); 3509 ASSERT_GE(pid2, 0); 3510 3511 if (pid2 == 0) 3512 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3513 3514 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3515 EXPECT_EQ(true, WIFEXITED(status)); 3516 EXPECT_EQ(0, WEXITSTATUS(status)); 3517 exit(WEXITSTATUS(status)); 3518 } 3519 3520 /* Create the sibling ns, and sibling in it. */ 3521 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3522 if (errno == EPERM) 3523 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3524 } 3525 ASSERT_EQ(errno, 0); 3526 3527 pid2 = fork(); 3528 ASSERT_GE(pid2, 0); 3529 3530 if (pid2 == 0) { 3531 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3532 /* 3533 * The pid should be 0, i.e. the task is in some namespace that 3534 * we can't "see". 3535 */ 3536 EXPECT_EQ(req.pid, 0); 3537 3538 resp.id = req.id; 3539 resp.error = 0; 3540 resp.val = USER_NOTIF_MAGIC; 3541 3542 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3543 exit(0); 3544 } 3545 3546 close(listener); 3547 3548 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3549 EXPECT_EQ(true, WIFEXITED(status)); 3550 EXPECT_EQ(0, WEXITSTATUS(status)); 3551 3552 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3553 EXPECT_EQ(true, WIFEXITED(status)); 3554 EXPECT_EQ(0, WEXITSTATUS(status)); 3555 } 3556 3557 TEST(user_notification_fault_recv) 3558 { 3559 pid_t pid; 3560 int status, listener; 3561 struct seccomp_notif req = {}; 3562 struct seccomp_notif_resp resp = {}; 3563 3564 ASSERT_EQ(unshare(CLONE_NEWUSER), 0); 3565 3566 listener = user_notif_syscall(__NR_getppid, 3567 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3568 ASSERT_GE(listener, 0); 3569 3570 pid = fork(); 3571 ASSERT_GE(pid, 0); 3572 3573 if (pid == 0) 3574 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3575 3576 /* Do a bad recv() */ 3577 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3578 EXPECT_EQ(errno, EFAULT); 3579 3580 /* We should still be able to receive this notification, though. */ 3581 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3582 EXPECT_EQ(req.pid, pid); 3583 3584 resp.id = req.id; 3585 resp.error = 0; 3586 resp.val = USER_NOTIF_MAGIC; 3587 3588 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3589 3590 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3591 EXPECT_EQ(true, WIFEXITED(status)); 3592 EXPECT_EQ(0, WEXITSTATUS(status)); 3593 } 3594 3595 TEST(seccomp_get_notif_sizes) 3596 { 3597 struct seccomp_notif_sizes sizes; 3598 3599 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3600 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3601 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3602 } 3603 3604 TEST(user_notification_continue) 3605 { 3606 pid_t pid; 3607 long ret; 3608 int status, listener; 3609 struct seccomp_notif req = {}; 3610 struct seccomp_notif_resp resp = {}; 3611 struct pollfd pollfd; 3612 3613 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3614 ASSERT_EQ(0, ret) { 3615 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3616 } 3617 3618 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3619 ASSERT_GE(listener, 0); 3620 3621 pid = fork(); 3622 ASSERT_GE(pid, 0); 3623 3624 if (pid == 0) { 3625 int dup_fd, pipe_fds[2]; 3626 pid_t self; 3627 3628 ASSERT_GE(pipe(pipe_fds), 0); 3629 3630 dup_fd = dup(pipe_fds[0]); 3631 ASSERT_GE(dup_fd, 0); 3632 EXPECT_NE(pipe_fds[0], dup_fd); 3633 3634 self = getpid(); 3635 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); 3636 exit(0); 3637 } 3638 3639 pollfd.fd = listener; 3640 pollfd.events = POLLIN | POLLOUT; 3641 3642 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3643 EXPECT_EQ(pollfd.revents, POLLIN); 3644 3645 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3646 3647 pollfd.fd = listener; 3648 pollfd.events = POLLIN | POLLOUT; 3649 3650 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3651 EXPECT_EQ(pollfd.revents, POLLOUT); 3652 3653 EXPECT_EQ(req.data.nr, __NR_dup); 3654 3655 resp.id = req.id; 3656 resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; 3657 3658 /* 3659 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other 3660 * args be set to 0. 3661 */ 3662 resp.error = 0; 3663 resp.val = USER_NOTIF_MAGIC; 3664 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3665 EXPECT_EQ(errno, EINVAL); 3666 3667 resp.error = USER_NOTIF_MAGIC; 3668 resp.val = 0; 3669 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3670 EXPECT_EQ(errno, EINVAL); 3671 3672 resp.error = 0; 3673 resp.val = 0; 3674 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { 3675 if (errno == EINVAL) 3676 SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); 3677 } 3678 3679 skip: 3680 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3681 EXPECT_EQ(true, WIFEXITED(status)); 3682 EXPECT_EQ(0, WEXITSTATUS(status)) { 3683 if (WEXITSTATUS(status) == 2) { 3684 SKIP(return, "Kernel does not support kcmp() syscall"); 3685 return; 3686 } 3687 } 3688 } 3689 3690 TEST(user_notification_filter_empty) 3691 { 3692 pid_t pid; 3693 long ret; 3694 int status; 3695 struct pollfd pollfd; 3696 struct clone_args args = { 3697 .flags = CLONE_FILES, 3698 .exit_signal = SIGCHLD, 3699 }; 3700 3701 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3702 ASSERT_EQ(0, ret) { 3703 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3704 } 3705 3706 pid = sys_clone3(&args, sizeof(args)); 3707 ASSERT_GE(pid, 0); 3708 3709 if (pid == 0) { 3710 int listener; 3711 3712 listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3713 if (listener < 0) 3714 _exit(EXIT_FAILURE); 3715 3716 if (dup2(listener, 200) != 200) 3717 _exit(EXIT_FAILURE); 3718 3719 close(listener); 3720 3721 _exit(EXIT_SUCCESS); 3722 } 3723 3724 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3725 EXPECT_EQ(true, WIFEXITED(status)); 3726 EXPECT_EQ(0, WEXITSTATUS(status)); 3727 3728 /* 3729 * The seccomp filter has become unused so we should be notified once 3730 * the kernel gets around to cleaning up task struct. 3731 */ 3732 pollfd.fd = 200; 3733 pollfd.events = POLLHUP; 3734 3735 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3736 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3737 } 3738 3739 static void *do_thread(void *data) 3740 { 3741 return NULL; 3742 } 3743 3744 TEST(user_notification_filter_empty_threaded) 3745 { 3746 pid_t pid; 3747 long ret; 3748 int status; 3749 struct pollfd pollfd; 3750 struct clone_args args = { 3751 .flags = CLONE_FILES, 3752 .exit_signal = SIGCHLD, 3753 }; 3754 3755 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3756 ASSERT_EQ(0, ret) { 3757 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3758 } 3759 3760 pid = sys_clone3(&args, sizeof(args)); 3761 ASSERT_GE(pid, 0); 3762 3763 if (pid == 0) { 3764 pid_t pid1, pid2; 3765 int listener, status; 3766 pthread_t thread; 3767 3768 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3769 if (listener < 0) 3770 _exit(EXIT_FAILURE); 3771 3772 if (dup2(listener, 200) != 200) 3773 _exit(EXIT_FAILURE); 3774 3775 close(listener); 3776 3777 pid1 = fork(); 3778 if (pid1 < 0) 3779 _exit(EXIT_FAILURE); 3780 3781 if (pid1 == 0) 3782 _exit(EXIT_SUCCESS); 3783 3784 pid2 = fork(); 3785 if (pid2 < 0) 3786 _exit(EXIT_FAILURE); 3787 3788 if (pid2 == 0) 3789 _exit(EXIT_SUCCESS); 3790 3791 if (pthread_create(&thread, NULL, do_thread, NULL) || 3792 pthread_join(thread, NULL)) 3793 _exit(EXIT_FAILURE); 3794 3795 if (pthread_create(&thread, NULL, do_thread, NULL) || 3796 pthread_join(thread, NULL)) 3797 _exit(EXIT_FAILURE); 3798 3799 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || 3800 WEXITSTATUS(status)) 3801 _exit(EXIT_FAILURE); 3802 3803 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || 3804 WEXITSTATUS(status)) 3805 _exit(EXIT_FAILURE); 3806 3807 exit(EXIT_SUCCESS); 3808 } 3809 3810 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3811 EXPECT_EQ(true, WIFEXITED(status)); 3812 EXPECT_EQ(0, WEXITSTATUS(status)); 3813 3814 /* 3815 * The seccomp filter has become unused so we should be notified once 3816 * the kernel gets around to cleaning up task struct. 3817 */ 3818 pollfd.fd = 200; 3819 pollfd.events = POLLHUP; 3820 3821 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3822 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3823 } 3824 3825 TEST(user_notification_addfd) 3826 { 3827 pid_t pid; 3828 long ret; 3829 int status, listener, memfd, fd; 3830 struct seccomp_notif_addfd addfd = {}; 3831 struct seccomp_notif_addfd_small small = {}; 3832 struct seccomp_notif_addfd_big big = {}; 3833 struct seccomp_notif req = {}; 3834 struct seccomp_notif_resp resp = {}; 3835 /* 100 ms */ 3836 struct timespec delay = { .tv_nsec = 100000000 }; 3837 3838 memfd = memfd_create("test", 0); 3839 ASSERT_GE(memfd, 0); 3840 3841 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3842 ASSERT_EQ(0, ret) { 3843 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3844 } 3845 3846 /* Check that the basic notification machinery works */ 3847 listener = user_notif_syscall(__NR_getppid, 3848 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3849 ASSERT_GE(listener, 0); 3850 3851 pid = fork(); 3852 ASSERT_GE(pid, 0); 3853 3854 if (pid == 0) { 3855 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) 3856 exit(1); 3857 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3858 } 3859 3860 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3861 3862 addfd.srcfd = memfd; 3863 addfd.newfd = 0; 3864 addfd.id = req.id; 3865 addfd.flags = 0x0; 3866 3867 /* Verify bad newfd_flags cannot be set */ 3868 addfd.newfd_flags = ~O_CLOEXEC; 3869 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3870 EXPECT_EQ(errno, EINVAL); 3871 addfd.newfd_flags = O_CLOEXEC; 3872 3873 /* Verify bad flags cannot be set */ 3874 addfd.flags = 0xff; 3875 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3876 EXPECT_EQ(errno, EINVAL); 3877 addfd.flags = 0; 3878 3879 /* Verify that remote_fd cannot be set without setting flags */ 3880 addfd.newfd = 1; 3881 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3882 EXPECT_EQ(errno, EINVAL); 3883 addfd.newfd = 0; 3884 3885 /* Verify small size cannot be set */ 3886 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); 3887 EXPECT_EQ(errno, EINVAL); 3888 3889 /* Verify we can't send bits filled in unknown buffer area */ 3890 memset(&big, 0xAA, sizeof(big)); 3891 big.addfd = addfd; 3892 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); 3893 EXPECT_EQ(errno, E2BIG); 3894 3895 3896 /* Verify we can set an arbitrary remote fd */ 3897 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 3898 /* 3899 * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), 3900 * 4(listener), so the newly allocated fd should be 5. 3901 */ 3902 EXPECT_EQ(fd, 5); 3903 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 3904 3905 /* Verify we can set an arbitrary remote fd with large size */ 3906 memset(&big, 0x0, sizeof(big)); 3907 big.addfd = addfd; 3908 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); 3909 EXPECT_EQ(fd, 6); 3910 3911 /* Verify we can set a specific remote fd */ 3912 addfd.newfd = 42; 3913 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 3914 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 3915 EXPECT_EQ(fd, 42); 3916 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 3917 3918 /* Resume syscall */ 3919 resp.id = req.id; 3920 resp.error = 0; 3921 resp.val = USER_NOTIF_MAGIC; 3922 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3923 3924 /* 3925 * This sets the ID of the ADD FD to the last request plus 1. The 3926 * notification ID increments 1 per notification. 3927 */ 3928 addfd.id = req.id + 1; 3929 3930 /* This spins until the underlying notification is generated */ 3931 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 3932 errno != -EINPROGRESS) 3933 nanosleep(&delay, NULL); 3934 3935 memset(&req, 0, sizeof(req)); 3936 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3937 ASSERT_EQ(addfd.id, req.id); 3938 3939 resp.id = req.id; 3940 resp.error = 0; 3941 resp.val = USER_NOTIF_MAGIC; 3942 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3943 3944 /* Wait for child to finish. */ 3945 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3946 EXPECT_EQ(true, WIFEXITED(status)); 3947 EXPECT_EQ(0, WEXITSTATUS(status)); 3948 3949 close(memfd); 3950 } 3951 3952 TEST(user_notification_addfd_rlimit) 3953 { 3954 pid_t pid; 3955 long ret; 3956 int status, listener, memfd; 3957 struct seccomp_notif_addfd addfd = {}; 3958 struct seccomp_notif req = {}; 3959 struct seccomp_notif_resp resp = {}; 3960 const struct rlimit lim = { 3961 .rlim_cur = 0, 3962 .rlim_max = 0, 3963 }; 3964 3965 memfd = memfd_create("test", 0); 3966 ASSERT_GE(memfd, 0); 3967 3968 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3969 ASSERT_EQ(0, ret) { 3970 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3971 } 3972 3973 /* Check that the basic notification machinery works */ 3974 listener = user_notif_syscall(__NR_getppid, 3975 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3976 ASSERT_GE(listener, 0); 3977 3978 pid = fork(); 3979 ASSERT_GE(pid, 0); 3980 3981 if (pid == 0) 3982 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3983 3984 3985 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3986 3987 ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); 3988 3989 addfd.srcfd = memfd; 3990 addfd.newfd_flags = O_CLOEXEC; 3991 addfd.newfd = 0; 3992 addfd.id = req.id; 3993 addfd.flags = 0; 3994 3995 /* Should probably spot check /proc/sys/fs/file-nr */ 3996 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3997 EXPECT_EQ(errno, EMFILE); 3998 3999 addfd.newfd = 100; 4000 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4001 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4002 EXPECT_EQ(errno, EBADF); 4003 4004 resp.id = req.id; 4005 resp.error = 0; 4006 resp.val = USER_NOTIF_MAGIC; 4007 4008 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4009 4010 /* Wait for child to finish. */ 4011 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4012 EXPECT_EQ(true, WIFEXITED(status)); 4013 EXPECT_EQ(0, WEXITSTATUS(status)); 4014 4015 close(memfd); 4016 } 4017 4018 /* 4019 * TODO: 4020 * - expand NNP testing 4021 * - better arch-specific TRACE and TRAP handlers. 4022 * - endianness checking when appropriate 4023 * - 64-bit arg prodding 4024 * - arch value testing (x86 modes especially) 4025 * - verify that FILTER_FLAG_LOG filters generate log messages 4026 * - verify that RET_LOG generates log messages 4027 */ 4028 4029 TEST_HARNESS_MAIN 4030