// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 *
 * Test code for seccomp bpf.
 */

#define _GNU_SOURCE
#include <sys/types.h>

/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 */
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/times.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
#include <sys/resource.h>

#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>

#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"

/* Attempt to de-conflict with the selftests tree. */
#ifndef SKIP
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif

#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif

#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
#endif
#ifndef SECCOMP_RET_KILL
#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
#endif

#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__riscv)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0 140 #endif 141 142 #ifndef SECCOMP_SET_MODE_FILTER 143 #define SECCOMP_SET_MODE_FILTER 1 144 #endif 145 146 #ifndef SECCOMP_GET_ACTION_AVAIL 147 #define SECCOMP_GET_ACTION_AVAIL 2 148 #endif 149 150 #ifndef SECCOMP_GET_NOTIF_SIZES 151 #define SECCOMP_GET_NOTIF_SIZES 3 152 #endif 153 154 #ifndef SECCOMP_FILTER_FLAG_TSYNC 155 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 156 #endif 157 158 #ifndef SECCOMP_FILTER_FLAG_LOG 159 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 160 #endif 161 162 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW 163 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 164 #endif 165 166 #ifndef PTRACE_SECCOMP_GET_METADATA 167 #define PTRACE_SECCOMP_GET_METADATA 0x420d 168 169 struct seccomp_metadata { 170 __u64 filter_off; /* Input: which filter */ 171 __u64 flags; /* Output: filter's flags */ 172 }; 173 #endif 174 175 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER 176 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 177 #endif 178 179 #ifndef SECCOMP_RET_USER_NOTIF 180 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U 181 182 #define SECCOMP_IOC_MAGIC '!' 183 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 184 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 185 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 186 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 187 188 /* Flags for seccomp notification fd ioctl. */ 189 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 190 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 191 struct seccomp_notif_resp) 192 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) 193 194 struct seccomp_notif { 195 __u64 id; 196 __u32 pid; 197 __u32 flags; 198 struct seccomp_data data; 199 }; 200 201 struct seccomp_notif_resp { 202 __u64 id; 203 __s64 val; 204 __s32 error; 205 __u32 flags; 206 }; 207 208 struct seccomp_notif_sizes { 209 __u16 seccomp_notif; 210 __u16 seccomp_notif_resp; 211 __u16 seccomp_data; 212 }; 213 #endif 214 215 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD 216 /* On success, the return value is the remote process's added fd number */ 217 #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ 218 struct seccomp_notif_addfd) 219 220 /* valid flags for seccomp_notif_addfd */ 221 #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ 222 223 struct seccomp_notif_addfd { 224 __u64 id; 225 __u32 flags; 226 __u32 srcfd; 227 __u32 newfd; 228 __u32 newfd_flags; 229 }; 230 #endif 231 232 struct seccomp_notif_addfd_small { 233 __u64 id; 234 char weird[4]; 235 }; 236 #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ 237 SECCOMP_IOW(3, struct seccomp_notif_addfd_small) 238 239 struct seccomp_notif_addfd_big { 240 union { 241 struct seccomp_notif_addfd addfd; 242 char buf[sizeof(struct seccomp_notif_addfd) + 8]; 243 }; 244 }; 245 #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ 246 SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) 247 248 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY 249 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 250 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 251 #endif 252 253 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE 254 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 255 #endif 256 257 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH 258 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) 259 #endif 260 261 #ifndef seccomp 262 int seccomp(unsigned int op, unsigned int flags, void *args) 263 { 264 errno = 0; 265 return syscall(__NR_seccomp, op, flags, args); 266 } 267 #endif 268 269 #if __BYTE_ORDER == __LITTLE_ENDIAN 270 #define syscall_arg(_n) (offsetof(struct 
seccomp_data, args[_n])) 271 #elif __BYTE_ORDER == __BIG_ENDIAN 272 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) 273 #else 274 #error "wut? Unknown __BYTE_ORDER?!" 275 #endif 276 277 #define SIBLING_EXIT_UNKILLED 0xbadbeef 278 #define SIBLING_EXIT_FAILURE 0xbadface 279 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed 280 281 static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) 282 { 283 #ifdef __NR_kcmp 284 errno = 0; 285 return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); 286 #else 287 errno = ENOSYS; 288 return -1; 289 #endif 290 } 291 292 /* Have TH_LOG report actual location filecmp() is used. */ 293 #define filecmp(pid1, pid2, fd1, fd2) ({ \ 294 int _ret; \ 295 \ 296 _ret = __filecmp(pid1, pid2, fd1, fd2); \ 297 if (_ret != 0) { \ 298 if (_ret < 0 && errno == ENOSYS) { \ 299 TH_LOG("kcmp() syscall missing (test is less accurate)");\ 300 _ret = 0; \ 301 } \ 302 } \ 303 _ret; }) 304 305 TEST(kcmp) 306 { 307 int ret; 308 309 ret = __filecmp(getpid(), getpid(), 1, 1); 310 EXPECT_EQ(ret, 0); 311 if (ret != 0 && errno == ENOSYS) 312 SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)"); 313 } 314 315 TEST(mode_strict_support) 316 { 317 long ret; 318 319 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 320 ASSERT_EQ(0, ret) { 321 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 322 } 323 syscall(__NR_exit, 0); 324 } 325 326 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 327 { 328 long ret; 329 330 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 331 ASSERT_EQ(0, ret) { 332 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 333 } 334 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 335 NULL, NULL, NULL); 336 EXPECT_FALSE(true) { 337 TH_LOG("Unreachable!"); 338 } 339 } 340 341 /* Note! This doesn't test no new privs behavior */ 342 TEST(no_new_privs_support) 343 { 344 long ret; 345 346 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 347 EXPECT_EQ(0, ret) { 348 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 349 } 350 } 351 352 /* Tests kernel support by checking for a copy_from_user() fault on NULL. 
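 *
 * With no_new_privs set, passing a NULL filter pointer to
 * PR_SET_SECCOMP(SECCOMP_MODE_FILTER) should fail with EFAULT: the kernel
 * accepted the mode and only then faulted while copying the program. Any
 * other errno suggests CONFIG_SECCOMP_FILTER is missing, which the test
 * below reports.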
*/ 353 TEST(mode_filter_support) 354 { 355 long ret; 356 357 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 358 ASSERT_EQ(0, ret) { 359 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 360 } 361 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 362 EXPECT_EQ(-1, ret); 363 EXPECT_EQ(EFAULT, errno) { 364 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 365 } 366 } 367 368 TEST(mode_filter_without_nnp) 369 { 370 struct sock_filter filter[] = { 371 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 372 }; 373 struct sock_fprog prog = { 374 .len = (unsigned short)ARRAY_SIZE(filter), 375 .filter = filter, 376 }; 377 long ret; 378 379 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 380 ASSERT_LE(0, ret) { 381 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 382 } 383 errno = 0; 384 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 385 /* Succeeds with CAP_SYS_ADMIN, fails without */ 386 /* TODO(wad) check caps not euid */ 387 if (geteuid()) { 388 EXPECT_EQ(-1, ret); 389 EXPECT_EQ(EACCES, errno); 390 } else { 391 EXPECT_EQ(0, ret); 392 } 393 } 394 395 #define MAX_INSNS_PER_PATH 32768 396 397 TEST(filter_size_limits) 398 { 399 int i; 400 int count = BPF_MAXINSNS + 1; 401 struct sock_filter allow[] = { 402 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 403 }; 404 struct sock_filter *filter; 405 struct sock_fprog prog = { }; 406 long ret; 407 408 filter = calloc(count, sizeof(*filter)); 409 ASSERT_NE(NULL, filter); 410 411 for (i = 0; i < count; i++) 412 filter[i] = allow[0]; 413 414 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 415 ASSERT_EQ(0, ret); 416 417 prog.filter = filter; 418 prog.len = count; 419 420 /* Too many filter instructions in a single filter. */ 421 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 422 ASSERT_NE(0, ret) { 423 TH_LOG("Installing %d insn filter was allowed", prog.len); 424 } 425 426 /* One less is okay, though. */ 427 prog.len -= 1; 428 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 429 ASSERT_EQ(0, ret) { 430 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 431 } 432 } 433 434 TEST(filter_chain_limits) 435 { 436 int i; 437 int count = BPF_MAXINSNS; 438 struct sock_filter allow[] = { 439 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 440 }; 441 struct sock_filter *filter; 442 struct sock_fprog prog = { }; 443 long ret; 444 445 filter = calloc(count, sizeof(*filter)); 446 ASSERT_NE(NULL, filter); 447 448 for (i = 0; i < count; i++) 449 filter[i] = allow[0]; 450 451 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 452 ASSERT_EQ(0, ret); 453 454 prog.filter = filter; 455 prog.len = 1; 456 457 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 458 ASSERT_EQ(0, ret); 459 460 prog.len = count; 461 462 /* Too many total filter instructions. 
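 *
 * Each attached filter is charged its own length plus a small per-filter
 * overhead (the "+ 4" in the TH_LOG math below) against a total budget of
 * MAX_INSNS_PER_PATH instructions, so repeatedly attaching a
 * BPF_MAXINSNS-sized filter must eventually be rejected.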
*/ 463 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 464 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 465 if (ret != 0) 466 break; 467 } 468 ASSERT_NE(0, ret) { 469 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 470 i, count, i * (count + 4)); 471 } 472 } 473 474 TEST(mode_filter_cannot_move_to_strict) 475 { 476 struct sock_filter filter[] = { 477 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 478 }; 479 struct sock_fprog prog = { 480 .len = (unsigned short)ARRAY_SIZE(filter), 481 .filter = filter, 482 }; 483 long ret; 484 485 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 486 ASSERT_EQ(0, ret); 487 488 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 489 ASSERT_EQ(0, ret); 490 491 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 492 EXPECT_EQ(-1, ret); 493 EXPECT_EQ(EINVAL, errno); 494 } 495 496 497 TEST(mode_filter_get_seccomp) 498 { 499 struct sock_filter filter[] = { 500 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 501 }; 502 struct sock_fprog prog = { 503 .len = (unsigned short)ARRAY_SIZE(filter), 504 .filter = filter, 505 }; 506 long ret; 507 508 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 509 ASSERT_EQ(0, ret); 510 511 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 512 EXPECT_EQ(0, ret); 513 514 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 515 ASSERT_EQ(0, ret); 516 517 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 518 EXPECT_EQ(2, ret); 519 } 520 521 522 TEST(ALLOW_all) 523 { 524 struct sock_filter filter[] = { 525 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 526 }; 527 struct sock_fprog prog = { 528 .len = (unsigned short)ARRAY_SIZE(filter), 529 .filter = filter, 530 }; 531 long ret; 532 533 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 534 ASSERT_EQ(0, ret); 535 536 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 537 ASSERT_EQ(0, ret); 538 } 539 540 TEST(empty_prog) 541 { 542 struct sock_filter filter[] = { 543 }; 544 struct sock_fprog prog = { 545 .len = (unsigned short)ARRAY_SIZE(filter), 546 .filter = filter, 547 }; 548 long ret; 549 550 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 551 ASSERT_EQ(0, ret); 552 553 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 554 EXPECT_EQ(-1, ret); 555 EXPECT_EQ(EINVAL, errno); 556 } 557 558 TEST(log_all) 559 { 560 struct sock_filter filter[] = { 561 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 562 }; 563 struct sock_fprog prog = { 564 .len = (unsigned short)ARRAY_SIZE(filter), 565 .filter = filter, 566 }; 567 long ret; 568 pid_t parent = getppid(); 569 570 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 571 ASSERT_EQ(0, ret); 572 573 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 574 ASSERT_EQ(0, ret); 575 576 /* getppid() should succeed and be logged (no check for logging) */ 577 EXPECT_EQ(parent, syscall(__NR_getppid)); 578 } 579 580 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 581 { 582 struct sock_filter filter[] = { 583 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 584 }; 585 struct sock_fprog prog = { 586 .len = (unsigned short)ARRAY_SIZE(filter), 587 .filter = filter, 588 }; 589 long ret; 590 591 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 592 ASSERT_EQ(0, ret); 593 594 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 595 ASSERT_EQ(0, ret); 596 EXPECT_EQ(0, syscall(__NR_getpid)) { 597 TH_LOG("getpid() shouldn't ever return"); 598 } 599 } 600 601 /* return code >= 0x80000000 is unused. 
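 *
 * Filter return actions the kernel does not recognize are treated as
 * SECCOMP_RET_KILL, so both this test and unknown_ret_is_kill_inside above
 * expect the filtered getpid() to deliver SIGSYS rather than return.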
*/ 602 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) 603 { 604 struct sock_filter filter[] = { 605 BPF_STMT(BPF_RET|BPF_K, 0x90000000U), 606 }; 607 struct sock_fprog prog = { 608 .len = (unsigned short)ARRAY_SIZE(filter), 609 .filter = filter, 610 }; 611 long ret; 612 613 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 614 ASSERT_EQ(0, ret); 615 616 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 617 ASSERT_EQ(0, ret); 618 EXPECT_EQ(0, syscall(__NR_getpid)) { 619 TH_LOG("getpid() shouldn't ever return"); 620 } 621 } 622 623 TEST_SIGNAL(KILL_all, SIGSYS) 624 { 625 struct sock_filter filter[] = { 626 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 627 }; 628 struct sock_fprog prog = { 629 .len = (unsigned short)ARRAY_SIZE(filter), 630 .filter = filter, 631 }; 632 long ret; 633 634 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 635 ASSERT_EQ(0, ret); 636 637 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 638 ASSERT_EQ(0, ret); 639 } 640 641 TEST_SIGNAL(KILL_one, SIGSYS) 642 { 643 struct sock_filter filter[] = { 644 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 645 offsetof(struct seccomp_data, nr)), 646 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 647 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 648 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 649 }; 650 struct sock_fprog prog = { 651 .len = (unsigned short)ARRAY_SIZE(filter), 652 .filter = filter, 653 }; 654 long ret; 655 pid_t parent = getppid(); 656 657 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 658 ASSERT_EQ(0, ret); 659 660 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 661 ASSERT_EQ(0, ret); 662 663 EXPECT_EQ(parent, syscall(__NR_getppid)); 664 /* getpid() should never return. */ 665 EXPECT_EQ(0, syscall(__NR_getpid)); 666 } 667 668 TEST_SIGNAL(KILL_one_arg_one, SIGSYS) 669 { 670 void *fatal_address; 671 struct sock_filter filter[] = { 672 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 673 offsetof(struct seccomp_data, nr)), 674 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), 675 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 676 /* Only both with lower 32-bit for now. */ 677 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), 678 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 679 (unsigned long)&fatal_address, 0, 1), 680 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 681 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 682 }; 683 struct sock_fprog prog = { 684 .len = (unsigned short)ARRAY_SIZE(filter), 685 .filter = filter, 686 }; 687 long ret; 688 pid_t parent = getppid(); 689 struct tms timebuf; 690 clock_t clock = times(&timebuf); 691 692 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 693 ASSERT_EQ(0, ret); 694 695 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 696 ASSERT_EQ(0, ret); 697 698 EXPECT_EQ(parent, syscall(__NR_getppid)); 699 EXPECT_LE(clock, syscall(__NR_times, &timebuf)); 700 /* times() should never return. */ 701 EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); 702 } 703 704 TEST_SIGNAL(KILL_one_arg_six, SIGSYS) 705 { 706 #ifndef __NR_mmap2 707 int sysno = __NR_mmap; 708 #else 709 int sysno = __NR_mmap2; 710 #endif 711 struct sock_filter filter[] = { 712 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 713 offsetof(struct seccomp_data, nr)), 714 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), 715 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 716 /* Only both with lower 32-bit for now. 
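 *
 * Argument six (syscall_arg(5)) is the mmap/mmap2 file offset here: only
 * the magic offset 0x0C0FFEE below trips SECCOMP_RET_KILL, while the
 * ordinary page_size offset stays allowed.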
*/ 717 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), 718 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), 719 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 720 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 721 }; 722 struct sock_fprog prog = { 723 .len = (unsigned short)ARRAY_SIZE(filter), 724 .filter = filter, 725 }; 726 long ret; 727 pid_t parent = getppid(); 728 int fd; 729 void *map1, *map2; 730 int page_size = sysconf(_SC_PAGESIZE); 731 732 ASSERT_LT(0, page_size); 733 734 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 735 ASSERT_EQ(0, ret); 736 737 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 738 ASSERT_EQ(0, ret); 739 740 fd = open("/dev/zero", O_RDONLY); 741 ASSERT_NE(-1, fd); 742 743 EXPECT_EQ(parent, syscall(__NR_getppid)); 744 map1 = (void *)syscall(sysno, 745 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); 746 EXPECT_NE(MAP_FAILED, map1); 747 /* mmap2() should never return. */ 748 map2 = (void *)syscall(sysno, 749 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); 750 EXPECT_EQ(MAP_FAILED, map2); 751 752 /* The test failed, so clean up the resources. */ 753 munmap(map1, page_size); 754 munmap(map2, page_size); 755 close(fd); 756 } 757 758 /* This is a thread task to die via seccomp filter violation. */ 759 void *kill_thread(void *data) 760 { 761 bool die = (bool)data; 762 763 if (die) { 764 prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 765 return (void *)SIBLING_EXIT_FAILURE; 766 } 767 768 return (void *)SIBLING_EXIT_UNKILLED; 769 } 770 771 /* Prepare a thread that will kill itself or both of us. */ 772 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) 773 { 774 pthread_t thread; 775 void *status; 776 /* Kill only when calling __NR_prctl. */ 777 struct sock_filter filter_thread[] = { 778 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 779 offsetof(struct seccomp_data, nr)), 780 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 781 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 782 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 783 }; 784 struct sock_fprog prog_thread = { 785 .len = (unsigned short)ARRAY_SIZE(filter_thread), 786 .filter = filter_thread, 787 }; 788 struct sock_filter filter_process[] = { 789 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 790 offsetof(struct seccomp_data, nr)), 791 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 792 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), 793 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 794 }; 795 struct sock_fprog prog_process = { 796 .len = (unsigned short)ARRAY_SIZE(filter_process), 797 .filter = filter_process, 798 }; 799 800 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 801 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 802 } 803 804 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, 805 kill_process ? &prog_process : &prog_thread)); 806 807 /* 808 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS 809 * flag cannot be downgraded by a new filter. 810 */ 811 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 812 813 /* Start a thread that will exit immediately. */ 814 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 815 ASSERT_EQ(0, pthread_join(thread, &status)); 816 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 817 818 /* Start a thread that will die immediately. */ 819 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 820 ASSERT_EQ(0, pthread_join(thread, &status)); 821 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 822 823 /* 824 * If we get here, only the spawned thread died. 
Let the parent know 825 * the whole process didn't die (i.e. this thread, the spawner, 826 * stayed running). 827 */ 828 exit(42); 829 } 830 831 TEST(KILL_thread) 832 { 833 int status; 834 pid_t child_pid; 835 836 child_pid = fork(); 837 ASSERT_LE(0, child_pid); 838 if (child_pid == 0) { 839 kill_thread_or_group(_metadata, false); 840 _exit(38); 841 } 842 843 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 844 845 /* If only the thread was killed, we'll see exit 42. */ 846 ASSERT_TRUE(WIFEXITED(status)); 847 ASSERT_EQ(42, WEXITSTATUS(status)); 848 } 849 850 TEST(KILL_process) 851 { 852 int status; 853 pid_t child_pid; 854 855 child_pid = fork(); 856 ASSERT_LE(0, child_pid); 857 if (child_pid == 0) { 858 kill_thread_or_group(_metadata, true); 859 _exit(38); 860 } 861 862 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 863 864 /* If the entire process was killed, we'll see SIGSYS. */ 865 ASSERT_TRUE(WIFSIGNALED(status)); 866 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 867 } 868 869 /* TODO(wad) add 64-bit versus 32-bit arg tests. */ 870 TEST(arg_out_of_range) 871 { 872 struct sock_filter filter[] = { 873 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 874 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 875 }; 876 struct sock_fprog prog = { 877 .len = (unsigned short)ARRAY_SIZE(filter), 878 .filter = filter, 879 }; 880 long ret; 881 882 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 883 ASSERT_EQ(0, ret); 884 885 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 886 EXPECT_EQ(-1, ret); 887 EXPECT_EQ(EINVAL, errno); 888 } 889 890 #define ERRNO_FILTER(name, errno) \ 891 struct sock_filter _read_filter_##name[] = { \ 892 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 893 offsetof(struct seccomp_data, nr)), \ 894 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 895 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 896 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 897 }; \ 898 struct sock_fprog prog_##name = { \ 899 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 900 .filter = _read_filter_##name, \ 901 } 902 903 /* Make sure basic errno values are correctly passed through a filter. */ 904 TEST(ERRNO_valid) 905 { 906 ERRNO_FILTER(valid, E2BIG); 907 long ret; 908 pid_t parent = getppid(); 909 910 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 911 ASSERT_EQ(0, ret); 912 913 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 914 ASSERT_EQ(0, ret); 915 916 EXPECT_EQ(parent, syscall(__NR_getppid)); 917 EXPECT_EQ(-1, read(0, NULL, 0)); 918 EXPECT_EQ(E2BIG, errno); 919 } 920 921 /* Make sure an errno of zero is correctly handled by the arch code. */ 922 TEST(ERRNO_zero) 923 { 924 ERRNO_FILTER(zero, 0); 925 long ret; 926 pid_t parent = getppid(); 927 928 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 929 ASSERT_EQ(0, ret); 930 931 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 932 ASSERT_EQ(0, ret); 933 934 EXPECT_EQ(parent, syscall(__NR_getppid)); 935 /* "errno" of 0 is ok. */ 936 EXPECT_EQ(0, read(0, NULL, 0)); 937 } 938 939 /* 940 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 941 * This tests that the errno value gets capped correctly, fixed by 942 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 
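 *
 * MAX_ERRNO is 4095, so a filter asking for SECCOMP_RET_ERRNO | 4096 must
 * surface in userspace as errno 4095, which is what is checked below.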
943 */ 944 TEST(ERRNO_capped) 945 { 946 ERRNO_FILTER(capped, 4096); 947 long ret; 948 pid_t parent = getppid(); 949 950 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 951 ASSERT_EQ(0, ret); 952 953 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 954 ASSERT_EQ(0, ret); 955 956 EXPECT_EQ(parent, syscall(__NR_getppid)); 957 EXPECT_EQ(-1, read(0, NULL, 0)); 958 EXPECT_EQ(4095, errno); 959 } 960 961 /* 962 * Filters are processed in reverse order: last applied is executed first. 963 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 964 * SECCOMP_RET_DATA mask results will follow the most recently applied 965 * matching filter return (and not the lowest or highest value). 966 */ 967 TEST(ERRNO_order) 968 { 969 ERRNO_FILTER(first, 11); 970 ERRNO_FILTER(second, 13); 971 ERRNO_FILTER(third, 12); 972 long ret; 973 pid_t parent = getppid(); 974 975 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 976 ASSERT_EQ(0, ret); 977 978 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 979 ASSERT_EQ(0, ret); 980 981 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 982 ASSERT_EQ(0, ret); 983 984 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 985 ASSERT_EQ(0, ret); 986 987 EXPECT_EQ(parent, syscall(__NR_getppid)); 988 EXPECT_EQ(-1, read(0, NULL, 0)); 989 EXPECT_EQ(12, errno); 990 } 991 992 FIXTURE(TRAP) { 993 struct sock_fprog prog; 994 }; 995 996 FIXTURE_SETUP(TRAP) 997 { 998 struct sock_filter filter[] = { 999 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1000 offsetof(struct seccomp_data, nr)), 1001 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1002 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1003 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1004 }; 1005 1006 memset(&self->prog, 0, sizeof(self->prog)); 1007 self->prog.filter = malloc(sizeof(filter)); 1008 ASSERT_NE(NULL, self->prog.filter); 1009 memcpy(self->prog.filter, filter, sizeof(filter)); 1010 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1011 } 1012 1013 FIXTURE_TEARDOWN(TRAP) 1014 { 1015 if (self->prog.filter) 1016 free(self->prog.filter); 1017 } 1018 1019 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 1020 { 1021 long ret; 1022 1023 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1024 ASSERT_EQ(0, ret); 1025 1026 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1027 ASSERT_EQ(0, ret); 1028 syscall(__NR_getpid); 1029 } 1030 1031 /* Ensure that SIGSYS overrides SIG_IGN */ 1032 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 1033 { 1034 long ret; 1035 1036 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1037 ASSERT_EQ(0, ret); 1038 1039 signal(SIGSYS, SIG_IGN); 1040 1041 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1042 ASSERT_EQ(0, ret); 1043 syscall(__NR_getpid); 1044 } 1045 1046 static siginfo_t TRAP_info; 1047 static volatile int TRAP_nr; 1048 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 1049 { 1050 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 1051 TRAP_nr = nr; 1052 } 1053 1054 TEST_F(TRAP, handler) 1055 { 1056 int ret, test; 1057 struct sigaction act; 1058 sigset_t mask; 1059 1060 memset(&act, 0, sizeof(act)); 1061 sigemptyset(&mask); 1062 sigaddset(&mask, SIGSYS); 1063 1064 act.sa_sigaction = &TRAP_action; 1065 act.sa_flags = SA_SIGINFO; 1066 ret = sigaction(SIGSYS, &act, NULL); 1067 ASSERT_EQ(0, ret) { 1068 TH_LOG("sigaction failed"); 1069 } 1070 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 1071 ASSERT_EQ(0, ret) { 1072 TH_LOG("sigprocmask failed"); 1073 } 1074 1075 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1076 ASSERT_EQ(0, ret); 1077 ret = 
prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1078 ASSERT_EQ(0, ret); 1079 TRAP_nr = 0; 1080 memset(&TRAP_info, 0, sizeof(TRAP_info)); 1081 /* Expect the registers to be rolled back. (nr = error) may vary 1082 * based on arch. */ 1083 ret = syscall(__NR_getpid); 1084 /* Silence gcc warning about volatile. */ 1085 test = TRAP_nr; 1086 EXPECT_EQ(SIGSYS, test); 1087 struct local_sigsys { 1088 void *_call_addr; /* calling user insn */ 1089 int _syscall; /* triggering system call number */ 1090 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1091 } *sigsys = (struct local_sigsys *) 1092 #ifdef si_syscall 1093 &(TRAP_info.si_call_addr); 1094 #else 1095 &TRAP_info.si_pid; 1096 #endif 1097 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1098 /* Make sure arch is non-zero. */ 1099 EXPECT_NE(0, sigsys->_arch); 1100 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1101 } 1102 1103 FIXTURE(precedence) { 1104 struct sock_fprog allow; 1105 struct sock_fprog log; 1106 struct sock_fprog trace; 1107 struct sock_fprog error; 1108 struct sock_fprog trap; 1109 struct sock_fprog kill; 1110 }; 1111 1112 FIXTURE_SETUP(precedence) 1113 { 1114 struct sock_filter allow_insns[] = { 1115 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1116 }; 1117 struct sock_filter log_insns[] = { 1118 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1119 offsetof(struct seccomp_data, nr)), 1120 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1121 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1122 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1123 }; 1124 struct sock_filter trace_insns[] = { 1125 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1126 offsetof(struct seccomp_data, nr)), 1127 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1128 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1129 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1130 }; 1131 struct sock_filter error_insns[] = { 1132 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1133 offsetof(struct seccomp_data, nr)), 1134 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1135 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1136 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1137 }; 1138 struct sock_filter trap_insns[] = { 1139 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1140 offsetof(struct seccomp_data, nr)), 1141 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1142 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1143 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1144 }; 1145 struct sock_filter kill_insns[] = { 1146 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1147 offsetof(struct seccomp_data, nr)), 1148 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1149 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1150 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1151 }; 1152 1153 memset(self, 0, sizeof(*self)); 1154 #define FILTER_ALLOC(_x) \ 1155 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1156 ASSERT_NE(NULL, self->_x.filter); \ 1157 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1158 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1159 FILTER_ALLOC(allow); 1160 FILTER_ALLOC(log); 1161 FILTER_ALLOC(trace); 1162 FILTER_ALLOC(error); 1163 FILTER_ALLOC(trap); 1164 FILTER_ALLOC(kill); 1165 } 1166 1167 FIXTURE_TEARDOWN(precedence) 1168 { 1169 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1170 FILTER_FREE(allow); 1171 FILTER_FREE(log); 1172 FILTER_FREE(trace); 1173 FILTER_FREE(error); 1174 FILTER_FREE(trap); 1175 FILTER_FREE(kill); 1176 } 1177 1178 TEST_F(precedence, allow_ok) 1179 { 1180 pid_t parent, res = 0; 1181 long ret; 1182 1183 parent = getppid(); 1184 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1185 ASSERT_EQ(0, 
ret); 1186 1187 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1188 ASSERT_EQ(0, ret); 1189 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1190 ASSERT_EQ(0, ret); 1191 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1192 ASSERT_EQ(0, ret); 1193 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1194 ASSERT_EQ(0, ret); 1195 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1196 ASSERT_EQ(0, ret); 1197 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1198 ASSERT_EQ(0, ret); 1199 /* Should work just fine. */ 1200 res = syscall(__NR_getppid); 1201 EXPECT_EQ(parent, res); 1202 } 1203 1204 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1205 { 1206 pid_t parent, res = 0; 1207 long ret; 1208 1209 parent = getppid(); 1210 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1211 ASSERT_EQ(0, ret); 1212 1213 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1214 ASSERT_EQ(0, ret); 1215 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1216 ASSERT_EQ(0, ret); 1217 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1218 ASSERT_EQ(0, ret); 1219 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1220 ASSERT_EQ(0, ret); 1221 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1222 ASSERT_EQ(0, ret); 1223 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1224 ASSERT_EQ(0, ret); 1225 /* Should work just fine. */ 1226 res = syscall(__NR_getppid); 1227 EXPECT_EQ(parent, res); 1228 /* getpid() should never return. */ 1229 res = syscall(__NR_getpid); 1230 EXPECT_EQ(0, res); 1231 } 1232 1233 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1234 { 1235 pid_t parent; 1236 long ret; 1237 1238 parent = getppid(); 1239 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1240 ASSERT_EQ(0, ret); 1241 1242 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1243 ASSERT_EQ(0, ret); 1244 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1245 ASSERT_EQ(0, ret); 1246 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1247 ASSERT_EQ(0, ret); 1248 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1249 ASSERT_EQ(0, ret); 1250 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1251 ASSERT_EQ(0, ret); 1252 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1253 ASSERT_EQ(0, ret); 1254 /* Should work just fine. */ 1255 EXPECT_EQ(parent, syscall(__NR_getppid)); 1256 /* getpid() should never return. */ 1257 EXPECT_EQ(0, syscall(__NR_getpid)); 1258 } 1259 1260 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1261 { 1262 pid_t parent; 1263 long ret; 1264 1265 parent = getppid(); 1266 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1267 ASSERT_EQ(0, ret); 1268 1269 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1270 ASSERT_EQ(0, ret); 1271 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1272 ASSERT_EQ(0, ret); 1273 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1274 ASSERT_EQ(0, ret); 1275 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1276 ASSERT_EQ(0, ret); 1277 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1278 ASSERT_EQ(0, ret); 1279 /* Should work just fine. */ 1280 EXPECT_EQ(parent, syscall(__NR_getppid)); 1281 /* getpid() should never return. 
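 * (Of the filters installed above that match getpid(), SECCOMP_RET_TRAP
 * has the highest precedence once kill is absent, so SIGSYS fires here.)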
*/ 1282 EXPECT_EQ(0, syscall(__NR_getpid)); 1283 } 1284 1285 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1286 { 1287 pid_t parent; 1288 long ret; 1289 1290 parent = getppid(); 1291 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1292 ASSERT_EQ(0, ret); 1293 1294 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1295 ASSERT_EQ(0, ret); 1296 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1297 ASSERT_EQ(0, ret); 1298 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1299 ASSERT_EQ(0, ret); 1300 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1301 ASSERT_EQ(0, ret); 1302 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1303 ASSERT_EQ(0, ret); 1304 /* Should work just fine. */ 1305 EXPECT_EQ(parent, syscall(__NR_getppid)); 1306 /* getpid() should never return. */ 1307 EXPECT_EQ(0, syscall(__NR_getpid)); 1308 } 1309 1310 TEST_F(precedence, errno_is_third) 1311 { 1312 pid_t parent; 1313 long ret; 1314 1315 parent = getppid(); 1316 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1317 ASSERT_EQ(0, ret); 1318 1319 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1320 ASSERT_EQ(0, ret); 1321 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1322 ASSERT_EQ(0, ret); 1323 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1324 ASSERT_EQ(0, ret); 1325 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1326 ASSERT_EQ(0, ret); 1327 /* Should work just fine. */ 1328 EXPECT_EQ(parent, syscall(__NR_getppid)); 1329 EXPECT_EQ(0, syscall(__NR_getpid)); 1330 } 1331 1332 TEST_F(precedence, errno_is_third_in_any_order) 1333 { 1334 pid_t parent; 1335 long ret; 1336 1337 parent = getppid(); 1338 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1339 ASSERT_EQ(0, ret); 1340 1341 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1342 ASSERT_EQ(0, ret); 1343 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1344 ASSERT_EQ(0, ret); 1345 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1346 ASSERT_EQ(0, ret); 1347 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1348 ASSERT_EQ(0, ret); 1349 /* Should work just fine. */ 1350 EXPECT_EQ(parent, syscall(__NR_getppid)); 1351 EXPECT_EQ(0, syscall(__NR_getpid)); 1352 } 1353 1354 TEST_F(precedence, trace_is_fourth) 1355 { 1356 pid_t parent; 1357 long ret; 1358 1359 parent = getppid(); 1360 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1361 ASSERT_EQ(0, ret); 1362 1363 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1364 ASSERT_EQ(0, ret); 1365 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1366 ASSERT_EQ(0, ret); 1367 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1368 ASSERT_EQ(0, ret); 1369 /* Should work just fine. */ 1370 EXPECT_EQ(parent, syscall(__NR_getppid)); 1371 /* No ptracer */ 1372 EXPECT_EQ(-1, syscall(__NR_getpid)); 1373 } 1374 1375 TEST_F(precedence, trace_is_fourth_in_any_order) 1376 { 1377 pid_t parent; 1378 long ret; 1379 1380 parent = getppid(); 1381 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1382 ASSERT_EQ(0, ret); 1383 1384 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1385 ASSERT_EQ(0, ret); 1386 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1387 ASSERT_EQ(0, ret); 1388 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1389 ASSERT_EQ(0, ret); 1390 /* Should work just fine. 
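 *
 * getppid() is allowed by every installed filter; getpid() hits
 * SECCOMP_RET_TRACE, and with no tracer attached the kernel fails the call,
 * hence the -1 below (errno set to ENOSYS).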
*/ 1391 EXPECT_EQ(parent, syscall(__NR_getppid)); 1392 /* No ptracer */ 1393 EXPECT_EQ(-1, syscall(__NR_getpid)); 1394 } 1395 1396 TEST_F(precedence, log_is_fifth) 1397 { 1398 pid_t mypid, parent; 1399 long ret; 1400 1401 mypid = getpid(); 1402 parent = getppid(); 1403 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1404 ASSERT_EQ(0, ret); 1405 1406 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1407 ASSERT_EQ(0, ret); 1408 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1409 ASSERT_EQ(0, ret); 1410 /* Should work just fine. */ 1411 EXPECT_EQ(parent, syscall(__NR_getppid)); 1412 /* Should also work just fine */ 1413 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1414 } 1415 1416 TEST_F(precedence, log_is_fifth_in_any_order) 1417 { 1418 pid_t mypid, parent; 1419 long ret; 1420 1421 mypid = getpid(); 1422 parent = getppid(); 1423 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1424 ASSERT_EQ(0, ret); 1425 1426 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1427 ASSERT_EQ(0, ret); 1428 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1429 ASSERT_EQ(0, ret); 1430 /* Should work just fine. */ 1431 EXPECT_EQ(parent, syscall(__NR_getppid)); 1432 /* Should also work just fine */ 1433 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1434 } 1435 1436 #ifndef PTRACE_O_TRACESECCOMP 1437 #define PTRACE_O_TRACESECCOMP 0x00000080 1438 #endif 1439 1440 /* Catch the Ubuntu 12.04 value error. */ 1441 #if PTRACE_EVENT_SECCOMP != 7 1442 #undef PTRACE_EVENT_SECCOMP 1443 #endif 1444 1445 #ifndef PTRACE_EVENT_SECCOMP 1446 #define PTRACE_EVENT_SECCOMP 7 1447 #endif 1448 1449 #define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) 1450 bool tracer_running; 1451 void tracer_stop(int sig) 1452 { 1453 tracer_running = false; 1454 } 1455 1456 typedef void tracer_func_t(struct __test_metadata *_metadata, 1457 pid_t tracee, int status, void *args); 1458 1459 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1460 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1461 { 1462 int ret = -1; 1463 struct sigaction action = { 1464 .sa_handler = tracer_stop, 1465 }; 1466 1467 /* Allow external shutdown. */ 1468 tracer_running = true; 1469 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1470 1471 errno = 0; 1472 while (ret == -1 && errno != EINVAL) 1473 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1474 ASSERT_EQ(0, ret) { 1475 kill(tracee, SIGKILL); 1476 } 1477 /* Wait for attach stop */ 1478 wait(NULL); 1479 1480 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1481 PTRACE_O_TRACESYSGOOD : 1482 PTRACE_O_TRACESECCOMP); 1483 ASSERT_EQ(0, ret) { 1484 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1485 kill(tracee, SIGKILL); 1486 } 1487 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1488 tracee, NULL, 0); 1489 ASSERT_EQ(0, ret); 1490 1491 /* Unblock the tracee */ 1492 ASSERT_EQ(1, write(fd, "A", 1)); 1493 ASSERT_EQ(0, close(fd)); 1494 1495 /* Run until we're shut down. Must assert to stop execution. */ 1496 while (tracer_running) { 1497 int status; 1498 1499 if (wait(&status) != tracee) 1500 continue; 1501 if (WIFSIGNALED(status) || WIFEXITED(status)) 1502 /* Child is dead. Time to go. */ 1503 return; 1504 1505 /* Check if this is a seccomp event. */ 1506 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status)); 1507 1508 tracer_func(_metadata, tracee, status, args); 1509 1510 ret = ptrace(ptrace_syscall ? 
PTRACE_SYSCALL : PTRACE_CONT, 1511 tracee, NULL, 0); 1512 ASSERT_EQ(0, ret); 1513 } 1514 /* Directly report the status of our test harness results. */ 1515 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); 1516 } 1517 1518 /* Common tracer setup/teardown functions. */ 1519 void cont_handler(int num) 1520 { } 1521 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1522 tracer_func_t func, void *args, bool ptrace_syscall) 1523 { 1524 char sync; 1525 int pipefd[2]; 1526 pid_t tracer_pid; 1527 pid_t tracee = getpid(); 1528 1529 /* Setup a pipe for clean synchronization. */ 1530 ASSERT_EQ(0, pipe(pipefd)); 1531 1532 /* Fork a child which we'll promote to tracer */ 1533 tracer_pid = fork(); 1534 ASSERT_LE(0, tracer_pid); 1535 signal(SIGALRM, cont_handler); 1536 if (tracer_pid == 0) { 1537 close(pipefd[0]); 1538 start_tracer(_metadata, pipefd[1], tracee, func, args, 1539 ptrace_syscall); 1540 syscall(__NR_exit, 0); 1541 } 1542 close(pipefd[1]); 1543 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1544 read(pipefd[0], &sync, 1); 1545 close(pipefd[0]); 1546 1547 return tracer_pid; 1548 } 1549 1550 void teardown_trace_fixture(struct __test_metadata *_metadata, 1551 pid_t tracer) 1552 { 1553 if (tracer) { 1554 int status; 1555 /* 1556 * Extract the exit code from the other process and 1557 * adopt it for ourselves in case its asserts failed. 1558 */ 1559 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1560 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1561 if (WEXITSTATUS(status)) 1562 _metadata->passed = 0; 1563 } 1564 } 1565 1566 /* "poke" tracer arguments and function. */ 1567 struct tracer_args_poke_t { 1568 unsigned long poke_addr; 1569 }; 1570 1571 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1572 void *args) 1573 { 1574 int ret; 1575 unsigned long msg; 1576 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1577 1578 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1579 EXPECT_EQ(0, ret); 1580 /* If this fails, don't try to recover. */ 1581 ASSERT_EQ(0x1001, msg) { 1582 kill(tracee, SIGKILL); 1583 } 1584 /* 1585 * Poke in the message. 1586 * Registers are not touched to try to keep this relatively arch 1587 * agnostic. 1588 */ 1589 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1590 EXPECT_EQ(0, ret); 1591 } 1592 1593 FIXTURE(TRACE_poke) { 1594 struct sock_fprog prog; 1595 pid_t tracer; 1596 long poked; 1597 struct tracer_args_poke_t tracer_args; 1598 }; 1599 1600 FIXTURE_SETUP(TRACE_poke) 1601 { 1602 struct sock_filter filter[] = { 1603 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1604 offsetof(struct seccomp_data, nr)), 1605 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1606 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1607 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1608 }; 1609 1610 self->poked = 0; 1611 memset(&self->prog, 0, sizeof(self->prog)); 1612 self->prog.filter = malloc(sizeof(filter)); 1613 ASSERT_NE(NULL, self->prog.filter); 1614 memcpy(self->prog.filter, filter, sizeof(filter)); 1615 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1616 1617 /* Set up tracer args. */ 1618 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1619 1620 /* Launch tracer. 
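 *
 * setup_trace_fixture() forks a child that attaches back to us with
 * PTRACE_ATTACH (permitted via PR_SET_PTRACER), enables
 * PTRACE_O_TRACESECCOMP, and signals readiness over a pipe before the test
 * body installs its filter.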
*/ 1621 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1622 &self->tracer_args, false); 1623 } 1624 1625 FIXTURE_TEARDOWN(TRACE_poke) 1626 { 1627 teardown_trace_fixture(_metadata, self->tracer); 1628 if (self->prog.filter) 1629 free(self->prog.filter); 1630 } 1631 1632 TEST_F(TRACE_poke, read_has_side_effects) 1633 { 1634 ssize_t ret; 1635 1636 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1637 ASSERT_EQ(0, ret); 1638 1639 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1640 ASSERT_EQ(0, ret); 1641 1642 EXPECT_EQ(0, self->poked); 1643 ret = read(-1, NULL, 0); 1644 EXPECT_EQ(-1, ret); 1645 EXPECT_EQ(0x1001, self->poked); 1646 } 1647 1648 TEST_F(TRACE_poke, getpid_runs_normally) 1649 { 1650 long ret; 1651 1652 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1653 ASSERT_EQ(0, ret); 1654 1655 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1656 ASSERT_EQ(0, ret); 1657 1658 EXPECT_EQ(0, self->poked); 1659 EXPECT_NE(0, syscall(__NR_getpid)); 1660 EXPECT_EQ(0, self->poked); 1661 } 1662 1663 #if defined(__x86_64__) 1664 # define ARCH_REGS struct user_regs_struct 1665 # define SYSCALL_NUM orig_rax 1666 # define SYSCALL_RET rax 1667 #elif defined(__i386__) 1668 # define ARCH_REGS struct user_regs_struct 1669 # define SYSCALL_NUM orig_eax 1670 # define SYSCALL_RET eax 1671 #elif defined(__arm__) 1672 # define ARCH_REGS struct pt_regs 1673 # define SYSCALL_NUM ARM_r7 1674 # define SYSCALL_RET ARM_r0 1675 #elif defined(__aarch64__) 1676 # define ARCH_REGS struct user_pt_regs 1677 # define SYSCALL_NUM regs[8] 1678 # define SYSCALL_RET regs[0] 1679 #elif defined(__riscv) && __riscv_xlen == 64 1680 # define ARCH_REGS struct user_regs_struct 1681 # define SYSCALL_NUM a7 1682 # define SYSCALL_RET a0 1683 #elif defined(__hppa__) 1684 # define ARCH_REGS struct user_regs_struct 1685 # define SYSCALL_NUM gr[20] 1686 # define SYSCALL_RET gr[28] 1687 #elif defined(__powerpc__) 1688 # define ARCH_REGS struct pt_regs 1689 # define SYSCALL_NUM gpr[0] 1690 # define SYSCALL_RET gpr[3] 1691 #elif defined(__s390__) 1692 # define ARCH_REGS s390_regs 1693 # define SYSCALL_NUM gprs[2] 1694 # define SYSCALL_RET gprs[2] 1695 # define SYSCALL_NUM_RET_SHARE_REG 1696 #elif defined(__mips__) 1697 # define ARCH_REGS struct pt_regs 1698 # define SYSCALL_NUM regs[2] 1699 # define SYSCALL_SYSCALL_NUM regs[4] 1700 # define SYSCALL_RET regs[2] 1701 # define SYSCALL_NUM_RET_SHARE_REG 1702 #else 1703 # error "Do not know how to find your architecture's registers and syscalls" 1704 #endif 1705 1706 /* When the syscall return can't be changed, stub out the tests for it. */ 1707 #ifdef SYSCALL_NUM_RET_SHARE_REG 1708 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1709 #else 1710 # define EXPECT_SYSCALL_RETURN(val, action) \ 1711 do { \ 1712 errno = 0; \ 1713 if (val < 0) { \ 1714 EXPECT_EQ(-1, action); \ 1715 EXPECT_EQ(-(val), errno); \ 1716 } else { \ 1717 EXPECT_EQ(val, action); \ 1718 } \ 1719 } while (0) 1720 #endif 1721 1722 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1723 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1724 */ 1725 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) 1726 #define HAVE_GETREGS 1727 #endif 1728 1729 /* Architecture-specific syscall fetching routine. 
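 *
 * Reads the tracee's registers (PTRACE_GETREGS where HAVE_GETREGS is
 * defined, otherwise PTRACE_GETREGSET with NT_PRSTATUS) and returns the
 * architecture's syscall-number register as selected by SYSCALL_NUM above.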
*/ 1730 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1731 { 1732 ARCH_REGS regs; 1733 #ifdef HAVE_GETREGS 1734 EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { 1735 TH_LOG("PTRACE_GETREGS failed"); 1736 return -1; 1737 } 1738 #else 1739 struct iovec iov; 1740 1741 iov.iov_base = ®s; 1742 iov.iov_len = sizeof(regs); 1743 EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { 1744 TH_LOG("PTRACE_GETREGSET failed"); 1745 return -1; 1746 } 1747 #endif 1748 1749 #if defined(__mips__) 1750 if (regs.SYSCALL_NUM == __NR_O32_Linux) 1751 return regs.SYSCALL_SYSCALL_NUM; 1752 #endif 1753 return regs.SYSCALL_NUM; 1754 } 1755 1756 /* Architecture-specific syscall changing routine. */ 1757 void change_syscall(struct __test_metadata *_metadata, 1758 pid_t tracee, int syscall, int result) 1759 { 1760 int ret; 1761 ARCH_REGS regs; 1762 #ifdef HAVE_GETREGS 1763 ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); 1764 #else 1765 struct iovec iov; 1766 iov.iov_base = ®s; 1767 iov.iov_len = sizeof(regs); 1768 ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); 1769 #endif 1770 EXPECT_EQ(0, ret) {} 1771 1772 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ 1773 defined(__s390__) || defined(__hppa__) || defined(__riscv) 1774 { 1775 regs.SYSCALL_NUM = syscall; 1776 } 1777 #elif defined(__mips__) 1778 { 1779 if (regs.SYSCALL_NUM == __NR_O32_Linux) 1780 regs.SYSCALL_SYSCALL_NUM = syscall; 1781 else 1782 regs.SYSCALL_NUM = syscall; 1783 } 1784 1785 #elif defined(__arm__) 1786 # ifndef PTRACE_SET_SYSCALL 1787 # define PTRACE_SET_SYSCALL 23 1788 # endif 1789 { 1790 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); 1791 EXPECT_EQ(0, ret); 1792 } 1793 1794 #elif defined(__aarch64__) 1795 # ifndef NT_ARM_SYSTEM_CALL 1796 # define NT_ARM_SYSTEM_CALL 0x404 1797 # endif 1798 { 1799 iov.iov_base = &syscall; 1800 iov.iov_len = sizeof(syscall); 1801 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, 1802 &iov); 1803 EXPECT_EQ(0, ret); 1804 } 1805 1806 #else 1807 ASSERT_EQ(1, 0) { 1808 TH_LOG("How is the syscall changed on this architecture?"); 1809 } 1810 #endif 1811 1812 /* If syscall is skipped, change return value. */ 1813 if (syscall == -1) 1814 #ifdef SYSCALL_NUM_RET_SHARE_REG 1815 TH_LOG("Can't modify syscall return on this architecture"); 1816 #else 1817 regs.SYSCALL_RET = result; 1818 #endif 1819 1820 #ifdef HAVE_GETREGS 1821 ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); 1822 #else 1823 iov.iov_base = ®s; 1824 iov.iov_len = sizeof(regs); 1825 ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); 1826 #endif 1827 EXPECT_EQ(0, ret); 1828 } 1829 1830 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, 1831 int status, void *args) 1832 { 1833 int ret; 1834 unsigned long msg; 1835 1836 /* Make sure we got the right message. */ 1837 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1838 EXPECT_EQ(0, ret); 1839 1840 /* Validate and take action on expected syscalls. */ 1841 switch (msg) { 1842 case 0x1002: 1843 /* change getpid to getppid. */ 1844 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); 1845 change_syscall(_metadata, tracee, __NR_getppid, 0); 1846 break; 1847 case 0x1003: 1848 /* skip gettid with valid return code. */ 1849 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 1850 change_syscall(_metadata, tracee, -1, 45000); 1851 break; 1852 case 0x1004: 1853 /* skip openat with error. 
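 * (change_syscall() with nr -1 keeps the syscall from executing and plants
 * -ESRCH as its return value, so the tracee observes errno ESRCH.)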
*/ 1854 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 1855 change_syscall(_metadata, tracee, -1, -ESRCH); 1856 break; 1857 case 0x1005: 1858 /* do nothing (allow getppid) */ 1859 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 1860 break; 1861 default: 1862 EXPECT_EQ(0, msg) { 1863 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 1864 kill(tracee, SIGKILL); 1865 } 1866 } 1867 1868 } 1869 1870 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 1871 int status, void *args) 1872 { 1873 int ret, nr; 1874 unsigned long msg; 1875 static bool entry; 1876 1877 /* 1878 * The traditional way to tell PTRACE_SYSCALL entry/exit 1879 * is by counting. 1880 */ 1881 entry = !entry; 1882 1883 /* Make sure we got an appropriate message. */ 1884 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1885 EXPECT_EQ(0, ret); 1886 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 1887 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 1888 1889 if (!entry) 1890 return; 1891 1892 nr = get_syscall(_metadata, tracee); 1893 1894 if (nr == __NR_getpid) 1895 change_syscall(_metadata, tracee, __NR_getppid, 0); 1896 if (nr == __NR_gettid) 1897 change_syscall(_metadata, tracee, -1, 45000); 1898 if (nr == __NR_openat) 1899 change_syscall(_metadata, tracee, -1, -ESRCH); 1900 } 1901 1902 FIXTURE(TRACE_syscall) { 1903 struct sock_fprog prog; 1904 pid_t tracer, mytid, mypid, parent; 1905 }; 1906 1907 FIXTURE_VARIANT(TRACE_syscall) { 1908 /* 1909 * All of the SECCOMP_RET_TRACE behaviors can be tested with either 1910 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. 1911 * This indicates if we should use SECCOMP_RET_TRACE (false), or 1912 * ptrace (true). 1913 */ 1914 bool use_ptrace; 1915 }; 1916 1917 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { 1918 .use_ptrace = true, 1919 }; 1920 1921 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { 1922 .use_ptrace = false, 1923 }; 1924 1925 FIXTURE_SETUP(TRACE_syscall) 1926 { 1927 struct sock_filter filter[] = { 1928 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1929 offsetof(struct seccomp_data, nr)), 1930 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1931 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 1932 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 1933 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 1934 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 1935 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 1936 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 1937 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 1938 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1939 }; 1940 struct sock_fprog prog = { 1941 .len = (unsigned short)ARRAY_SIZE(filter), 1942 .filter = filter, 1943 }; 1944 long ret; 1945 1946 /* Prepare some testable syscall results. */ 1947 self->mytid = syscall(__NR_gettid); 1948 ASSERT_GT(self->mytid, 0); 1949 ASSERT_NE(self->mytid, 1) { 1950 TH_LOG("Running this test as init is not supported. :)"); 1951 } 1952 1953 self->mypid = getpid(); 1954 ASSERT_GT(self->mypid, 0); 1955 ASSERT_EQ(self->mytid, self->mypid); 1956 1957 self->parent = getppid(); 1958 ASSERT_GT(self->parent, 0); 1959 ASSERT_NE(self->parent, self->mypid); 1960 1961 /* Launch tracer. */ 1962 self->tracer = setup_trace_fixture(_metadata, 1963 variant->use_ptrace ? 
tracer_ptrace 1964 : tracer_seccomp, 1965 NULL, variant->use_ptrace); 1966 1967 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1968 ASSERT_EQ(0, ret); 1969 1970 if (variant->use_ptrace) 1971 return; 1972 1973 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 1974 ASSERT_EQ(0, ret); 1975 } 1976 1977 FIXTURE_TEARDOWN(TRACE_syscall) 1978 { 1979 teardown_trace_fixture(_metadata, self->tracer); 1980 } 1981 1982 TEST(negative_ENOSYS) 1983 { 1984 /* 1985 * There should be no difference between an "internal" skip 1986 * and userspace asking for syscall "-1". 1987 */ 1988 errno = 0; 1989 EXPECT_EQ(-1, syscall(-1)); 1990 EXPECT_EQ(errno, ENOSYS); 1991 /* And no difference for "still not valid but not -1". */ 1992 errno = 0; 1993 EXPECT_EQ(-1, syscall(-101)); 1994 EXPECT_EQ(errno, ENOSYS); 1995 } 1996 1997 TEST_F(TRACE_syscall, negative_ENOSYS) 1998 { 1999 negative_ENOSYS(_metadata); 2000 } 2001 2002 TEST_F(TRACE_syscall, syscall_allowed) 2003 { 2004 /* getppid works as expected (no changes). */ 2005 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 2006 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 2007 } 2008 2009 TEST_F(TRACE_syscall, syscall_redirected) 2010 { 2011 /* getpid has been redirected to getppid as expected. */ 2012 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 2013 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2014 } 2015 2016 TEST_F(TRACE_syscall, syscall_errno) 2017 { 2018 /* Tracer should skip the open syscall, resulting in ESRCH. */ 2019 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 2020 } 2021 2022 TEST_F(TRACE_syscall, syscall_faked) 2023 { 2024 /* Tracer skips the gettid syscall and store altered return value. */ 2025 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 2026 } 2027 2028 TEST_F(TRACE_syscall, skip_after) 2029 { 2030 struct sock_filter filter[] = { 2031 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2032 offsetof(struct seccomp_data, nr)), 2033 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2034 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2035 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2036 }; 2037 struct sock_fprog prog = { 2038 .len = (unsigned short)ARRAY_SIZE(filter), 2039 .filter = filter, 2040 }; 2041 long ret; 2042 2043 /* Install additional "errno on getppid" filter. */ 2044 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2045 ASSERT_EQ(0, ret); 2046 2047 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2048 errno = 0; 2049 EXPECT_EQ(-1, syscall(__NR_getpid)); 2050 EXPECT_EQ(EPERM, errno); 2051 } 2052 2053 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) 2054 { 2055 struct sock_filter filter[] = { 2056 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2057 offsetof(struct seccomp_data, nr)), 2058 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2059 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2060 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2061 }; 2062 struct sock_fprog prog = { 2063 .len = (unsigned short)ARRAY_SIZE(filter), 2064 .filter = filter, 2065 }; 2066 long ret; 2067 2068 /* Install additional "death on getppid" filter. */ 2069 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2070 ASSERT_EQ(0, ret); 2071 2072 /* Tracer will redirect getpid to getppid, and we should die. 
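 * The "death on getppid" filter installed above still applies to the
 * rewritten syscall, so the process is killed with SIGSYS (per
 * TEST_F_SIGNAL) before the EXPECT below can complete.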
	 */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

TEST(seccomp_syscall)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Reject insane operation. */
	ret = seccomp(-1, 0, &prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject crazy op value!");
	}

	/* Reject strict with flags or pointer. */
	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject mode strict with flags!");
	}
	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject mode strict with uargs!");
	}

	/* Reject insane args for filter. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject crazy filter flags!");
	}
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
	EXPECT_EQ(EFAULT, errno) {
		TH_LOG("Did not reject NULL filter!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
	EXPECT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
		       strerror(errno));
	}
}

TEST(seccomp_syscall_mode_lock)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_EQ(0, ret) {
		TH_LOG("Could not install filter!");
	}

	/* Make sure neither entry point will switch to strict. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Switched to mode strict!");
	}

	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Switched to mode strict!");
	}
}

/*
 * Test detection of known and unknown filter flags. Userspace needs to be able
 * to check if a filter flag is supported by the current kernel and a good way
 * of doing that is by attempting to enter filter mode, with the flag bit in
 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
 * that the flag is valid and EINVAL indicates that the flag is invalid.
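 *
 * A userspace probe for one flag therefore looks roughly like this sketch
 * (any filter flag can stand in for SECCOMP_FILTER_FLAG_LOG, and
 * flag_is_supported is just an illustrative variable):
 *
 *	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, NULL);
 *	if (ret == -1 && errno == EFAULT)
 *		flag_is_supported = true;
 *	else if (ret == -1 && errno == EINVAL)
 *		flag_is_supported = false;
 *
 * Since the args pointer is NULL, the probe can never actually install a
 * filter.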
2170 */ 2171 TEST(detect_seccomp_filter_flags) 2172 { 2173 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2174 SECCOMP_FILTER_FLAG_LOG, 2175 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2176 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2177 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2178 unsigned int exclusive[] = { 2179 SECCOMP_FILTER_FLAG_TSYNC, 2180 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2181 unsigned int flag, all_flags, exclusive_mask; 2182 int i; 2183 long ret; 2184 2185 /* Test detection of individual known-good filter flags */ 2186 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2187 int bits = 0; 2188 2189 flag = flags[i]; 2190 /* Make sure the flag is a single bit! */ 2191 while (flag) { 2192 if (flag & 0x1) 2193 bits ++; 2194 flag >>= 1; 2195 } 2196 ASSERT_EQ(1, bits); 2197 flag = flags[i]; 2198 2199 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2200 ASSERT_NE(ENOSYS, errno) { 2201 TH_LOG("Kernel does not support seccomp syscall!"); 2202 } 2203 EXPECT_EQ(-1, ret); 2204 EXPECT_EQ(EFAULT, errno) { 2205 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2206 flag); 2207 } 2208 2209 all_flags |= flag; 2210 } 2211 2212 /* 2213 * Test detection of all known-good filter flags combined. But 2214 * for the exclusive flags we need to mask them out and try them 2215 * individually for the "all flags" testing. 2216 */ 2217 exclusive_mask = 0; 2218 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2219 exclusive_mask |= exclusive[i]; 2220 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2221 flag = all_flags & ~exclusive_mask; 2222 flag |= exclusive[i]; 2223 2224 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2225 EXPECT_EQ(-1, ret); 2226 EXPECT_EQ(EFAULT, errno) { 2227 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2228 flag); 2229 } 2230 } 2231 2232 /* Test detection of an unknown filter flags, without exclusives. */ 2233 flag = -1; 2234 flag &= ~exclusive_mask; 2235 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2236 EXPECT_EQ(-1, ret); 2237 EXPECT_EQ(EINVAL, errno) { 2238 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2239 flag); 2240 } 2241 2242 /* 2243 * Test detection of an unknown filter flag that may simply need to be 2244 * added to this test 2245 */ 2246 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2247 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2248 EXPECT_EQ(-1, ret); 2249 EXPECT_EQ(EINVAL, errno) { 2250 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! 
Does a new flag need to be added to this test?", 2251 flag); 2252 } 2253 } 2254 2255 TEST(TSYNC_first) 2256 { 2257 struct sock_filter filter[] = { 2258 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2259 }; 2260 struct sock_fprog prog = { 2261 .len = (unsigned short)ARRAY_SIZE(filter), 2262 .filter = filter, 2263 }; 2264 long ret; 2265 2266 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2267 ASSERT_EQ(0, ret) { 2268 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2269 } 2270 2271 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2272 &prog); 2273 ASSERT_NE(ENOSYS, errno) { 2274 TH_LOG("Kernel does not support seccomp syscall!"); 2275 } 2276 EXPECT_EQ(0, ret) { 2277 TH_LOG("Could not install initial filter with TSYNC!"); 2278 } 2279 } 2280 2281 #define TSYNC_SIBLINGS 2 2282 struct tsync_sibling { 2283 pthread_t tid; 2284 pid_t system_tid; 2285 sem_t *started; 2286 pthread_cond_t *cond; 2287 pthread_mutex_t *mutex; 2288 int diverge; 2289 int num_waits; 2290 struct sock_fprog *prog; 2291 struct __test_metadata *metadata; 2292 }; 2293 2294 /* 2295 * To avoid joining joined threads (which is not allowed by Bionic), 2296 * make sure we both successfully join and clear the tid to skip a 2297 * later join attempt during fixture teardown. Any remaining threads 2298 * will be directly killed during teardown. 2299 */ 2300 #define PTHREAD_JOIN(tid, status) \ 2301 do { \ 2302 int _rc = pthread_join(tid, status); \ 2303 if (_rc) { \ 2304 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2305 (unsigned int)tid, _rc); \ 2306 } else { \ 2307 tid = 0; \ 2308 } \ 2309 } while (0) 2310 2311 FIXTURE(TSYNC) { 2312 struct sock_fprog root_prog, apply_prog; 2313 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2314 sem_t started; 2315 pthread_cond_t cond; 2316 pthread_mutex_t mutex; 2317 int sibling_count; 2318 }; 2319 2320 FIXTURE_SETUP(TSYNC) 2321 { 2322 struct sock_filter root_filter[] = { 2323 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2324 }; 2325 struct sock_filter apply_filter[] = { 2326 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2327 offsetof(struct seccomp_data, nr)), 2328 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2329 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2330 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2331 }; 2332 2333 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2334 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2335 memset(&self->sibling, 0, sizeof(self->sibling)); 2336 self->root_prog.filter = malloc(sizeof(root_filter)); 2337 ASSERT_NE(NULL, self->root_prog.filter); 2338 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2339 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2340 2341 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2342 ASSERT_NE(NULL, self->apply_prog.filter); 2343 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2344 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2345 2346 self->sibling_count = 0; 2347 pthread_mutex_init(&self->mutex, NULL); 2348 pthread_cond_init(&self->cond, NULL); 2349 sem_init(&self->started, 0, 0); 2350 self->sibling[0].tid = 0; 2351 self->sibling[0].cond = &self->cond; 2352 self->sibling[0].started = &self->started; 2353 self->sibling[0].mutex = &self->mutex; 2354 self->sibling[0].diverge = 0; 2355 self->sibling[0].num_waits = 1; 2356 self->sibling[0].prog = &self->root_prog; 2357 self->sibling[0].metadata = _metadata; 2358 self->sibling[1].tid = 0; 2359 self->sibling[1].cond = &self->cond; 2360 self->sibling[1].started = &self->started; 2361 
self->sibling[1].mutex = &self->mutex; 2362 self->sibling[1].diverge = 0; 2363 self->sibling[1].prog = &self->root_prog; 2364 self->sibling[1].num_waits = 1; 2365 self->sibling[1].metadata = _metadata; 2366 } 2367 2368 FIXTURE_TEARDOWN(TSYNC) 2369 { 2370 int sib = 0; 2371 2372 if (self->root_prog.filter) 2373 free(self->root_prog.filter); 2374 if (self->apply_prog.filter) 2375 free(self->apply_prog.filter); 2376 2377 for ( ; sib < self->sibling_count; ++sib) { 2378 struct tsync_sibling *s = &self->sibling[sib]; 2379 2380 if (!s->tid) 2381 continue; 2382 /* 2383 * If a thread is still running, it may be stuck, so hit 2384 * it over the head really hard. 2385 */ 2386 pthread_kill(s->tid, 9); 2387 } 2388 pthread_mutex_destroy(&self->mutex); 2389 pthread_cond_destroy(&self->cond); 2390 sem_destroy(&self->started); 2391 } 2392 2393 void *tsync_sibling(void *data) 2394 { 2395 long ret = 0; 2396 struct tsync_sibling *me = data; 2397 2398 me->system_tid = syscall(__NR_gettid); 2399 2400 pthread_mutex_lock(me->mutex); 2401 if (me->diverge) { 2402 /* Just re-apply the root prog to fork the tree */ 2403 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2404 me->prog, 0, 0); 2405 } 2406 sem_post(me->started); 2407 /* Return outside of started so parent notices failures. */ 2408 if (ret) { 2409 pthread_mutex_unlock(me->mutex); 2410 return (void *)SIBLING_EXIT_FAILURE; 2411 } 2412 do { 2413 pthread_cond_wait(me->cond, me->mutex); 2414 me->num_waits = me->num_waits - 1; 2415 } while (me->num_waits); 2416 pthread_mutex_unlock(me->mutex); 2417 2418 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2419 if (!ret) 2420 return (void *)SIBLING_EXIT_NEWPRIVS; 2421 read(0, NULL, 0); 2422 return (void *)SIBLING_EXIT_UNKILLED; 2423 } 2424 2425 void tsync_start_sibling(struct tsync_sibling *sibling) 2426 { 2427 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2428 } 2429 2430 TEST_F(TSYNC, siblings_fail_prctl) 2431 { 2432 long ret; 2433 void *status; 2434 struct sock_filter filter[] = { 2435 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2436 offsetof(struct seccomp_data, nr)), 2437 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2438 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2439 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2440 }; 2441 struct sock_fprog prog = { 2442 .len = (unsigned short)ARRAY_SIZE(filter), 2443 .filter = filter, 2444 }; 2445 2446 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2447 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2448 } 2449 2450 /* Check prctl failure detection by requesting sib 0 diverge. */ 2451 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2452 ASSERT_NE(ENOSYS, errno) { 2453 TH_LOG("Kernel does not support seccomp syscall!"); 2454 } 2455 ASSERT_EQ(0, ret) { 2456 TH_LOG("setting filter failed"); 2457 } 2458 2459 self->sibling[0].diverge = 1; 2460 tsync_start_sibling(&self->sibling[0]); 2461 tsync_start_sibling(&self->sibling[1]); 2462 2463 while (self->sibling_count < TSYNC_SIBLINGS) { 2464 sem_wait(&self->started); 2465 self->sibling_count++; 2466 } 2467 2468 /* Signal the threads to clean up*/ 2469 pthread_mutex_lock(&self->mutex); 2470 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2471 TH_LOG("cond broadcast non-zero"); 2472 } 2473 pthread_mutex_unlock(&self->mutex); 2474 2475 /* Ensure diverging sibling failed to call prctl. 
*/ 2476 PTHREAD_JOIN(self->sibling[0].tid, &status); 2477 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2478 PTHREAD_JOIN(self->sibling[1].tid, &status); 2479 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2480 } 2481 2482 TEST_F(TSYNC, two_siblings_with_ancestor) 2483 { 2484 long ret; 2485 void *status; 2486 2487 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2488 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2489 } 2490 2491 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2492 ASSERT_NE(ENOSYS, errno) { 2493 TH_LOG("Kernel does not support seccomp syscall!"); 2494 } 2495 ASSERT_EQ(0, ret) { 2496 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2497 } 2498 tsync_start_sibling(&self->sibling[0]); 2499 tsync_start_sibling(&self->sibling[1]); 2500 2501 while (self->sibling_count < TSYNC_SIBLINGS) { 2502 sem_wait(&self->started); 2503 self->sibling_count++; 2504 } 2505 2506 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2507 &self->apply_prog); 2508 ASSERT_EQ(0, ret) { 2509 TH_LOG("Could install filter on all threads!"); 2510 } 2511 /* Tell the siblings to test the policy */ 2512 pthread_mutex_lock(&self->mutex); 2513 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2514 TH_LOG("cond broadcast non-zero"); 2515 } 2516 pthread_mutex_unlock(&self->mutex); 2517 /* Ensure they are both killed and don't exit cleanly. */ 2518 PTHREAD_JOIN(self->sibling[0].tid, &status); 2519 EXPECT_EQ(0x0, (long)status); 2520 PTHREAD_JOIN(self->sibling[1].tid, &status); 2521 EXPECT_EQ(0x0, (long)status); 2522 } 2523 2524 TEST_F(TSYNC, two_sibling_want_nnp) 2525 { 2526 void *status; 2527 2528 /* start siblings before any prctl() operations */ 2529 tsync_start_sibling(&self->sibling[0]); 2530 tsync_start_sibling(&self->sibling[1]); 2531 while (self->sibling_count < TSYNC_SIBLINGS) { 2532 sem_wait(&self->started); 2533 self->sibling_count++; 2534 } 2535 2536 /* Tell the siblings to test no policy */ 2537 pthread_mutex_lock(&self->mutex); 2538 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2539 TH_LOG("cond broadcast non-zero"); 2540 } 2541 pthread_mutex_unlock(&self->mutex); 2542 2543 /* Ensure they are both upset about lacking nnp. */ 2544 PTHREAD_JOIN(self->sibling[0].tid, &status); 2545 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2546 PTHREAD_JOIN(self->sibling[1].tid, &status); 2547 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2548 } 2549 2550 TEST_F(TSYNC, two_siblings_with_no_filter) 2551 { 2552 long ret; 2553 void *status; 2554 2555 /* start siblings before any prctl() operations */ 2556 tsync_start_sibling(&self->sibling[0]); 2557 tsync_start_sibling(&self->sibling[1]); 2558 while (self->sibling_count < TSYNC_SIBLINGS) { 2559 sem_wait(&self->started); 2560 self->sibling_count++; 2561 } 2562 2563 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2564 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2565 } 2566 2567 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2568 &self->apply_prog); 2569 ASSERT_NE(ENOSYS, errno) { 2570 TH_LOG("Kernel does not support seccomp syscall!"); 2571 } 2572 ASSERT_EQ(0, ret) { 2573 TH_LOG("Could install filter on all threads!"); 2574 } 2575 2576 /* Tell the siblings to test the policy */ 2577 pthread_mutex_lock(&self->mutex); 2578 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2579 TH_LOG("cond broadcast non-zero"); 2580 } 2581 pthread_mutex_unlock(&self->mutex); 2582 2583 /* Ensure they are both killed and don't exit cleanly. 
*/ 2584 PTHREAD_JOIN(self->sibling[0].tid, &status); 2585 EXPECT_EQ(0x0, (long)status); 2586 PTHREAD_JOIN(self->sibling[1].tid, &status); 2587 EXPECT_EQ(0x0, (long)status); 2588 } 2589 2590 TEST_F(TSYNC, two_siblings_with_one_divergence) 2591 { 2592 long ret; 2593 void *status; 2594 2595 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2596 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2597 } 2598 2599 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2600 ASSERT_NE(ENOSYS, errno) { 2601 TH_LOG("Kernel does not support seccomp syscall!"); 2602 } 2603 ASSERT_EQ(0, ret) { 2604 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2605 } 2606 self->sibling[0].diverge = 1; 2607 tsync_start_sibling(&self->sibling[0]); 2608 tsync_start_sibling(&self->sibling[1]); 2609 2610 while (self->sibling_count < TSYNC_SIBLINGS) { 2611 sem_wait(&self->started); 2612 self->sibling_count++; 2613 } 2614 2615 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2616 &self->apply_prog); 2617 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2618 TH_LOG("Did not fail on diverged sibling."); 2619 } 2620 2621 /* Wake the threads */ 2622 pthread_mutex_lock(&self->mutex); 2623 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2624 TH_LOG("cond broadcast non-zero"); 2625 } 2626 pthread_mutex_unlock(&self->mutex); 2627 2628 /* Ensure they are both unkilled. */ 2629 PTHREAD_JOIN(self->sibling[0].tid, &status); 2630 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2631 PTHREAD_JOIN(self->sibling[1].tid, &status); 2632 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2633 } 2634 2635 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) 2636 { 2637 long ret, flags; 2638 void *status; 2639 2640 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2641 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2642 } 2643 2644 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2645 ASSERT_NE(ENOSYS, errno) { 2646 TH_LOG("Kernel does not support seccomp syscall!"); 2647 } 2648 ASSERT_EQ(0, ret) { 2649 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2650 } 2651 self->sibling[0].diverge = 1; 2652 tsync_start_sibling(&self->sibling[0]); 2653 tsync_start_sibling(&self->sibling[1]); 2654 2655 while (self->sibling_count < TSYNC_SIBLINGS) { 2656 sem_wait(&self->started); 2657 self->sibling_count++; 2658 } 2659 2660 flags = SECCOMP_FILTER_FLAG_TSYNC | \ 2661 SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 2662 ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); 2663 ASSERT_EQ(ESRCH, errno) { 2664 TH_LOG("Did not return ESRCH for diverged sibling."); 2665 } 2666 ASSERT_EQ(-1, ret) { 2667 TH_LOG("Did not fail on diverged sibling."); 2668 } 2669 2670 /* Wake the threads */ 2671 pthread_mutex_lock(&self->mutex); 2672 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2673 TH_LOG("cond broadcast non-zero"); 2674 } 2675 pthread_mutex_unlock(&self->mutex); 2676 2677 /* Ensure they are both unkilled. 
*/ 2678 PTHREAD_JOIN(self->sibling[0].tid, &status); 2679 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2680 PTHREAD_JOIN(self->sibling[1].tid, &status); 2681 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2682 } 2683 2684 TEST_F(TSYNC, two_siblings_not_under_filter) 2685 { 2686 long ret, sib; 2687 void *status; 2688 struct timespec delay = { .tv_nsec = 100000000 }; 2689 2690 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2691 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2692 } 2693 2694 /* 2695 * Sibling 0 will have its own seccomp policy 2696 * and Sibling 1 will not be under seccomp at 2697 * all. Sibling 1 will enter seccomp and 0 2698 * will cause failure. 2699 */ 2700 self->sibling[0].diverge = 1; 2701 tsync_start_sibling(&self->sibling[0]); 2702 tsync_start_sibling(&self->sibling[1]); 2703 2704 while (self->sibling_count < TSYNC_SIBLINGS) { 2705 sem_wait(&self->started); 2706 self->sibling_count++; 2707 } 2708 2709 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2710 ASSERT_NE(ENOSYS, errno) { 2711 TH_LOG("Kernel does not support seccomp syscall!"); 2712 } 2713 ASSERT_EQ(0, ret) { 2714 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2715 } 2716 2717 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2718 &self->apply_prog); 2719 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2720 TH_LOG("Did not fail on diverged sibling."); 2721 } 2722 sib = 1; 2723 if (ret == self->sibling[0].system_tid) 2724 sib = 0; 2725 2726 pthread_mutex_lock(&self->mutex); 2727 2728 /* Increment the other siblings num_waits so we can clean up 2729 * the one we just saw. 2730 */ 2731 self->sibling[!sib].num_waits += 1; 2732 2733 /* Signal the thread to clean up*/ 2734 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2735 TH_LOG("cond broadcast non-zero"); 2736 } 2737 pthread_mutex_unlock(&self->mutex); 2738 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2739 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2740 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2741 while (!kill(self->sibling[sib].system_tid, 0)) 2742 nanosleep(&delay, NULL); 2743 /* Switch to the remaining sibling */ 2744 sib = !sib; 2745 2746 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2747 &self->apply_prog); 2748 ASSERT_EQ(0, ret) { 2749 TH_LOG("Expected the remaining sibling to sync"); 2750 }; 2751 2752 pthread_mutex_lock(&self->mutex); 2753 2754 /* If remaining sibling didn't have a chance to wake up during 2755 * the first broadcast, manually reduce the num_waits now. 2756 */ 2757 if (self->sibling[sib].num_waits > 1) 2758 self->sibling[sib].num_waits = 1; 2759 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2760 TH_LOG("cond broadcast non-zero"); 2761 } 2762 pthread_mutex_unlock(&self->mutex); 2763 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2764 EXPECT_EQ(0, (long)status); 2765 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2766 while (!kill(self->sibling[sib].system_tid, 0)) 2767 nanosleep(&delay, NULL); 2768 2769 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2770 &self->apply_prog); 2771 ASSERT_EQ(0, ret); /* just us chickens */ 2772 } 2773 2774 /* Make sure restarted syscalls are seen directly as "restart_syscall". 
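 * (As exercised below: the child's nanosleep() is interrupted by SIGSTOP and
 * later resumed with SIGCONT, at which point the kernel restarts the sleep
 * through the restart_syscall() mechanism; the seccomp filter should then see
 * __NR_restart_syscall rather than a second nanosleep, except for the native
 * ARM quirk noted in the FIXME further down.)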
*/ 2775 TEST(syscall_restart) 2776 { 2777 long ret; 2778 unsigned long msg; 2779 pid_t child_pid; 2780 int pipefd[2]; 2781 int status; 2782 siginfo_t info = { }; 2783 struct sock_filter filter[] = { 2784 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2785 offsetof(struct seccomp_data, nr)), 2786 2787 #ifdef __NR_sigreturn 2788 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), 2789 #endif 2790 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), 2791 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), 2792 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), 2793 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), 2794 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), 2795 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 2796 2797 /* Allow __NR_write for easy logging. */ 2798 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 2799 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2800 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2801 /* The nanosleep jump target. */ 2802 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 2803 /* The restart_syscall jump target. */ 2804 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 2805 }; 2806 struct sock_fprog prog = { 2807 .len = (unsigned short)ARRAY_SIZE(filter), 2808 .filter = filter, 2809 }; 2810 #if defined(__arm__) 2811 struct utsname utsbuf; 2812 #endif 2813 2814 ASSERT_EQ(0, pipe(pipefd)); 2815 2816 child_pid = fork(); 2817 ASSERT_LE(0, child_pid); 2818 if (child_pid == 0) { 2819 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 2820 char buf = ' '; 2821 struct timespec timeout = { }; 2822 2823 /* Attach parent as tracer and stop. */ 2824 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 2825 EXPECT_EQ(0, raise(SIGSTOP)); 2826 2827 EXPECT_EQ(0, close(pipefd[1])); 2828 2829 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2830 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2831 } 2832 2833 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2834 EXPECT_EQ(0, ret) { 2835 TH_LOG("Failed to install filter!"); 2836 } 2837 2838 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2839 TH_LOG("Failed to read() sync from parent"); 2840 } 2841 EXPECT_EQ('.', buf) { 2842 TH_LOG("Failed to get sync data from read()"); 2843 } 2844 2845 /* Start nanosleep to be interrupted. */ 2846 timeout.tv_sec = 1; 2847 errno = 0; 2848 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 2849 TH_LOG("Call to nanosleep() failed (errno %d)", errno); 2850 } 2851 2852 /* Read final sync from parent. */ 2853 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2854 TH_LOG("Failed final read() from parent"); 2855 } 2856 EXPECT_EQ('!', buf) { 2857 TH_LOG("Failed to get final data from read()"); 2858 } 2859 2860 /* Directly report the status of our test harness results. */ 2861 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS 2862 : EXIT_FAILURE); 2863 } 2864 EXPECT_EQ(0, close(pipefd[0])); 2865 2866 /* Attach to child, setup options, and release. */ 2867 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2868 ASSERT_EQ(true, WIFSTOPPED(status)); 2869 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 2870 PTRACE_O_TRACESECCOMP)); 2871 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2872 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 2873 2874 /* Wait for nanosleep() to start. 
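	 * The filter returns SECCOMP_RET_TRACE|0x100 for nanosleep and
	 * clock_nanosleep, so the tracee stops with PTRACE_EVENT_SECCOMP and
	 * PTRACE_GETEVENTMSG is expected to report the filter's data value
	 * (0x100) below.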
*/ 2875 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2876 ASSERT_EQ(true, WIFSTOPPED(status)); 2877 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2878 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2879 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2880 ASSERT_EQ(0x100, msg); 2881 ret = get_syscall(_metadata, child_pid); 2882 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); 2883 2884 /* Might as well check siginfo for sanity while we're here. */ 2885 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2886 ASSERT_EQ(SIGTRAP, info.si_signo); 2887 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 2888 EXPECT_EQ(0, info.si_errno); 2889 EXPECT_EQ(getuid(), info.si_uid); 2890 /* Verify signal delivery came from child (seccomp-triggered). */ 2891 EXPECT_EQ(child_pid, info.si_pid); 2892 2893 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 2894 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 2895 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2896 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2897 ASSERT_EQ(true, WIFSTOPPED(status)); 2898 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 2899 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2900 /* 2901 * There is no siginfo on SIGSTOP any more, so we can't verify 2902 * signal delivery came from parent now (getpid() == info.si_pid). 2903 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 2904 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 2905 */ 2906 EXPECT_EQ(SIGSTOP, info.si_signo); 2907 2908 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ 2909 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 2910 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2911 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2912 ASSERT_EQ(true, WIFSTOPPED(status)); 2913 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 2914 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2915 2916 /* Wait for restart_syscall() to start. */ 2917 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2918 ASSERT_EQ(true, WIFSTOPPED(status)); 2919 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2920 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2921 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2922 2923 ASSERT_EQ(0x200, msg); 2924 ret = get_syscall(_metadata, child_pid); 2925 #if defined(__arm__) 2926 /* 2927 * FIXME: 2928 * - native ARM registers do NOT expose true syscall. 2929 * - compat ARM registers on ARM64 DO expose true syscall. 2930 */ 2931 ASSERT_EQ(0, uname(&utsbuf)); 2932 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 2933 EXPECT_EQ(__NR_nanosleep, ret); 2934 } else 2935 #endif 2936 { 2937 EXPECT_EQ(__NR_restart_syscall, ret); 2938 } 2939 2940 /* Write again to end test. 
*/ 2941 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2942 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 2943 EXPECT_EQ(0, close(pipefd[1])); 2944 2945 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2946 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 2947 _metadata->passed = 0; 2948 } 2949 2950 TEST_SIGNAL(filter_flag_log, SIGSYS) 2951 { 2952 struct sock_filter allow_filter[] = { 2953 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2954 }; 2955 struct sock_filter kill_filter[] = { 2956 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2957 offsetof(struct seccomp_data, nr)), 2958 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2959 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2960 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2961 }; 2962 struct sock_fprog allow_prog = { 2963 .len = (unsigned short)ARRAY_SIZE(allow_filter), 2964 .filter = allow_filter, 2965 }; 2966 struct sock_fprog kill_prog = { 2967 .len = (unsigned short)ARRAY_SIZE(kill_filter), 2968 .filter = kill_filter, 2969 }; 2970 long ret; 2971 pid_t parent = getppid(); 2972 2973 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2974 ASSERT_EQ(0, ret); 2975 2976 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 2977 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 2978 &allow_prog); 2979 ASSERT_NE(ENOSYS, errno) { 2980 TH_LOG("Kernel does not support seccomp syscall!"); 2981 } 2982 EXPECT_NE(0, ret) { 2983 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 2984 } 2985 EXPECT_EQ(EINVAL, errno) { 2986 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 2987 } 2988 2989 /* Verify that a simple, permissive filter can be added with no flags */ 2990 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 2991 EXPECT_EQ(0, ret); 2992 2993 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 2994 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 2995 &allow_prog); 2996 ASSERT_NE(EINVAL, errno) { 2997 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 2998 } 2999 EXPECT_EQ(0, ret); 3000 3001 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 3002 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3003 &kill_prog); 3004 EXPECT_EQ(0, ret); 3005 3006 EXPECT_EQ(parent, syscall(__NR_getppid)); 3007 /* getpid() should never return. 
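	 * The kill filter installed above returns SECCOMP_RET_KILL for getpid,
	 * so the process should die with SIGSYS here, which is what
	 * TEST_SIGNAL(filter_flag_log, SIGSYS) expects.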
*/ 3008 EXPECT_EQ(0, syscall(__NR_getpid)); 3009 } 3010 3011 TEST(get_action_avail) 3012 { 3013 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 3014 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 3015 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 3016 __u32 unknown_action = 0x10000000U; 3017 int i; 3018 long ret; 3019 3020 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 3021 ASSERT_NE(ENOSYS, errno) { 3022 TH_LOG("Kernel does not support seccomp syscall!"); 3023 } 3024 ASSERT_NE(EINVAL, errno) { 3025 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 3026 } 3027 EXPECT_EQ(ret, 0); 3028 3029 for (i = 0; i < ARRAY_SIZE(actions); i++) { 3030 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 3031 EXPECT_EQ(ret, 0) { 3032 TH_LOG("Expected action (0x%X) not available!", 3033 actions[i]); 3034 } 3035 } 3036 3037 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 3038 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 3039 EXPECT_EQ(ret, -1); 3040 EXPECT_EQ(errno, EOPNOTSUPP); 3041 } 3042 3043 TEST(get_metadata) 3044 { 3045 pid_t pid; 3046 int pipefd[2]; 3047 char buf; 3048 struct seccomp_metadata md; 3049 long ret; 3050 3051 /* Only real root can get metadata. */ 3052 if (geteuid()) { 3053 SKIP(return, "get_metadata requires real root"); 3054 return; 3055 } 3056 3057 ASSERT_EQ(0, pipe(pipefd)); 3058 3059 pid = fork(); 3060 ASSERT_GE(pid, 0); 3061 if (pid == 0) { 3062 struct sock_filter filter[] = { 3063 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3064 }; 3065 struct sock_fprog prog = { 3066 .len = (unsigned short)ARRAY_SIZE(filter), 3067 .filter = filter, 3068 }; 3069 3070 /* one with log, one without */ 3071 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3072 SECCOMP_FILTER_FLAG_LOG, &prog)); 3073 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3074 3075 EXPECT_EQ(0, close(pipefd[0])); 3076 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3077 ASSERT_EQ(0, close(pipefd[1])); 3078 3079 while (1) 3080 sleep(100); 3081 } 3082 3083 ASSERT_EQ(0, close(pipefd[1])); 3084 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3085 3086 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3087 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3088 3089 /* Past here must not use ASSERT or child process is never killed. 
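	 * (ASSERT_* aborts the test function immediately on failure, which
	 * would skip the kill(pid, SIGKILL) at the skip: label below and
	 * leave the forked child looping in sleep() forever.)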
*/ 3090 3091 md.filter_off = 0; 3092 errno = 0; 3093 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3094 EXPECT_EQ(sizeof(md), ret) { 3095 if (errno == EINVAL) 3096 SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3097 } 3098 3099 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3100 EXPECT_EQ(md.filter_off, 0); 3101 3102 md.filter_off = 1; 3103 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3104 EXPECT_EQ(sizeof(md), ret); 3105 EXPECT_EQ(md.flags, 0); 3106 EXPECT_EQ(md.filter_off, 1); 3107 3108 skip: 3109 ASSERT_EQ(0, kill(pid, SIGKILL)); 3110 } 3111 3112 static int user_notif_syscall(int nr, unsigned int flags) 3113 { 3114 struct sock_filter filter[] = { 3115 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 3116 offsetof(struct seccomp_data, nr)), 3117 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), 3118 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), 3119 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), 3120 }; 3121 3122 struct sock_fprog prog = { 3123 .len = (unsigned short)ARRAY_SIZE(filter), 3124 .filter = filter, 3125 }; 3126 3127 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3128 } 3129 3130 #define USER_NOTIF_MAGIC INT_MAX 3131 TEST(user_notification_basic) 3132 { 3133 pid_t pid; 3134 long ret; 3135 int status, listener; 3136 struct seccomp_notif req = {}; 3137 struct seccomp_notif_resp resp = {}; 3138 struct pollfd pollfd; 3139 3140 struct sock_filter filter[] = { 3141 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3142 }; 3143 struct sock_fprog prog = { 3144 .len = (unsigned short)ARRAY_SIZE(filter), 3145 .filter = filter, 3146 }; 3147 3148 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3149 ASSERT_EQ(0, ret) { 3150 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3151 } 3152 3153 pid = fork(); 3154 ASSERT_GE(pid, 0); 3155 3156 /* Check that we get -ENOSYS with no listener attached */ 3157 if (pid == 0) { 3158 if (user_notif_syscall(__NR_getppid, 0) < 0) 3159 exit(1); 3160 ret = syscall(__NR_getppid); 3161 exit(ret >= 0 || errno != ENOSYS); 3162 } 3163 3164 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3165 EXPECT_EQ(true, WIFEXITED(status)); 3166 EXPECT_EQ(0, WEXITSTATUS(status)); 3167 3168 /* Add some no-op filters for grins. */ 3169 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3170 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3171 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3172 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3173 3174 /* Check that the basic notification machinery works */ 3175 listener = user_notif_syscall(__NR_getppid, 3176 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3177 ASSERT_GE(listener, 0); 3178 3179 /* Installing a second listener in the chain should EBUSY */ 3180 EXPECT_EQ(user_notif_syscall(__NR_getppid, 3181 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3182 -1); 3183 EXPECT_EQ(errno, EBUSY); 3184 3185 pid = fork(); 3186 ASSERT_GE(pid, 0); 3187 3188 if (pid == 0) { 3189 ret = syscall(__NR_getppid); 3190 exit(ret != USER_NOTIF_MAGIC); 3191 } 3192 3193 pollfd.fd = listener; 3194 pollfd.events = POLLIN | POLLOUT; 3195 3196 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3197 EXPECT_EQ(pollfd.revents, POLLIN); 3198 3199 /* Test that we can't pass garbage to the kernel. 
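	 * SECCOMP_IOCTL_NOTIF_RECV expects the struct seccomp_notif to be
	 * zero-filled on entry, so poking a nonzero req.pid into it below
	 * should make the ioctl fail with EINVAL rather than be silently
	 * accepted.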
*/ 3200 memset(&req, 0, sizeof(req)); 3201 req.pid = -1; 3202 errno = 0; 3203 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 3204 EXPECT_EQ(-1, ret); 3205 EXPECT_EQ(EINVAL, errno); 3206 3207 if (ret) { 3208 req.pid = 0; 3209 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3210 } 3211 3212 pollfd.fd = listener; 3213 pollfd.events = POLLIN | POLLOUT; 3214 3215 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3216 EXPECT_EQ(pollfd.revents, POLLOUT); 3217 3218 EXPECT_EQ(req.data.nr, __NR_getppid); 3219 3220 resp.id = req.id; 3221 resp.error = 0; 3222 resp.val = USER_NOTIF_MAGIC; 3223 3224 /* check that we make sure flags == 0 */ 3225 resp.flags = 1; 3226 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3227 EXPECT_EQ(errno, EINVAL); 3228 3229 resp.flags = 0; 3230 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3231 3232 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3233 EXPECT_EQ(true, WIFEXITED(status)); 3234 EXPECT_EQ(0, WEXITSTATUS(status)); 3235 } 3236 3237 TEST(user_notification_with_tsync) 3238 { 3239 int ret; 3240 unsigned int flags; 3241 3242 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3243 ASSERT_EQ(0, ret) { 3244 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3245 } 3246 3247 /* these were exclusive */ 3248 flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | 3249 SECCOMP_FILTER_FLAG_TSYNC; 3250 ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); 3251 ASSERT_EQ(EINVAL, errno); 3252 3253 /* but now they're not */ 3254 flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 3255 ret = user_notif_syscall(__NR_getppid, flags); 3256 close(ret); 3257 ASSERT_LE(0, ret); 3258 } 3259 3260 TEST(user_notification_kill_in_middle) 3261 { 3262 pid_t pid; 3263 long ret; 3264 int listener; 3265 struct seccomp_notif req = {}; 3266 struct seccomp_notif_resp resp = {}; 3267 3268 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3269 ASSERT_EQ(0, ret) { 3270 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3271 } 3272 3273 listener = user_notif_syscall(__NR_getppid, 3274 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3275 ASSERT_GE(listener, 0); 3276 3277 /* 3278 * Check that nothing bad happens when we kill the task in the middle 3279 * of a syscall. 
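	 * Once the target is SIGKILLed, its pending notification id becomes
	 * stale: SECCOMP_IOCTL_NOTIF_ID_VALID should start failing, and a
	 * late SECCOMP_IOCTL_NOTIF_SEND should return ENOENT, as checked
	 * below.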
3280 */ 3281 pid = fork(); 3282 ASSERT_GE(pid, 0); 3283 3284 if (pid == 0) { 3285 ret = syscall(__NR_getppid); 3286 exit(ret != USER_NOTIF_MAGIC); 3287 } 3288 3289 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3290 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3291 3292 EXPECT_EQ(kill(pid, SIGKILL), 0); 3293 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3294 3295 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3296 3297 resp.id = req.id; 3298 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3299 EXPECT_EQ(ret, -1); 3300 EXPECT_EQ(errno, ENOENT); 3301 } 3302 3303 static int handled = -1; 3304 3305 static void signal_handler(int signal) 3306 { 3307 if (write(handled, "c", 1) != 1) 3308 perror("write from signal"); 3309 } 3310 3311 TEST(user_notification_signal) 3312 { 3313 pid_t pid; 3314 long ret; 3315 int status, listener, sk_pair[2]; 3316 struct seccomp_notif req = {}; 3317 struct seccomp_notif_resp resp = {}; 3318 char c; 3319 3320 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3321 ASSERT_EQ(0, ret) { 3322 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3323 } 3324 3325 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3326 3327 listener = user_notif_syscall(__NR_gettid, 3328 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3329 ASSERT_GE(listener, 0); 3330 3331 pid = fork(); 3332 ASSERT_GE(pid, 0); 3333 3334 if (pid == 0) { 3335 close(sk_pair[0]); 3336 handled = sk_pair[1]; 3337 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3338 perror("signal"); 3339 exit(1); 3340 } 3341 /* 3342 * ERESTARTSYS behavior is a bit hard to test, because we need 3343 * to rely on a signal that has not yet been handled. Let's at 3344 * least check that the error code gets propagated through, and 3345 * hope that it doesn't break when there is actually a signal :) 3346 */ 3347 ret = syscall(__NR_gettid); 3348 exit(!(ret == -1 && errno == 512)); 3349 } 3350 3351 close(sk_pair[1]); 3352 3353 memset(&req, 0, sizeof(req)); 3354 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3355 3356 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3357 3358 /* 3359 * Make sure the signal really is delivered, which means we're not 3360 * stuck in the user notification code any more and the notification 3361 * should be dead. 3362 */ 3363 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3364 3365 resp.id = req.id; 3366 resp.error = -EPERM; 3367 resp.val = 0; 3368 3369 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3370 EXPECT_EQ(errno, ENOENT); 3371 3372 memset(&req, 0, sizeof(req)); 3373 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3374 3375 resp.id = req.id; 3376 resp.error = -512; /* -ERESTARTSYS */ 3377 resp.val = 0; 3378 3379 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3380 3381 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3382 EXPECT_EQ(true, WIFEXITED(status)); 3383 EXPECT_EQ(0, WEXITSTATUS(status)); 3384 } 3385 3386 TEST(user_notification_closed_listener) 3387 { 3388 pid_t pid; 3389 long ret; 3390 int status, listener; 3391 3392 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3393 ASSERT_EQ(0, ret) { 3394 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3395 } 3396 3397 listener = user_notif_syscall(__NR_getppid, 3398 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3399 ASSERT_GE(listener, 0); 3400 3401 /* 3402 * Check that we get an ENOSYS when the listener is closed. 
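	 * With the last reference to the listener gone there is nobody left
	 * to answer the notification, so the kernel fails the filtered
	 * syscall in the target with ENOSYS instead, which is what the child
	 * checks for.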
3403 */ 3404 pid = fork(); 3405 ASSERT_GE(pid, 0); 3406 if (pid == 0) { 3407 close(listener); 3408 ret = syscall(__NR_getppid); 3409 exit(ret != -1 && errno != ENOSYS); 3410 } 3411 3412 close(listener); 3413 3414 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3415 EXPECT_EQ(true, WIFEXITED(status)); 3416 EXPECT_EQ(0, WEXITSTATUS(status)); 3417 } 3418 3419 /* 3420 * Check that a pid in a child namespace still shows up as valid in ours. 3421 */ 3422 TEST(user_notification_child_pid_ns) 3423 { 3424 pid_t pid; 3425 int status, listener; 3426 struct seccomp_notif req = {}; 3427 struct seccomp_notif_resp resp = {}; 3428 3429 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { 3430 if (errno == EINVAL) 3431 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3432 }; 3433 3434 listener = user_notif_syscall(__NR_getppid, 3435 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3436 ASSERT_GE(listener, 0); 3437 3438 pid = fork(); 3439 ASSERT_GE(pid, 0); 3440 3441 if (pid == 0) 3442 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3443 3444 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3445 EXPECT_EQ(req.pid, pid); 3446 3447 resp.id = req.id; 3448 resp.error = 0; 3449 resp.val = USER_NOTIF_MAGIC; 3450 3451 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3452 3453 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3454 EXPECT_EQ(true, WIFEXITED(status)); 3455 EXPECT_EQ(0, WEXITSTATUS(status)); 3456 close(listener); 3457 } 3458 3459 /* 3460 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3461 * invalid. 3462 */ 3463 TEST(user_notification_sibling_pid_ns) 3464 { 3465 pid_t pid, pid2; 3466 int status, listener; 3467 struct seccomp_notif req = {}; 3468 struct seccomp_notif_resp resp = {}; 3469 3470 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3471 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3472 } 3473 3474 listener = user_notif_syscall(__NR_getppid, 3475 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3476 ASSERT_GE(listener, 0); 3477 3478 pid = fork(); 3479 ASSERT_GE(pid, 0); 3480 3481 if (pid == 0) { 3482 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3483 3484 pid2 = fork(); 3485 ASSERT_GE(pid2, 0); 3486 3487 if (pid2 == 0) 3488 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3489 3490 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3491 EXPECT_EQ(true, WIFEXITED(status)); 3492 EXPECT_EQ(0, WEXITSTATUS(status)); 3493 exit(WEXITSTATUS(status)); 3494 } 3495 3496 /* Create the sibling ns, and sibling in it. */ 3497 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3498 if (errno == EPERM) 3499 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3500 } 3501 ASSERT_EQ(errno, 0); 3502 3503 pid2 = fork(); 3504 ASSERT_GE(pid2, 0); 3505 3506 if (pid2 == 0) { 3507 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3508 /* 3509 * The pid should be 0, i.e. the task is in some namespace that 3510 * we can't "see". 
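		 * (req.pid is reported relative to the pid namespace of the
		 * process reading the notification; a target that is not
		 * visible from there reads back as 0.)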
3511 */ 3512 EXPECT_EQ(req.pid, 0); 3513 3514 resp.id = req.id; 3515 resp.error = 0; 3516 resp.val = USER_NOTIF_MAGIC; 3517 3518 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3519 exit(0); 3520 } 3521 3522 close(listener); 3523 3524 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3525 EXPECT_EQ(true, WIFEXITED(status)); 3526 EXPECT_EQ(0, WEXITSTATUS(status)); 3527 3528 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3529 EXPECT_EQ(true, WIFEXITED(status)); 3530 EXPECT_EQ(0, WEXITSTATUS(status)); 3531 } 3532 3533 TEST(user_notification_fault_recv) 3534 { 3535 pid_t pid; 3536 int status, listener; 3537 struct seccomp_notif req = {}; 3538 struct seccomp_notif_resp resp = {}; 3539 3540 ASSERT_EQ(unshare(CLONE_NEWUSER), 0); 3541 3542 listener = user_notif_syscall(__NR_getppid, 3543 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3544 ASSERT_GE(listener, 0); 3545 3546 pid = fork(); 3547 ASSERT_GE(pid, 0); 3548 3549 if (pid == 0) 3550 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3551 3552 /* Do a bad recv() */ 3553 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3554 EXPECT_EQ(errno, EFAULT); 3555 3556 /* We should still be able to receive this notification, though. */ 3557 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3558 EXPECT_EQ(req.pid, pid); 3559 3560 resp.id = req.id; 3561 resp.error = 0; 3562 resp.val = USER_NOTIF_MAGIC; 3563 3564 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3565 3566 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3567 EXPECT_EQ(true, WIFEXITED(status)); 3568 EXPECT_EQ(0, WEXITSTATUS(status)); 3569 } 3570 3571 TEST(seccomp_get_notif_sizes) 3572 { 3573 struct seccomp_notif_sizes sizes; 3574 3575 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3576 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3577 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3578 } 3579 3580 TEST(user_notification_continue) 3581 { 3582 pid_t pid; 3583 long ret; 3584 int status, listener; 3585 struct seccomp_notif req = {}; 3586 struct seccomp_notif_resp resp = {}; 3587 struct pollfd pollfd; 3588 3589 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3590 ASSERT_EQ(0, ret) { 3591 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3592 } 3593 3594 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3595 ASSERT_GE(listener, 0); 3596 3597 pid = fork(); 3598 ASSERT_GE(pid, 0); 3599 3600 if (pid == 0) { 3601 int dup_fd, pipe_fds[2]; 3602 pid_t self; 3603 3604 ASSERT_GE(pipe(pipe_fds), 0); 3605 3606 dup_fd = dup(pipe_fds[0]); 3607 ASSERT_GE(dup_fd, 0); 3608 EXPECT_NE(pipe_fds[0], dup_fd); 3609 3610 self = getpid(); 3611 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); 3612 exit(0); 3613 } 3614 3615 pollfd.fd = listener; 3616 pollfd.events = POLLIN | POLLOUT; 3617 3618 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3619 EXPECT_EQ(pollfd.revents, POLLIN); 3620 3621 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3622 3623 pollfd.fd = listener; 3624 pollfd.events = POLLIN | POLLOUT; 3625 3626 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3627 EXPECT_EQ(pollfd.revents, POLLOUT); 3628 3629 EXPECT_EQ(req.data.nr, __NR_dup); 3630 3631 resp.id = req.id; 3632 resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; 3633 3634 /* 3635 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other 3636 * args be set to 0. 
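	 * With SECCOMP_USER_NOTIF_FLAG_CONTINUE the target simply performs
	 * the syscall natively instead of taking a value from the supervisor,
	 * so the only acceptable response with this flag set is, roughly:
	 *
	 *	resp.id = req.id;
	 *	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
	 *	resp.error = 0;
	 *	resp.val = 0;
	 *
	 * Any other error/val should be rejected with EINVAL, as verified
	 * below.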
3637 */ 3638 resp.error = 0; 3639 resp.val = USER_NOTIF_MAGIC; 3640 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3641 EXPECT_EQ(errno, EINVAL); 3642 3643 resp.error = USER_NOTIF_MAGIC; 3644 resp.val = 0; 3645 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3646 EXPECT_EQ(errno, EINVAL); 3647 3648 resp.error = 0; 3649 resp.val = 0; 3650 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { 3651 if (errno == EINVAL) 3652 SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); 3653 } 3654 3655 skip: 3656 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3657 EXPECT_EQ(true, WIFEXITED(status)); 3658 EXPECT_EQ(0, WEXITSTATUS(status)) { 3659 if (WEXITSTATUS(status) == 2) { 3660 SKIP(return, "Kernel does not support kcmp() syscall"); 3661 return; 3662 } 3663 } 3664 } 3665 3666 TEST(user_notification_filter_empty) 3667 { 3668 pid_t pid; 3669 long ret; 3670 int status; 3671 struct pollfd pollfd; 3672 struct clone_args args = { 3673 .flags = CLONE_FILES, 3674 .exit_signal = SIGCHLD, 3675 }; 3676 3677 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3678 ASSERT_EQ(0, ret) { 3679 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3680 } 3681 3682 pid = sys_clone3(&args, sizeof(args)); 3683 ASSERT_GE(pid, 0); 3684 3685 if (pid == 0) { 3686 int listener; 3687 3688 listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3689 if (listener < 0) 3690 _exit(EXIT_FAILURE); 3691 3692 if (dup2(listener, 200) != 200) 3693 _exit(EXIT_FAILURE); 3694 3695 close(listener); 3696 3697 _exit(EXIT_SUCCESS); 3698 } 3699 3700 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3701 EXPECT_EQ(true, WIFEXITED(status)); 3702 EXPECT_EQ(0, WEXITSTATUS(status)); 3703 3704 /* 3705 * The seccomp filter has become unused so we should be notified once 3706 * the kernel gets around to cleaning up task struct. 
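	 * "Notified" here means the listener fd (dup2()ed to 200 and shared
	 * with the parent via CLONE_FILES) reads as hung up: poll() should
	 * report POLLHUP once no task references the filter any more.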
3707 */ 3708 pollfd.fd = 200; 3709 pollfd.events = POLLHUP; 3710 3711 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3712 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3713 } 3714 3715 static void *do_thread(void *data) 3716 { 3717 return NULL; 3718 } 3719 3720 TEST(user_notification_filter_empty_threaded) 3721 { 3722 pid_t pid; 3723 long ret; 3724 int status; 3725 struct pollfd pollfd; 3726 struct clone_args args = { 3727 .flags = CLONE_FILES, 3728 .exit_signal = SIGCHLD, 3729 }; 3730 3731 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3732 ASSERT_EQ(0, ret) { 3733 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3734 } 3735 3736 pid = sys_clone3(&args, sizeof(args)); 3737 ASSERT_GE(pid, 0); 3738 3739 if (pid == 0) { 3740 pid_t pid1, pid2; 3741 int listener, status; 3742 pthread_t thread; 3743 3744 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3745 if (listener < 0) 3746 _exit(EXIT_FAILURE); 3747 3748 if (dup2(listener, 200) != 200) 3749 _exit(EXIT_FAILURE); 3750 3751 close(listener); 3752 3753 pid1 = fork(); 3754 if (pid1 < 0) 3755 _exit(EXIT_FAILURE); 3756 3757 if (pid1 == 0) 3758 _exit(EXIT_SUCCESS); 3759 3760 pid2 = fork(); 3761 if (pid2 < 0) 3762 _exit(EXIT_FAILURE); 3763 3764 if (pid2 == 0) 3765 _exit(EXIT_SUCCESS); 3766 3767 if (pthread_create(&thread, NULL, do_thread, NULL) || 3768 pthread_join(thread, NULL)) 3769 _exit(EXIT_FAILURE); 3770 3771 if (pthread_create(&thread, NULL, do_thread, NULL) || 3772 pthread_join(thread, NULL)) 3773 _exit(EXIT_FAILURE); 3774 3775 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || 3776 WEXITSTATUS(status)) 3777 _exit(EXIT_FAILURE); 3778 3779 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || 3780 WEXITSTATUS(status)) 3781 _exit(EXIT_FAILURE); 3782 3783 exit(EXIT_SUCCESS); 3784 } 3785 3786 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3787 EXPECT_EQ(true, WIFEXITED(status)); 3788 EXPECT_EQ(0, WEXITSTATUS(status)); 3789 3790 /* 3791 * The seccomp filter has become unused so we should be notified once 3792 * the kernel gets around to cleaning up task struct. 
3793 */ 3794 pollfd.fd = 200; 3795 pollfd.events = POLLHUP; 3796 3797 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3798 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3799 } 3800 3801 TEST(user_notification_addfd) 3802 { 3803 pid_t pid; 3804 long ret; 3805 int status, listener, memfd, fd; 3806 struct seccomp_notif_addfd addfd = {}; 3807 struct seccomp_notif_addfd_small small = {}; 3808 struct seccomp_notif_addfd_big big = {}; 3809 struct seccomp_notif req = {}; 3810 struct seccomp_notif_resp resp = {}; 3811 /* 100 ms */ 3812 struct timespec delay = { .tv_nsec = 100000000 }; 3813 3814 memfd = memfd_create("test", 0); 3815 ASSERT_GE(memfd, 0); 3816 3817 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3818 ASSERT_EQ(0, ret) { 3819 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3820 } 3821 3822 /* Check that the basic notification machinery works */ 3823 listener = user_notif_syscall(__NR_getppid, 3824 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3825 ASSERT_GE(listener, 0); 3826 3827 pid = fork(); 3828 ASSERT_GE(pid, 0); 3829 3830 if (pid == 0) { 3831 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) 3832 exit(1); 3833 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3834 } 3835 3836 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3837 3838 addfd.srcfd = memfd; 3839 addfd.newfd = 0; 3840 addfd.id = req.id; 3841 addfd.flags = 0x0; 3842 3843 /* Verify bad newfd_flags cannot be set */ 3844 addfd.newfd_flags = ~O_CLOEXEC; 3845 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3846 EXPECT_EQ(errno, EINVAL); 3847 addfd.newfd_flags = O_CLOEXEC; 3848 3849 /* Verify bad flags cannot be set */ 3850 addfd.flags = 0xff; 3851 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3852 EXPECT_EQ(errno, EINVAL); 3853 addfd.flags = 0; 3854 3855 /* Verify that remote_fd cannot be set without setting flags */ 3856 addfd.newfd = 1; 3857 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3858 EXPECT_EQ(errno, EINVAL); 3859 addfd.newfd = 0; 3860 3861 /* Verify small size cannot be set */ 3862 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); 3863 EXPECT_EQ(errno, EINVAL); 3864 3865 /* Verify we can't send bits filled in unknown buffer area */ 3866 memset(&big, 0xAA, sizeof(big)); 3867 big.addfd = addfd; 3868 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); 3869 EXPECT_EQ(errno, E2BIG); 3870 3871 3872 /* Verify we can set an arbitrary remote fd */ 3873 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 3874 /* 3875 * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), 3876 * 4(listener), so the newly allocated fd should be 5. 3877 */ 3878 EXPECT_EQ(fd, 5); 3879 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 3880 3881 /* Verify we can set an arbitrary remote fd with large size */ 3882 memset(&big, 0x0, sizeof(big)); 3883 big.addfd = addfd; 3884 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); 3885 EXPECT_EQ(fd, 6); 3886 3887 /* Verify we can set a specific remote fd */ 3888 addfd.newfd = 42; 3889 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 3890 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 3891 EXPECT_EQ(fd, 42); 3892 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 3893 3894 /* Resume syscall */ 3895 resp.id = req.id; 3896 resp.error = 0; 3897 resp.val = USER_NOTIF_MAGIC; 3898 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3899 3900 /* 3901 * This sets the ID of the ADD FD to the last request plus 1. The 3902 * notification ID increments 1 per notification. 
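	 * (Sequential ids are an implementation detail that this test leans
	 * on so it can queue an ADDFD for a notification that has not been
	 * received yet; the spin loop below waits for the child's second
	 * getppid() to actually generate that notification.)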
3903 */ 3904 addfd.id = req.id + 1; 3905 3906 /* This spins until the underlying notification is generated */ 3907 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 3908 errno != -EINPROGRESS) 3909 nanosleep(&delay, NULL); 3910 3911 memset(&req, 0, sizeof(req)); 3912 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3913 ASSERT_EQ(addfd.id, req.id); 3914 3915 resp.id = req.id; 3916 resp.error = 0; 3917 resp.val = USER_NOTIF_MAGIC; 3918 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3919 3920 /* Wait for child to finish. */ 3921 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3922 EXPECT_EQ(true, WIFEXITED(status)); 3923 EXPECT_EQ(0, WEXITSTATUS(status)); 3924 3925 close(memfd); 3926 } 3927 3928 TEST(user_notification_addfd_rlimit) 3929 { 3930 pid_t pid; 3931 long ret; 3932 int status, listener, memfd; 3933 struct seccomp_notif_addfd addfd = {}; 3934 struct seccomp_notif req = {}; 3935 struct seccomp_notif_resp resp = {}; 3936 const struct rlimit lim = { 3937 .rlim_cur = 0, 3938 .rlim_max = 0, 3939 }; 3940 3941 memfd = memfd_create("test", 0); 3942 ASSERT_GE(memfd, 0); 3943 3944 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3945 ASSERT_EQ(0, ret) { 3946 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3947 } 3948 3949 /* Check that the basic notification machinery works */ 3950 listener = user_notif_syscall(__NR_getppid, 3951 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3952 ASSERT_GE(listener, 0); 3953 3954 pid = fork(); 3955 ASSERT_GE(pid, 0); 3956 3957 if (pid == 0) 3958 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3959 3960 3961 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3962 3963 ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); 3964 3965 addfd.srcfd = memfd; 3966 addfd.newfd_flags = O_CLOEXEC; 3967 addfd.newfd = 0; 3968 addfd.id = req.id; 3969 addfd.flags = 0; 3970 3971 /* Should probably spot check /proc/sys/fs/file-nr */ 3972 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3973 EXPECT_EQ(errno, EMFILE); 3974 3975 addfd.newfd = 100; 3976 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 3977 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 3978 EXPECT_EQ(errno, EBADF); 3979 3980 resp.id = req.id; 3981 resp.error = 0; 3982 resp.val = USER_NOTIF_MAGIC; 3983 3984 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3985 3986 /* Wait for child to finish. */ 3987 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3988 EXPECT_EQ(true, WIFEXITED(status)); 3989 EXPECT_EQ(0, WEXITSTATUS(status)); 3990 3991 close(memfd); 3992 } 3993 3994 /* 3995 * TODO: 3996 * - expand NNP testing 3997 * - better arch-specific TRACE and TRAP handlers. 3998 * - endianness checking when appropriate 3999 * - 64-bit arg prodding 4000 * - arch value testing (x86 modes especially) 4001 * - verify that FILTER_FLAG_LOG filters generate log messages 4002 * - verify that RET_LOG generates log messages 4003 */ 4004 4005 TEST_HARNESS_MAIN 4006