// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 *
 * Test code for seccomp bpf.
 */

#define _GNU_SOURCE
#include <sys/types.h>

/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 */
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/times.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
#include <sys/resource.h>
#include <sys/capability.h>

#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>

#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"

/* Attempt to de-conflict with the selftests tree. */
#ifndef SKIP
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif

#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))

#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif

#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
#endif
#ifndef SECCOMP_RET_KILL
#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
#endif

#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__riscv)
#  define __NR_seccomp 277
# elif defined(__csky__)
#  define __NR_seccomp 277
# elif defined(__loongarch__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#
define __NR_seccomp 348 139 # elif defined(__xtensa__) 140 # define __NR_seccomp 337 141 # elif defined(__sh__) 142 # define __NR_seccomp 372 143 # elif defined(__mc68000__) 144 # define __NR_seccomp 380 145 # else 146 # warning "seccomp syscall number unknown for this architecture" 147 # define __NR_seccomp 0xffff 148 # endif 149 #endif 150 151 #ifndef SECCOMP_SET_MODE_STRICT 152 #define SECCOMP_SET_MODE_STRICT 0 153 #endif 154 155 #ifndef SECCOMP_SET_MODE_FILTER 156 #define SECCOMP_SET_MODE_FILTER 1 157 #endif 158 159 #ifndef SECCOMP_GET_ACTION_AVAIL 160 #define SECCOMP_GET_ACTION_AVAIL 2 161 #endif 162 163 #ifndef SECCOMP_GET_NOTIF_SIZES 164 #define SECCOMP_GET_NOTIF_SIZES 3 165 #endif 166 167 #ifndef SECCOMP_FILTER_FLAG_TSYNC 168 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 169 #endif 170 171 #ifndef SECCOMP_FILTER_FLAG_LOG 172 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 173 #endif 174 175 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW 176 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 177 #endif 178 179 #ifndef PTRACE_SECCOMP_GET_METADATA 180 #define PTRACE_SECCOMP_GET_METADATA 0x420d 181 182 struct seccomp_metadata { 183 __u64 filter_off; /* Input: which filter */ 184 __u64 flags; /* Output: filter's flags */ 185 }; 186 #endif 187 188 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER 189 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 190 #endif 191 192 #ifndef SECCOMP_RET_USER_NOTIF 193 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U 194 195 #define SECCOMP_IOC_MAGIC '!' 196 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 197 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 198 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 199 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 200 201 /* Flags for seccomp notification fd ioctl. 
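 *
 * These ioctls, together with the seccomp_notif structures defined below,
 * drive the SECCOMP_RET_USER_NOTIF flow: NOTIF_RECV blocks until a filtered
 * syscall generates a notification, NOTIF_SEND answers it, and
 * NOTIF_ID_VALID checks whether a cookie still refers to a live
 * notification. As a minimal illustrative sketch (not part of these tests),
 * a supervisor holding "listener", the fd returned when a filter is
 * installed with SECCOMP_FILTER_FLAG_NEW_LISTENER, would typically loop:
 *
 *	struct seccomp_notif req = { };
 *	struct seccomp_notif_resp resp = { };
 *
 *	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req) == 0) {
 *		resp.id = req.id;
 *		resp.error = -EPERM;
 *		resp.val = 0;
 *		resp.flags = 0;
 *		ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
 *		memset(&req, 0, sizeof(req));
 *	}
 *
 * The response must echo req.id; a negative error fails the syscall in the
 * target (here with EPERM), while error == 0 makes it return val.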
*/ 202 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 203 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 204 struct seccomp_notif_resp) 205 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) 206 207 struct seccomp_notif { 208 __u64 id; 209 __u32 pid; 210 __u32 flags; 211 struct seccomp_data data; 212 }; 213 214 struct seccomp_notif_resp { 215 __u64 id; 216 __s64 val; 217 __s32 error; 218 __u32 flags; 219 }; 220 221 struct seccomp_notif_sizes { 222 __u16 seccomp_notif; 223 __u16 seccomp_notif_resp; 224 __u16 seccomp_data; 225 }; 226 #endif 227 228 #ifndef SECCOMP_IOCTL_NOTIF_ADDFD 229 /* On success, the return value is the remote process's added fd number */ 230 #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ 231 struct seccomp_notif_addfd) 232 233 /* valid flags for seccomp_notif_addfd */ 234 #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ 235 236 struct seccomp_notif_addfd { 237 __u64 id; 238 __u32 flags; 239 __u32 srcfd; 240 __u32 newfd; 241 __u32 newfd_flags; 242 }; 243 #endif 244 245 #ifndef SECCOMP_ADDFD_FLAG_SEND 246 #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ 247 #endif 248 249 struct seccomp_notif_addfd_small { 250 __u64 id; 251 char weird[4]; 252 }; 253 #define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ 254 SECCOMP_IOW(3, struct seccomp_notif_addfd_small) 255 256 struct seccomp_notif_addfd_big { 257 union { 258 struct seccomp_notif_addfd addfd; 259 char buf[sizeof(struct seccomp_notif_addfd) + 8]; 260 }; 261 }; 262 #define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ 263 SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) 264 265 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY 266 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 267 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 268 #endif 269 270 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE 271 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 272 #endif 273 274 #ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH 275 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) 276 #endif 277 278 #ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV 279 #define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) 280 #endif 281 282 #ifndef seccomp 283 int seccomp(unsigned int op, unsigned int flags, void *args) 284 { 285 errno = 0; 286 return syscall(__NR_seccomp, op, flags, args); 287 } 288 #endif 289 290 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 291 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) 292 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 293 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) 294 #else 295 #error "wut? Unknown __BYTE_ORDER__?!" 296 #endif 297 298 #define SIBLING_EXIT_UNKILLED 0xbadbeef 299 #define SIBLING_EXIT_FAILURE 0xbadface 300 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed 301 302 static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) 303 { 304 #ifdef __NR_kcmp 305 errno = 0; 306 return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); 307 #else 308 errno = ENOSYS; 309 return -1; 310 #endif 311 } 312 313 /* Have TH_LOG report actual location filecmp() is used. 
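 *
 * The underlying kcmp(2) call compares kernel resources: with KCMP_FILE a
 * return of 0 means fd1 in pid1 and fd2 in pid2 refer to the same open file
 * description, a positive ordering result means they do not, and -1 with
 * ENOSYS means the kernel lacks CONFIG_KCMP, which the wrapper below treats
 * as "cannot verify" rather than as a failure. A caller would typically
 * write something like (pid and remote_fd being whatever is under test):
 *
 *	EXPECT_EQ(0, filecmp(getpid(), pid, fd, remote_fd));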
*/ 314 #define filecmp(pid1, pid2, fd1, fd2) ({ \ 315 int _ret; \ 316 \ 317 _ret = __filecmp(pid1, pid2, fd1, fd2); \ 318 if (_ret != 0) { \ 319 if (_ret < 0 && errno == ENOSYS) { \ 320 TH_LOG("kcmp() syscall missing (test is less accurate)");\ 321 _ret = 0; \ 322 } \ 323 } \ 324 _ret; }) 325 326 TEST(kcmp) 327 { 328 int ret; 329 330 ret = __filecmp(getpid(), getpid(), 1, 1); 331 EXPECT_EQ(ret, 0); 332 if (ret != 0 && errno == ENOSYS) 333 SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)"); 334 } 335 336 TEST(mode_strict_support) 337 { 338 long ret; 339 340 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 341 ASSERT_EQ(0, ret) { 342 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 343 } 344 syscall(__NR_exit, 0); 345 } 346 347 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 348 { 349 long ret; 350 351 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 352 ASSERT_EQ(0, ret) { 353 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 354 } 355 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 356 NULL, NULL, NULL); 357 EXPECT_FALSE(true) { 358 TH_LOG("Unreachable!"); 359 } 360 } 361 362 /* Note! This doesn't test no new privs behavior */ 363 TEST(no_new_privs_support) 364 { 365 long ret; 366 367 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 368 EXPECT_EQ(0, ret) { 369 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 370 } 371 } 372 373 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */ 374 TEST(mode_filter_support) 375 { 376 long ret; 377 378 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 379 ASSERT_EQ(0, ret) { 380 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 381 } 382 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 383 EXPECT_EQ(-1, ret); 384 EXPECT_EQ(EFAULT, errno) { 385 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 386 } 387 } 388 389 TEST(mode_filter_without_nnp) 390 { 391 struct sock_filter filter[] = { 392 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 393 }; 394 struct sock_fprog prog = { 395 .len = (unsigned short)ARRAY_SIZE(filter), 396 .filter = filter, 397 }; 398 long ret; 399 cap_t cap = cap_get_proc(); 400 cap_flag_value_t is_cap_sys_admin = 0; 401 402 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 403 ASSERT_LE(0, ret) { 404 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 405 } 406 errno = 0; 407 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 408 /* Succeeds with CAP_SYS_ADMIN, fails without */ 409 cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin); 410 if (!is_cap_sys_admin) { 411 EXPECT_EQ(-1, ret); 412 EXPECT_EQ(EACCES, errno); 413 } else { 414 EXPECT_EQ(0, ret); 415 } 416 } 417 418 #define MAX_INSNS_PER_PATH 32768 419 420 TEST(filter_size_limits) 421 { 422 int i; 423 int count = BPF_MAXINSNS + 1; 424 struct sock_filter allow[] = { 425 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 426 }; 427 struct sock_filter *filter; 428 struct sock_fprog prog = { }; 429 long ret; 430 431 filter = calloc(count, sizeof(*filter)); 432 ASSERT_NE(NULL, filter); 433 434 for (i = 0; i < count; i++) 435 filter[i] = allow[0]; 436 437 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 438 ASSERT_EQ(0, ret); 439 440 prog.filter = filter; 441 prog.len = count; 442 443 /* Too many filter instructions in a single filter. */ 444 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 445 ASSERT_NE(0, ret) { 446 TH_LOG("Installing %d insn filter was allowed", prog.len); 447 } 448 449 /* One less is okay, though. 
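	 * BPF_MAXINSNS is 4096, so the 4097-instruction program built above
	 * must be rejected, while trimming it to exactly 4096 instructions
	 * below is expected to succeed. The next test exercises the separate
	 * cap on the *total* number of instructions across all stacked
	 * filters (MAX_INSNS_PER_PATH, with a small per-filter penalty that
	 * shows up in its failure message).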
*/ 450 prog.len -= 1; 451 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 452 ASSERT_EQ(0, ret) { 453 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 454 } 455 } 456 457 TEST(filter_chain_limits) 458 { 459 int i; 460 int count = BPF_MAXINSNS; 461 struct sock_filter allow[] = { 462 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 463 }; 464 struct sock_filter *filter; 465 struct sock_fprog prog = { }; 466 long ret; 467 468 filter = calloc(count, sizeof(*filter)); 469 ASSERT_NE(NULL, filter); 470 471 for (i = 0; i < count; i++) 472 filter[i] = allow[0]; 473 474 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 475 ASSERT_EQ(0, ret); 476 477 prog.filter = filter; 478 prog.len = 1; 479 480 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 481 ASSERT_EQ(0, ret); 482 483 prog.len = count; 484 485 /* Too many total filter instructions. */ 486 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 487 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 488 if (ret != 0) 489 break; 490 } 491 ASSERT_NE(0, ret) { 492 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 493 i, count, i * (count + 4)); 494 } 495 } 496 497 TEST(mode_filter_cannot_move_to_strict) 498 { 499 struct sock_filter filter[] = { 500 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 501 }; 502 struct sock_fprog prog = { 503 .len = (unsigned short)ARRAY_SIZE(filter), 504 .filter = filter, 505 }; 506 long ret; 507 508 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 509 ASSERT_EQ(0, ret); 510 511 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 512 ASSERT_EQ(0, ret); 513 514 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 515 EXPECT_EQ(-1, ret); 516 EXPECT_EQ(EINVAL, errno); 517 } 518 519 520 TEST(mode_filter_get_seccomp) 521 { 522 struct sock_filter filter[] = { 523 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 524 }; 525 struct sock_fprog prog = { 526 .len = (unsigned short)ARRAY_SIZE(filter), 527 .filter = filter, 528 }; 529 long ret; 530 531 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 532 ASSERT_EQ(0, ret); 533 534 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 535 EXPECT_EQ(0, ret); 536 537 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 538 ASSERT_EQ(0, ret); 539 540 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 541 EXPECT_EQ(2, ret); 542 } 543 544 545 TEST(ALLOW_all) 546 { 547 struct sock_filter filter[] = { 548 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 549 }; 550 struct sock_fprog prog = { 551 .len = (unsigned short)ARRAY_SIZE(filter), 552 .filter = filter, 553 }; 554 long ret; 555 556 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 557 ASSERT_EQ(0, ret); 558 559 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 560 ASSERT_EQ(0, ret); 561 } 562 563 TEST(empty_prog) 564 { 565 struct sock_filter filter[] = { 566 }; 567 struct sock_fprog prog = { 568 .len = (unsigned short)ARRAY_SIZE(filter), 569 .filter = filter, 570 }; 571 long ret; 572 573 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 574 ASSERT_EQ(0, ret); 575 576 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 577 EXPECT_EQ(-1, ret); 578 EXPECT_EQ(EINVAL, errno); 579 } 580 581 TEST(log_all) 582 { 583 struct sock_filter filter[] = { 584 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 585 }; 586 struct sock_fprog prog = { 587 .len = (unsigned short)ARRAY_SIZE(filter), 588 .filter = filter, 589 }; 590 long ret; 591 pid_t parent = getppid(); 592 593 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 594 ASSERT_EQ(0, ret); 595 596 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 597 ASSERT_EQ(0, 
ret); 598 599 /* getppid() should succeed and be logged (no check for logging) */ 600 EXPECT_EQ(parent, syscall(__NR_getppid)); 601 } 602 603 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 604 { 605 struct sock_filter filter[] = { 606 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 607 }; 608 struct sock_fprog prog = { 609 .len = (unsigned short)ARRAY_SIZE(filter), 610 .filter = filter, 611 }; 612 long ret; 613 614 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 615 ASSERT_EQ(0, ret); 616 617 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 618 ASSERT_EQ(0, ret); 619 EXPECT_EQ(0, syscall(__NR_getpid)) { 620 TH_LOG("getpid() shouldn't ever return"); 621 } 622 } 623 624 /* return code >= 0x80000000 is unused. */ 625 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) 626 { 627 struct sock_filter filter[] = { 628 BPF_STMT(BPF_RET|BPF_K, 0x90000000U), 629 }; 630 struct sock_fprog prog = { 631 .len = (unsigned short)ARRAY_SIZE(filter), 632 .filter = filter, 633 }; 634 long ret; 635 636 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 637 ASSERT_EQ(0, ret); 638 639 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 640 ASSERT_EQ(0, ret); 641 EXPECT_EQ(0, syscall(__NR_getpid)) { 642 TH_LOG("getpid() shouldn't ever return"); 643 } 644 } 645 646 TEST_SIGNAL(KILL_all, SIGSYS) 647 { 648 struct sock_filter filter[] = { 649 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 650 }; 651 struct sock_fprog prog = { 652 .len = (unsigned short)ARRAY_SIZE(filter), 653 .filter = filter, 654 }; 655 long ret; 656 657 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 658 ASSERT_EQ(0, ret); 659 660 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 661 ASSERT_EQ(0, ret); 662 } 663 664 TEST_SIGNAL(KILL_one, SIGSYS) 665 { 666 struct sock_filter filter[] = { 667 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 668 offsetof(struct seccomp_data, nr)), 669 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 670 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 671 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 672 }; 673 struct sock_fprog prog = { 674 .len = (unsigned short)ARRAY_SIZE(filter), 675 .filter = filter, 676 }; 677 long ret; 678 pid_t parent = getppid(); 679 680 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 681 ASSERT_EQ(0, ret); 682 683 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 684 ASSERT_EQ(0, ret); 685 686 EXPECT_EQ(parent, syscall(__NR_getppid)); 687 /* getpid() should never return. */ 688 EXPECT_EQ(0, syscall(__NR_getpid)); 689 } 690 691 TEST_SIGNAL(KILL_one_arg_one, SIGSYS) 692 { 693 void *fatal_address; 694 struct sock_filter filter[] = { 695 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 696 offsetof(struct seccomp_data, nr)), 697 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), 698 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 699 /* Only both with lower 32-bit for now. */ 700 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), 701 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 702 (unsigned long)&fatal_address, 0, 1), 703 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 704 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 705 }; 706 struct sock_fprog prog = { 707 .len = (unsigned short)ARRAY_SIZE(filter), 708 .filter = filter, 709 }; 710 long ret; 711 pid_t parent = getppid(); 712 struct tms timebuf; 713 clock_t clock = times(&timebuf); 714 715 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 716 ASSERT_EQ(0, ret); 717 718 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 719 ASSERT_EQ(0, ret); 720 721 EXPECT_EQ(parent, syscall(__NR_getppid)); 722 EXPECT_LE(clock, syscall(__NR_times, &timebuf)); 723 /* times() should never return. 
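	 * Note that a seccomp filter only sees the raw register values in
	 * seccomp_data.args[]; it cannot dereference pointers. The filter
	 * above therefore matches the pointer value of &fatal_address itself,
	 * not anything it points to. Also, syscall_arg(n) loads just the low
	 * 32 bits of the 64-bit argument (its offset differs between little-
	 * and big-endian, see the macro near the top of the file). A filter
	 * that needs the full 64-bit value must add a second 32-bit load of
	 * the high word, roughly (little-endian sketch, lo32/hi32 being the
	 * two halves of the value to match):
	 *
	 *	BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
	 *	... jump unless equal to lo32 ...
	 *	BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0) + sizeof(__u32)),
	 *	... jump unless equal to hi32 ...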
 */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}

TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
#ifndef __NR_mmap2
	int sysno = __NR_mmap;
#else
	int sysno = __NR_mmap2;
#endif
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32 bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	int fd;
	void *map1, *map2;
	int page_size = sysconf(_SC_PAGESIZE);

	ASSERT_LT(0, page_size);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	fd = open("/dev/zero", O_RDONLY);
	ASSERT_NE(-1, fd);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	map1 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
	EXPECT_NE(MAP_FAILED, map1);
	/* mmap2() should never return. */
	map2 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
	EXPECT_EQ(MAP_FAILED, map2);

	/* Reaching here means the test failed, so clean up the resources. */
	munmap(map1, page_size);
	munmap(map2, page_size);
	close(fd);
}

/* Thread body that either dies via a seccomp filter violation or exits cleanly. */
void *kill_thread(void *data)
{
	bool die = (bool)data;

	if (die) {
		syscall(__NR_getpid);
		return (void *)SIBLING_EXIT_FAILURE;
	}

	return (void *)SIBLING_EXIT_UNKILLED;
}

enum kill_t {
	KILL_THREAD,
	KILL_PROCESS,
	RET_UNKNOWN
};

/* Prepare a thread that will kill itself or both of us. */
void kill_thread_or_group(struct __test_metadata *_metadata,
			  enum kill_t kill_how)
{
	pthread_t thread;
	void *status;
	/* Kill only when calling __NR_getpid. */
	struct sock_filter filter_thread[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_thread = {
		.len = (unsigned short)ARRAY_SIZE(filter_thread),
		.filter = filter_thread,
	};
	int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
	struct sock_filter filter_process[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, kill),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_process = {
		.len = (unsigned short)ARRAY_SIZE(filter_process),
		.filter = filter_process,
	};

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
			     kill_how == KILL_THREAD ? &prog_thread
						     : &prog_process));

	/*
	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
	 * flag cannot be downgraded by a new filter.
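	 * With stacked filters the most restrictive action across all of them
	 * wins, and SECCOMP_RET_KILL_PROCESS outranks SECCOMP_RET_KILL_THREAD.
	 * So even though prog_thread is attached after prog_process here, a
	 * getpid() from any thread is still expected to take down the whole
	 * thread group, which the KILL_process test below observes as
	 * WTERMSIG(status) == SIGSYS.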
842 */ 843 if (kill_how == KILL_PROCESS) 844 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 845 846 /* Start a thread that will exit immediately. */ 847 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 848 ASSERT_EQ(0, pthread_join(thread, &status)); 849 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 850 851 /* Start a thread that will die immediately. */ 852 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 853 ASSERT_EQ(0, pthread_join(thread, &status)); 854 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 855 856 /* 857 * If we get here, only the spawned thread died. Let the parent know 858 * the whole process didn't die (i.e. this thread, the spawner, 859 * stayed running). 860 */ 861 exit(42); 862 } 863 864 TEST(KILL_thread) 865 { 866 int status; 867 pid_t child_pid; 868 869 child_pid = fork(); 870 ASSERT_LE(0, child_pid); 871 if (child_pid == 0) { 872 kill_thread_or_group(_metadata, KILL_THREAD); 873 _exit(38); 874 } 875 876 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 877 878 /* If only the thread was killed, we'll see exit 42. */ 879 ASSERT_TRUE(WIFEXITED(status)); 880 ASSERT_EQ(42, WEXITSTATUS(status)); 881 } 882 883 TEST(KILL_process) 884 { 885 int status; 886 pid_t child_pid; 887 888 child_pid = fork(); 889 ASSERT_LE(0, child_pid); 890 if (child_pid == 0) { 891 kill_thread_or_group(_metadata, KILL_PROCESS); 892 _exit(38); 893 } 894 895 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 896 897 /* If the entire process was killed, we'll see SIGSYS. */ 898 ASSERT_TRUE(WIFSIGNALED(status)); 899 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 900 } 901 902 TEST(KILL_unknown) 903 { 904 int status; 905 pid_t child_pid; 906 907 child_pid = fork(); 908 ASSERT_LE(0, child_pid); 909 if (child_pid == 0) { 910 kill_thread_or_group(_metadata, RET_UNKNOWN); 911 _exit(38); 912 } 913 914 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 915 916 /* If the entire process was killed, we'll see SIGSYS. */ 917 EXPECT_TRUE(WIFSIGNALED(status)) { 918 TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); 919 } 920 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 921 } 922 923 /* TODO(wad) add 64-bit versus 32-bit arg tests. */ 924 TEST(arg_out_of_range) 925 { 926 struct sock_filter filter[] = { 927 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 928 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 929 }; 930 struct sock_fprog prog = { 931 .len = (unsigned short)ARRAY_SIZE(filter), 932 .filter = filter, 933 }; 934 long ret; 935 936 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 937 ASSERT_EQ(0, ret); 938 939 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 940 EXPECT_EQ(-1, ret); 941 EXPECT_EQ(EINVAL, errno); 942 } 943 944 #define ERRNO_FILTER(name, errno) \ 945 struct sock_filter _read_filter_##name[] = { \ 946 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 947 offsetof(struct seccomp_data, nr)), \ 948 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 949 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 950 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 951 }; \ 952 struct sock_fprog prog_##name = { \ 953 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 954 .filter = _read_filter_##name, \ 955 } 956 957 /* Make sure basic errno values are correctly passed through a filter. 
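 * A filter's return value is an action OR'd with 16 bits of data
 * (SECCOMP_RET_DATA); for SECCOMP_RET_ERRNO that data is the errno the
 * denied syscall should report. ERRNO_FILTER() above expands to a filter
 * whose read(2) branch returns, for example:
 *
 *	BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG)
 *
 * so read(-1, NULL, 0) is expected to fail with -1 and errno == E2BIG while
 * everything else falls through to SECCOMP_RET_ALLOW. The data field can
 * hold values larger than any valid errno, which is why ERRNO_capped below
 * expects 4096 to be clamped to MAX_ERRNO (4095).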
*/ 958 TEST(ERRNO_valid) 959 { 960 ERRNO_FILTER(valid, E2BIG); 961 long ret; 962 pid_t parent = getppid(); 963 964 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 965 ASSERT_EQ(0, ret); 966 967 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 968 ASSERT_EQ(0, ret); 969 970 EXPECT_EQ(parent, syscall(__NR_getppid)); 971 EXPECT_EQ(-1, read(-1, NULL, 0)); 972 EXPECT_EQ(E2BIG, errno); 973 } 974 975 /* Make sure an errno of zero is correctly handled by the arch code. */ 976 TEST(ERRNO_zero) 977 { 978 ERRNO_FILTER(zero, 0); 979 long ret; 980 pid_t parent = getppid(); 981 982 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 983 ASSERT_EQ(0, ret); 984 985 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 986 ASSERT_EQ(0, ret); 987 988 EXPECT_EQ(parent, syscall(__NR_getppid)); 989 /* "errno" of 0 is ok. */ 990 EXPECT_EQ(0, read(-1, NULL, 0)); 991 } 992 993 /* 994 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 995 * This tests that the errno value gets capped correctly, fixed by 996 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 997 */ 998 TEST(ERRNO_capped) 999 { 1000 ERRNO_FILTER(capped, 4096); 1001 long ret; 1002 pid_t parent = getppid(); 1003 1004 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1005 ASSERT_EQ(0, ret); 1006 1007 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 1008 ASSERT_EQ(0, ret); 1009 1010 EXPECT_EQ(parent, syscall(__NR_getppid)); 1011 EXPECT_EQ(-1, read(-1, NULL, 0)); 1012 EXPECT_EQ(4095, errno); 1013 } 1014 1015 /* 1016 * Filters are processed in reverse order: last applied is executed first. 1017 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 1018 * SECCOMP_RET_DATA mask results will follow the most recently applied 1019 * matching filter return (and not the lowest or highest value). 
1020 */ 1021 TEST(ERRNO_order) 1022 { 1023 ERRNO_FILTER(first, 11); 1024 ERRNO_FILTER(second, 13); 1025 ERRNO_FILTER(third, 12); 1026 long ret; 1027 pid_t parent = getppid(); 1028 1029 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1030 ASSERT_EQ(0, ret); 1031 1032 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 1033 ASSERT_EQ(0, ret); 1034 1035 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 1036 ASSERT_EQ(0, ret); 1037 1038 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 1039 ASSERT_EQ(0, ret); 1040 1041 EXPECT_EQ(parent, syscall(__NR_getppid)); 1042 EXPECT_EQ(-1, read(-1, NULL, 0)); 1043 EXPECT_EQ(12, errno); 1044 } 1045 1046 FIXTURE(TRAP) { 1047 struct sock_fprog prog; 1048 }; 1049 1050 FIXTURE_SETUP(TRAP) 1051 { 1052 struct sock_filter filter[] = { 1053 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1054 offsetof(struct seccomp_data, nr)), 1055 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1056 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1057 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1058 }; 1059 1060 memset(&self->prog, 0, sizeof(self->prog)); 1061 self->prog.filter = malloc(sizeof(filter)); 1062 ASSERT_NE(NULL, self->prog.filter); 1063 memcpy(self->prog.filter, filter, sizeof(filter)); 1064 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1065 } 1066 1067 FIXTURE_TEARDOWN(TRAP) 1068 { 1069 if (self->prog.filter) 1070 free(self->prog.filter); 1071 } 1072 1073 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 1074 { 1075 long ret; 1076 1077 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1078 ASSERT_EQ(0, ret); 1079 1080 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1081 ASSERT_EQ(0, ret); 1082 syscall(__NR_getpid); 1083 } 1084 1085 /* Ensure that SIGSYS overrides SIG_IGN */ 1086 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 1087 { 1088 long ret; 1089 1090 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1091 ASSERT_EQ(0, ret); 1092 1093 signal(SIGSYS, SIG_IGN); 1094 1095 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1096 ASSERT_EQ(0, ret); 1097 syscall(__NR_getpid); 1098 } 1099 1100 static siginfo_t TRAP_info; 1101 static volatile int TRAP_nr; 1102 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 1103 { 1104 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 1105 TRAP_nr = nr; 1106 } 1107 1108 TEST_F(TRAP, handler) 1109 { 1110 int ret, test; 1111 struct sigaction act; 1112 sigset_t mask; 1113 1114 memset(&act, 0, sizeof(act)); 1115 sigemptyset(&mask); 1116 sigaddset(&mask, SIGSYS); 1117 1118 act.sa_sigaction = &TRAP_action; 1119 act.sa_flags = SA_SIGINFO; 1120 ret = sigaction(SIGSYS, &act, NULL); 1121 ASSERT_EQ(0, ret) { 1122 TH_LOG("sigaction failed"); 1123 } 1124 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 1125 ASSERT_EQ(0, ret) { 1126 TH_LOG("sigprocmask failed"); 1127 } 1128 1129 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1130 ASSERT_EQ(0, ret); 1131 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 1132 ASSERT_EQ(0, ret); 1133 TRAP_nr = 0; 1134 memset(&TRAP_info, 0, sizeof(TRAP_info)); 1135 /* Expect the registers to be rolled back. (nr = error) may vary 1136 * based on arch. */ 1137 ret = syscall(__NR_getpid); 1138 /* Silence gcc warning about volatile. 
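	 * The checks below poke into the SIGSYS portion of the siginfo_t that
	 * TRAP_action() captured: _syscall is the blocked syscall number,
	 * _arch the AUDIT_ARCH_* token, and _call_addr the address of the
	 * calling user instruction. With glibc 2.26+ the same data is
	 * reachable directly, so a real SIGSYS handler would simply do
	 * something like (sketch; handle_blocked() is a hypothetical helper):
	 *
	 *	if (info->si_code == SYS_SECCOMP)
	 *		handle_blocked(info->si_syscall, info->si_arch);
	 *
	 * The local_sigsys overlay below only exists so the test still builds
	 * against older libcs that lack the si_syscall accessors.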
*/ 1139 test = TRAP_nr; 1140 EXPECT_EQ(SIGSYS, test); 1141 struct local_sigsys { 1142 void *_call_addr; /* calling user insn */ 1143 int _syscall; /* triggering system call number */ 1144 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1145 } *sigsys = (struct local_sigsys *) 1146 #ifdef si_syscall 1147 &(TRAP_info.si_call_addr); 1148 #else 1149 &TRAP_info.si_pid; 1150 #endif 1151 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1152 /* Make sure arch is non-zero. */ 1153 EXPECT_NE(0, sigsys->_arch); 1154 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1155 } 1156 1157 FIXTURE(precedence) { 1158 struct sock_fprog allow; 1159 struct sock_fprog log; 1160 struct sock_fprog trace; 1161 struct sock_fprog error; 1162 struct sock_fprog trap; 1163 struct sock_fprog kill; 1164 }; 1165 1166 FIXTURE_SETUP(precedence) 1167 { 1168 struct sock_filter allow_insns[] = { 1169 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1170 }; 1171 struct sock_filter log_insns[] = { 1172 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1173 offsetof(struct seccomp_data, nr)), 1174 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1175 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1176 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1177 }; 1178 struct sock_filter trace_insns[] = { 1179 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1180 offsetof(struct seccomp_data, nr)), 1181 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1182 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1183 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1184 }; 1185 struct sock_filter error_insns[] = { 1186 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1187 offsetof(struct seccomp_data, nr)), 1188 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1189 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1190 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1191 }; 1192 struct sock_filter trap_insns[] = { 1193 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1194 offsetof(struct seccomp_data, nr)), 1195 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1196 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1197 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1198 }; 1199 struct sock_filter kill_insns[] = { 1200 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1201 offsetof(struct seccomp_data, nr)), 1202 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1203 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1204 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1205 }; 1206 1207 memset(self, 0, sizeof(*self)); 1208 #define FILTER_ALLOC(_x) \ 1209 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1210 ASSERT_NE(NULL, self->_x.filter); \ 1211 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1212 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1213 FILTER_ALLOC(allow); 1214 FILTER_ALLOC(log); 1215 FILTER_ALLOC(trace); 1216 FILTER_ALLOC(error); 1217 FILTER_ALLOC(trap); 1218 FILTER_ALLOC(kill); 1219 } 1220 1221 FIXTURE_TEARDOWN(precedence) 1222 { 1223 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1224 FILTER_FREE(allow); 1225 FILTER_FREE(log); 1226 FILTER_FREE(trace); 1227 FILTER_FREE(error); 1228 FILTER_FREE(trap); 1229 FILTER_FREE(kill); 1230 } 1231 1232 TEST_F(precedence, allow_ok) 1233 { 1234 pid_t parent, res = 0; 1235 long ret; 1236 1237 parent = getppid(); 1238 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1239 ASSERT_EQ(0, ret); 1240 1241 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1242 ASSERT_EQ(0, ret); 1243 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1244 ASSERT_EQ(0, ret); 1245 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1246 ASSERT_EQ(0, ret); 1247 ret = prctl(PR_SET_SECCOMP, 
SECCOMP_MODE_FILTER, &self->error); 1248 ASSERT_EQ(0, ret); 1249 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1250 ASSERT_EQ(0, ret); 1251 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1252 ASSERT_EQ(0, ret); 1253 /* Should work just fine. */ 1254 res = syscall(__NR_getppid); 1255 EXPECT_EQ(parent, res); 1256 } 1257 1258 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1259 { 1260 pid_t parent, res = 0; 1261 long ret; 1262 1263 parent = getppid(); 1264 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1265 ASSERT_EQ(0, ret); 1266 1267 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1268 ASSERT_EQ(0, ret); 1269 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1270 ASSERT_EQ(0, ret); 1271 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1272 ASSERT_EQ(0, ret); 1273 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1274 ASSERT_EQ(0, ret); 1275 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1276 ASSERT_EQ(0, ret); 1277 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1278 ASSERT_EQ(0, ret); 1279 /* Should work just fine. */ 1280 res = syscall(__NR_getppid); 1281 EXPECT_EQ(parent, res); 1282 /* getpid() should never return. */ 1283 res = syscall(__NR_getpid); 1284 EXPECT_EQ(0, res); 1285 } 1286 1287 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1288 { 1289 pid_t parent; 1290 long ret; 1291 1292 parent = getppid(); 1293 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1294 ASSERT_EQ(0, ret); 1295 1296 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1297 ASSERT_EQ(0, ret); 1298 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1299 ASSERT_EQ(0, ret); 1300 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1301 ASSERT_EQ(0, ret); 1302 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1303 ASSERT_EQ(0, ret); 1304 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1305 ASSERT_EQ(0, ret); 1306 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1307 ASSERT_EQ(0, ret); 1308 /* Should work just fine. */ 1309 EXPECT_EQ(parent, syscall(__NR_getppid)); 1310 /* getpid() should never return. */ 1311 EXPECT_EQ(0, syscall(__NR_getpid)); 1312 } 1313 1314 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1315 { 1316 pid_t parent; 1317 long ret; 1318 1319 parent = getppid(); 1320 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1321 ASSERT_EQ(0, ret); 1322 1323 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1324 ASSERT_EQ(0, ret); 1325 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1326 ASSERT_EQ(0, ret); 1327 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1328 ASSERT_EQ(0, ret); 1329 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1330 ASSERT_EQ(0, ret); 1331 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1332 ASSERT_EQ(0, ret); 1333 /* Should work just fine. */ 1334 EXPECT_EQ(parent, syscall(__NR_getppid)); 1335 /* getpid() should never return. 
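	 * getpid() matches every filter installed above, so the action taken
	 * is the highest-precedence one among them. These precedence tests
	 * pin down that ordering: KILL is acted on first, then TRAP, then
	 * ERRNO, then TRACE, then LOG, with ALLOW last, regardless of the
	 * order in which the filters were attached. Here the strongest action
	 * installed for getpid() is SECCOMP_RET_TRAP, hence the SIGSYS this
	 * test expects.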
*/ 1336 EXPECT_EQ(0, syscall(__NR_getpid)); 1337 } 1338 1339 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1340 { 1341 pid_t parent; 1342 long ret; 1343 1344 parent = getppid(); 1345 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1346 ASSERT_EQ(0, ret); 1347 1348 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1349 ASSERT_EQ(0, ret); 1350 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1351 ASSERT_EQ(0, ret); 1352 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1353 ASSERT_EQ(0, ret); 1354 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1355 ASSERT_EQ(0, ret); 1356 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1357 ASSERT_EQ(0, ret); 1358 /* Should work just fine. */ 1359 EXPECT_EQ(parent, syscall(__NR_getppid)); 1360 /* getpid() should never return. */ 1361 EXPECT_EQ(0, syscall(__NR_getpid)); 1362 } 1363 1364 TEST_F(precedence, errno_is_third) 1365 { 1366 pid_t parent; 1367 long ret; 1368 1369 parent = getppid(); 1370 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1371 ASSERT_EQ(0, ret); 1372 1373 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1374 ASSERT_EQ(0, ret); 1375 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1376 ASSERT_EQ(0, ret); 1377 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1378 ASSERT_EQ(0, ret); 1379 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1380 ASSERT_EQ(0, ret); 1381 /* Should work just fine. */ 1382 EXPECT_EQ(parent, syscall(__NR_getppid)); 1383 EXPECT_EQ(0, syscall(__NR_getpid)); 1384 } 1385 1386 TEST_F(precedence, errno_is_third_in_any_order) 1387 { 1388 pid_t parent; 1389 long ret; 1390 1391 parent = getppid(); 1392 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1393 ASSERT_EQ(0, ret); 1394 1395 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1396 ASSERT_EQ(0, ret); 1397 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1398 ASSERT_EQ(0, ret); 1399 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1400 ASSERT_EQ(0, ret); 1401 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1402 ASSERT_EQ(0, ret); 1403 /* Should work just fine. */ 1404 EXPECT_EQ(parent, syscall(__NR_getppid)); 1405 EXPECT_EQ(0, syscall(__NR_getpid)); 1406 } 1407 1408 TEST_F(precedence, trace_is_fourth) 1409 { 1410 pid_t parent; 1411 long ret; 1412 1413 parent = getppid(); 1414 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1415 ASSERT_EQ(0, ret); 1416 1417 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1418 ASSERT_EQ(0, ret); 1419 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1420 ASSERT_EQ(0, ret); 1421 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1422 ASSERT_EQ(0, ret); 1423 /* Should work just fine. */ 1424 EXPECT_EQ(parent, syscall(__NR_getppid)); 1425 /* No ptracer */ 1426 EXPECT_EQ(-1, syscall(__NR_getpid)); 1427 } 1428 1429 TEST_F(precedence, trace_is_fourth_in_any_order) 1430 { 1431 pid_t parent; 1432 long ret; 1433 1434 parent = getppid(); 1435 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1436 ASSERT_EQ(0, ret); 1437 1438 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1439 ASSERT_EQ(0, ret); 1440 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1441 ASSERT_EQ(0, ret); 1442 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1443 ASSERT_EQ(0, ret); 1444 /* Should work just fine. 
*/ 1445 EXPECT_EQ(parent, syscall(__NR_getppid)); 1446 /* No ptracer */ 1447 EXPECT_EQ(-1, syscall(__NR_getpid)); 1448 } 1449 1450 TEST_F(precedence, log_is_fifth) 1451 { 1452 pid_t mypid, parent; 1453 long ret; 1454 1455 mypid = getpid(); 1456 parent = getppid(); 1457 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1458 ASSERT_EQ(0, ret); 1459 1460 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1461 ASSERT_EQ(0, ret); 1462 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1463 ASSERT_EQ(0, ret); 1464 /* Should work just fine. */ 1465 EXPECT_EQ(parent, syscall(__NR_getppid)); 1466 /* Should also work just fine */ 1467 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1468 } 1469 1470 TEST_F(precedence, log_is_fifth_in_any_order) 1471 { 1472 pid_t mypid, parent; 1473 long ret; 1474 1475 mypid = getpid(); 1476 parent = getppid(); 1477 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1478 ASSERT_EQ(0, ret); 1479 1480 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1481 ASSERT_EQ(0, ret); 1482 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1483 ASSERT_EQ(0, ret); 1484 /* Should work just fine. */ 1485 EXPECT_EQ(parent, syscall(__NR_getppid)); 1486 /* Should also work just fine */ 1487 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1488 } 1489 1490 #ifndef PTRACE_O_TRACESECCOMP 1491 #define PTRACE_O_TRACESECCOMP 0x00000080 1492 #endif 1493 1494 /* Catch the Ubuntu 12.04 value error. */ 1495 #if PTRACE_EVENT_SECCOMP != 7 1496 #undef PTRACE_EVENT_SECCOMP 1497 #endif 1498 1499 #ifndef PTRACE_EVENT_SECCOMP 1500 #define PTRACE_EVENT_SECCOMP 7 1501 #endif 1502 1503 #define PTRACE_EVENT_MASK(status) ((status) >> 16) 1504 bool tracer_running; 1505 void tracer_stop(int sig) 1506 { 1507 tracer_running = false; 1508 } 1509 1510 typedef void tracer_func_t(struct __test_metadata *_metadata, 1511 pid_t tracee, int status, void *args); 1512 1513 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1514 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1515 { 1516 int ret = -1; 1517 struct sigaction action = { 1518 .sa_handler = tracer_stop, 1519 }; 1520 1521 /* Allow external shutdown. */ 1522 tracer_running = true; 1523 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1524 1525 errno = 0; 1526 while (ret == -1 && errno != EINVAL) 1527 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1528 ASSERT_EQ(0, ret) { 1529 kill(tracee, SIGKILL); 1530 } 1531 /* Wait for attach stop */ 1532 wait(NULL); 1533 1534 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1535 PTRACE_O_TRACESYSGOOD : 1536 PTRACE_O_TRACESECCOMP); 1537 ASSERT_EQ(0, ret) { 1538 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1539 kill(tracee, SIGKILL); 1540 } 1541 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1542 tracee, NULL, 0); 1543 ASSERT_EQ(0, ret); 1544 1545 /* Unblock the tracee */ 1546 ASSERT_EQ(1, write(fd, "A", 1)); 1547 ASSERT_EQ(0, close(fd)); 1548 1549 /* Run until we're shut down. Must assert to stop execution. */ 1550 while (tracer_running) { 1551 int status; 1552 1553 if (wait(&status) != tracee) 1554 continue; 1555 1556 if (WIFSIGNALED(status)) { 1557 /* Child caught a fatal signal. */ 1558 return; 1559 } 1560 if (WIFEXITED(status)) { 1561 /* Child exited with code. */ 1562 return; 1563 } 1564 1565 /* Check if we got an expected event. 
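		 * For a seccomp stop (PTRACE_O_TRACESECCOMP plus a filter
		 * returning SECCOMP_RET_TRACE) the tracee stops with SIGTRAP
		 * and PTRACE_EVENT_SECCOMP in the upper status bits, and
		 * PTRACE_GETEVENTMSG yields the SECCOMP_RET_DATA part of the
		 * filter's return value. For the plain PTRACE_SYSCALL flavour
		 * (PTRACE_O_TRACESYSGOOD) the stop signal is SIGTRAP | 0x80
		 * instead. Both forms pass the WSTOPSIG() check below and are
		 * told apart in the per-test tracer_func callbacks.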
*/ 1566 ASSERT_EQ(WIFCONTINUED(status), false); 1567 ASSERT_EQ(WIFSTOPPED(status), true); 1568 ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) { 1569 TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); 1570 } 1571 1572 tracer_func(_metadata, tracee, status, args); 1573 1574 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1575 tracee, NULL, 0); 1576 ASSERT_EQ(0, ret); 1577 } 1578 /* Directly report the status of our test harness results. */ 1579 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); 1580 } 1581 1582 /* Common tracer setup/teardown functions. */ 1583 void cont_handler(int num) 1584 { } 1585 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1586 tracer_func_t func, void *args, bool ptrace_syscall) 1587 { 1588 char sync; 1589 int pipefd[2]; 1590 pid_t tracer_pid; 1591 pid_t tracee = getpid(); 1592 1593 /* Setup a pipe for clean synchronization. */ 1594 ASSERT_EQ(0, pipe(pipefd)); 1595 1596 /* Fork a child which we'll promote to tracer */ 1597 tracer_pid = fork(); 1598 ASSERT_LE(0, tracer_pid); 1599 signal(SIGALRM, cont_handler); 1600 if (tracer_pid == 0) { 1601 close(pipefd[0]); 1602 start_tracer(_metadata, pipefd[1], tracee, func, args, 1603 ptrace_syscall); 1604 syscall(__NR_exit, 0); 1605 } 1606 close(pipefd[1]); 1607 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1608 read(pipefd[0], &sync, 1); 1609 close(pipefd[0]); 1610 1611 return tracer_pid; 1612 } 1613 1614 void teardown_trace_fixture(struct __test_metadata *_metadata, 1615 pid_t tracer) 1616 { 1617 if (tracer) { 1618 int status; 1619 /* 1620 * Extract the exit code from the other process and 1621 * adopt it for ourselves in case its asserts failed. 1622 */ 1623 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1624 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1625 if (WEXITSTATUS(status)) 1626 _metadata->passed = 0; 1627 } 1628 } 1629 1630 /* "poke" tracer arguments and function. */ 1631 struct tracer_args_poke_t { 1632 unsigned long poke_addr; 1633 }; 1634 1635 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1636 void *args) 1637 { 1638 int ret; 1639 unsigned long msg; 1640 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1641 1642 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1643 EXPECT_EQ(0, ret); 1644 /* If this fails, don't try to recover. */ 1645 ASSERT_EQ(0x1001, msg) { 1646 kill(tracee, SIGKILL); 1647 } 1648 /* 1649 * Poke in the message. 1650 * Registers are not touched to try to keep this relatively arch 1651 * agnostic. 1652 */ 1653 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1654 EXPECT_EQ(0, ret); 1655 } 1656 1657 FIXTURE(TRACE_poke) { 1658 struct sock_fprog prog; 1659 pid_t tracer; 1660 long poked; 1661 struct tracer_args_poke_t tracer_args; 1662 }; 1663 1664 FIXTURE_SETUP(TRACE_poke) 1665 { 1666 struct sock_filter filter[] = { 1667 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1668 offsetof(struct seccomp_data, nr)), 1669 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1670 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1671 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1672 }; 1673 1674 self->poked = 0; 1675 memset(&self->prog, 0, sizeof(self->prog)); 1676 self->prog.filter = malloc(sizeof(filter)); 1677 ASSERT_NE(NULL, self->prog.filter); 1678 memcpy(self->prog.filter, filter, sizeof(filter)); 1679 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1680 1681 /* Set up tracer args. */ 1682 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1683 1684 /* Launch tracer. 
*/ 1685 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1686 &self->tracer_args, false); 1687 } 1688 1689 FIXTURE_TEARDOWN(TRACE_poke) 1690 { 1691 teardown_trace_fixture(_metadata, self->tracer); 1692 if (self->prog.filter) 1693 free(self->prog.filter); 1694 } 1695 1696 TEST_F(TRACE_poke, read_has_side_effects) 1697 { 1698 ssize_t ret; 1699 1700 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1701 ASSERT_EQ(0, ret); 1702 1703 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1704 ASSERT_EQ(0, ret); 1705 1706 EXPECT_EQ(0, self->poked); 1707 ret = read(-1, NULL, 0); 1708 EXPECT_EQ(-1, ret); 1709 EXPECT_EQ(0x1001, self->poked); 1710 } 1711 1712 TEST_F(TRACE_poke, getpid_runs_normally) 1713 { 1714 long ret; 1715 1716 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1717 ASSERT_EQ(0, ret); 1718 1719 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1720 ASSERT_EQ(0, ret); 1721 1722 EXPECT_EQ(0, self->poked); 1723 EXPECT_NE(0, syscall(__NR_getpid)); 1724 EXPECT_EQ(0, self->poked); 1725 } 1726 1727 #if defined(__x86_64__) 1728 # define ARCH_REGS struct user_regs_struct 1729 # define SYSCALL_NUM(_regs) (_regs).orig_rax 1730 # define SYSCALL_RET(_regs) (_regs).rax 1731 #elif defined(__i386__) 1732 # define ARCH_REGS struct user_regs_struct 1733 # define SYSCALL_NUM(_regs) (_regs).orig_eax 1734 # define SYSCALL_RET(_regs) (_regs).eax 1735 #elif defined(__arm__) 1736 # define ARCH_REGS struct pt_regs 1737 # define SYSCALL_NUM(_regs) (_regs).ARM_r7 1738 # ifndef PTRACE_SET_SYSCALL 1739 # define PTRACE_SET_SYSCALL 23 1740 # endif 1741 # define SYSCALL_NUM_SET(_regs, _nr) \ 1742 EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) 1743 # define SYSCALL_RET(_regs) (_regs).ARM_r0 1744 #elif defined(__aarch64__) 1745 # define ARCH_REGS struct user_pt_regs 1746 # define SYSCALL_NUM(_regs) (_regs).regs[8] 1747 # ifndef NT_ARM_SYSTEM_CALL 1748 # define NT_ARM_SYSTEM_CALL 0x404 1749 # endif 1750 # define SYSCALL_NUM_SET(_regs, _nr) \ 1751 do { \ 1752 struct iovec __v; \ 1753 typeof(_nr) __nr = (_nr); \ 1754 __v.iov_base = &__nr; \ 1755 __v.iov_len = sizeof(__nr); \ 1756 EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ 1757 NT_ARM_SYSTEM_CALL, &__v)); \ 1758 } while (0) 1759 # define SYSCALL_RET(_regs) (_regs).regs[0] 1760 #elif defined(__loongarch__) 1761 # define ARCH_REGS struct user_pt_regs 1762 # define SYSCALL_NUM(_regs) (_regs).regs[11] 1763 # define SYSCALL_RET(_regs) (_regs).regs[4] 1764 #elif defined(__riscv) && __riscv_xlen == 64 1765 # define ARCH_REGS struct user_regs_struct 1766 # define SYSCALL_NUM(_regs) (_regs).a7 1767 # define SYSCALL_RET(_regs) (_regs).a0 1768 #elif defined(__csky__) 1769 # define ARCH_REGS struct pt_regs 1770 # if defined(__CSKYABIV2__) 1771 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1772 # else 1773 # define SYSCALL_NUM(_regs) (_regs).regs[9] 1774 # endif 1775 # define SYSCALL_RET(_regs) (_regs).a0 1776 #elif defined(__hppa__) 1777 # define ARCH_REGS struct user_regs_struct 1778 # define SYSCALL_NUM(_regs) (_regs).gr[20] 1779 # define SYSCALL_RET(_regs) (_regs).gr[28] 1780 #elif defined(__powerpc__) 1781 # define ARCH_REGS struct pt_regs 1782 # define SYSCALL_NUM(_regs) (_regs).gpr[0] 1783 # define SYSCALL_RET(_regs) (_regs).gpr[3] 1784 # define SYSCALL_RET_SET(_regs, _val) \ 1785 do { \ 1786 typeof(_val) _result = (_val); \ 1787 if ((_regs.trap & 0xfff0) == 0x3000) { \ 1788 /* \ 1789 * scv 0 system call uses -ve result \ 1790 * for error, so no need to adjust. 
\ 1791 */ \ 1792 SYSCALL_RET(_regs) = _result; \ 1793 } else { \ 1794 /* \ 1795 * A syscall error is signaled by the \ 1796 * CR0 SO bit and the code is stored as \ 1797 * a positive value. \ 1798 */ \ 1799 if (_result < 0) { \ 1800 SYSCALL_RET(_regs) = -_result; \ 1801 (_regs).ccr |= 0x10000000; \ 1802 } else { \ 1803 SYSCALL_RET(_regs) = _result; \ 1804 (_regs).ccr &= ~0x10000000; \ 1805 } \ 1806 } \ 1807 } while (0) 1808 # define SYSCALL_RET_SET_ON_PTRACE_EXIT 1809 #elif defined(__s390__) 1810 # define ARCH_REGS s390_regs 1811 # define SYSCALL_NUM(_regs) (_regs).gprs[2] 1812 # define SYSCALL_RET_SET(_regs, _val) \ 1813 TH_LOG("Can't modify syscall return on this architecture") 1814 #elif defined(__mips__) 1815 # include <asm/unistd_nr_n32.h> 1816 # include <asm/unistd_nr_n64.h> 1817 # include <asm/unistd_nr_o32.h> 1818 # define ARCH_REGS struct pt_regs 1819 # define SYSCALL_NUM(_regs) \ 1820 ({ \ 1821 typeof((_regs).regs[2]) _nr; \ 1822 if ((_regs).regs[2] == __NR_O32_Linux) \ 1823 _nr = (_regs).regs[4]; \ 1824 else \ 1825 _nr = (_regs).regs[2]; \ 1826 _nr; \ 1827 }) 1828 # define SYSCALL_NUM_SET(_regs, _nr) \ 1829 do { \ 1830 if ((_regs).regs[2] == __NR_O32_Linux) \ 1831 (_regs).regs[4] = _nr; \ 1832 else \ 1833 (_regs).regs[2] = _nr; \ 1834 } while (0) 1835 # define SYSCALL_RET_SET(_regs, _val) \ 1836 TH_LOG("Can't modify syscall return on this architecture") 1837 #elif defined(__xtensa__) 1838 # define ARCH_REGS struct user_pt_regs 1839 # define SYSCALL_NUM(_regs) (_regs).syscall 1840 /* 1841 * On xtensa syscall return value is in the register 1842 * a2 of the current window which is not fixed. 1843 */ 1844 #define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] 1845 #elif defined(__sh__) 1846 # define ARCH_REGS struct pt_regs 1847 # define SYSCALL_NUM(_regs) (_regs).regs[3] 1848 # define SYSCALL_RET(_regs) (_regs).regs[0] 1849 #elif defined(__mc68000__) 1850 # define ARCH_REGS struct user_regs_struct 1851 # define SYSCALL_NUM(_regs) (_regs).orig_d0 1852 # define SYSCALL_RET(_regs) (_regs).d0 1853 #else 1854 # error "Do not know how to find your architecture's registers and syscalls" 1855 #endif 1856 1857 /* 1858 * Most architectures can change the syscall by just updating the 1859 * associated register. This is the default if not defined above. 1860 */ 1861 #ifndef SYSCALL_NUM_SET 1862 # define SYSCALL_NUM_SET(_regs, _nr) \ 1863 do { \ 1864 SYSCALL_NUM(_regs) = (_nr); \ 1865 } while (0) 1866 #endif 1867 /* 1868 * Most architectures can change the syscall return value by just 1869 * writing to the SYSCALL_RET register. This is the default if not 1870 * defined above. If an architecture cannot set the return value 1871 * (for example when the syscall and return value register is 1872 * shared), report it with TH_LOG() in an arch-specific definition 1873 * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. 1874 */ 1875 #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) 1876 # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" 1877 #endif 1878 #ifndef SYSCALL_RET_SET 1879 # define SYSCALL_RET_SET(_regs, _val) \ 1880 do { \ 1881 SYSCALL_RET(_regs) = (_val); \ 1882 } while (0) 1883 #endif 1884 1885 /* When the syscall return can't be changed, stub out the tests for it. 
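 * Concretely, when SYSCALL_RET is available, EXPECT_SYSCALL_RETURN() (defined
 * next) checks the full calling convention: a negative value such as -ESRCH
 * must surface as a raw return of -1 with errno set to ESRCH, while a
 * positive value (like the faked 45000 used later) must come back as-is.
 * Architectures that cannot rewrite the return register (see the s390 and
 * MIPS SYSCALL_RET_SET definitions above) only get the weaker check that the
 * syscall failed.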
*/ 1886 #ifndef SYSCALL_RET 1887 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1888 #else 1889 # define EXPECT_SYSCALL_RETURN(val, action) \ 1890 do { \ 1891 errno = 0; \ 1892 if (val < 0) { \ 1893 EXPECT_EQ(-1, action); \ 1894 EXPECT_EQ(-(val), errno); \ 1895 } else { \ 1896 EXPECT_EQ(val, action); \ 1897 } \ 1898 } while (0) 1899 #endif 1900 1901 /* 1902 * Some architectures (e.g. powerpc) can only set syscall 1903 * return values on syscall exit during ptrace. 1904 */ 1905 const bool ptrace_entry_set_syscall_nr = true; 1906 const bool ptrace_entry_set_syscall_ret = 1907 #ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT 1908 true; 1909 #else 1910 false; 1911 #endif 1912 1913 /* 1914 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1915 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1916 */ 1917 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__) 1918 # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) 1919 # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) 1920 #else 1921 # define ARCH_GETREGS(_regs) ({ \ 1922 struct iovec __v; \ 1923 __v.iov_base = &(_regs); \ 1924 __v.iov_len = sizeof(_regs); \ 1925 ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ 1926 }) 1927 # define ARCH_SETREGS(_regs) ({ \ 1928 struct iovec __v; \ 1929 __v.iov_base = &(_regs); \ 1930 __v.iov_len = sizeof(_regs); \ 1931 ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ 1932 }) 1933 #endif 1934 1935 /* Architecture-specific syscall fetching routine. */ 1936 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1937 { 1938 ARCH_REGS regs; 1939 1940 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1941 return -1; 1942 } 1943 1944 return SYSCALL_NUM(regs); 1945 } 1946 1947 /* Architecture-specific syscall changing routine. */ 1948 void __change_syscall(struct __test_metadata *_metadata, 1949 pid_t tracee, long *syscall, long *ret) 1950 { 1951 ARCH_REGS orig, regs; 1952 1953 /* Do not get/set registers if we have nothing to do. */ 1954 if (!syscall && !ret) 1955 return; 1956 1957 EXPECT_EQ(0, ARCH_GETREGS(regs)) { 1958 return; 1959 } 1960 orig = regs; 1961 1962 if (syscall) 1963 SYSCALL_NUM_SET(regs, *syscall); 1964 1965 if (ret) 1966 SYSCALL_RET_SET(regs, *ret); 1967 1968 /* Flush any register changes made. */ 1969 if (memcmp(&orig, ®s, sizeof(orig)) != 0) 1970 EXPECT_EQ(0, ARCH_SETREGS(regs)); 1971 } 1972 1973 /* Change only syscall number. */ 1974 void change_syscall_nr(struct __test_metadata *_metadata, 1975 pid_t tracee, long syscall) 1976 { 1977 __change_syscall(_metadata, tracee, &syscall, NULL); 1978 } 1979 1980 /* Change syscall return value (and set syscall number to -1). */ 1981 void change_syscall_ret(struct __test_metadata *_metadata, 1982 pid_t tracee, long ret) 1983 { 1984 long syscall = -1; 1985 1986 __change_syscall(_metadata, tracee, &syscall, &ret); 1987 } 1988 1989 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, 1990 int status, void *args) 1991 { 1992 int ret; 1993 unsigned long msg; 1994 1995 EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) { 1996 TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status)); 1997 return; 1998 } 1999 2000 /* Make sure we got the right message. */ 2001 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 2002 EXPECT_EQ(0, ret); 2003 2004 /* Validate and take action on expected syscalls. */ 2005 switch (msg) { 2006 case 0x1002: 2007 /* change getpid to getppid. 
		 */
		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
		change_syscall_nr(_metadata, tracee, __NR_getppid);
		break;
	case 0x1003:
		/* skip gettid with valid return code. */
		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
		change_syscall_ret(_metadata, tracee, 45000);
		break;
	case 0x1004:
		/* skip openat with error. */
		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
		change_syscall_ret(_metadata, tracee, -ESRCH);
		break;
	case 0x1005:
		/* do nothing (allow getppid) */
		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
		break;
	default:
		EXPECT_EQ(0, msg) {
			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
			kill(tracee, SIGKILL);
		}
	}
}

FIXTURE(TRACE_syscall) {
	struct sock_fprog prog;
	pid_t tracer, mytid, mypid, parent;
	long syscall_nr;
};

void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
		   int status, void *args)
{
	int ret;
	unsigned long msg;
	static bool entry;
	long syscall_nr_val, syscall_ret_val;
	long *syscall_nr = NULL, *syscall_ret = NULL;
	FIXTURE_DATA(TRACE_syscall) *self = args;

	EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) {
		TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
		return;
	}

	/*
	 * The traditional way to tell PTRACE_SYSCALL entry/exit
	 * is by counting.
	 */
	entry = !entry;

	/* Make sure we got an appropriate message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);
	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);

	/*
	 * Some architectures only support setting return values during
	 * syscall exit under ptrace, and on exit the syscall number may
	 * no longer be available. Therefore, save the initial syscall
	 * number here, so it can be examined during both entry and exit
	 * phases.
	 */
	if (entry)
		self->syscall_nr = get_syscall(_metadata, tracee);

	/*
	 * Depending on the architecture's syscall setting abilities, we
	 * pick which things to set during this phase (entry or exit).
	 */
	if (entry == ptrace_entry_set_syscall_nr)
		syscall_nr = &syscall_nr_val;
	if (entry == ptrace_entry_set_syscall_ret)
		syscall_ret = &syscall_ret_val;

	/* Now handle the actual rewriting cases. */
	switch (self->syscall_nr) {
	case __NR_getpid:
		syscall_nr_val = __NR_getppid;
		/* Never change syscall return for this case. */
		syscall_ret = NULL;
		break;
	case __NR_gettid:
		syscall_nr_val = -1;
		syscall_ret_val = 45000;
		break;
	case __NR_openat:
		syscall_nr_val = -1;
		syscall_ret_val = -ESRCH;
		break;
	default:
		/* Unhandled, do nothing. */
		return;
	}

	__change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
}

FIXTURE_VARIANT(TRACE_syscall) {
	/*
	 * All of the SECCOMP_RET_TRACE behaviors can be tested with either
	 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
	 * This indicates if we should use SECCOMP_RET_TRACE (false), or
	 * ptrace (true).
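	 * Either way, the TRACE_syscall tests below must observe identical
	 * behavior (see tracer_seccomp() and tracer_ptrace() above).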
2115 */ 2116 bool use_ptrace; 2117 }; 2118 2119 FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { 2120 .use_ptrace = true, 2121 }; 2122 2123 FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { 2124 .use_ptrace = false, 2125 }; 2126 2127 FIXTURE_SETUP(TRACE_syscall) 2128 { 2129 struct sock_filter filter[] = { 2130 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2131 offsetof(struct seccomp_data, nr)), 2132 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2133 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 2134 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 2135 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 2136 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 2137 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 2138 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2139 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 2140 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2141 }; 2142 struct sock_fprog prog = { 2143 .len = (unsigned short)ARRAY_SIZE(filter), 2144 .filter = filter, 2145 }; 2146 long ret; 2147 2148 /* Prepare some testable syscall results. */ 2149 self->mytid = syscall(__NR_gettid); 2150 ASSERT_GT(self->mytid, 0); 2151 ASSERT_NE(self->mytid, 1) { 2152 TH_LOG("Running this test as init is not supported. :)"); 2153 } 2154 2155 self->mypid = getpid(); 2156 ASSERT_GT(self->mypid, 0); 2157 ASSERT_EQ(self->mytid, self->mypid); 2158 2159 self->parent = getppid(); 2160 ASSERT_GT(self->parent, 0); 2161 ASSERT_NE(self->parent, self->mypid); 2162 2163 /* Launch tracer. */ 2164 self->tracer = setup_trace_fixture(_metadata, 2165 variant->use_ptrace ? tracer_ptrace 2166 : tracer_seccomp, 2167 self, variant->use_ptrace); 2168 2169 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2170 ASSERT_EQ(0, ret); 2171 2172 /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */ 2173 if (variant->use_ptrace) 2174 return; 2175 2176 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2177 ASSERT_EQ(0, ret); 2178 } 2179 2180 FIXTURE_TEARDOWN(TRACE_syscall) 2181 { 2182 teardown_trace_fixture(_metadata, self->tracer); 2183 } 2184 2185 TEST(negative_ENOSYS) 2186 { 2187 #if defined(__arm__) 2188 SKIP(return, "arm32 does not support calling syscall -1"); 2189 #endif 2190 /* 2191 * There should be no difference between an "internal" skip 2192 * and userspace asking for syscall "-1". 2193 */ 2194 errno = 0; 2195 EXPECT_EQ(-1, syscall(-1)); 2196 EXPECT_EQ(errno, ENOSYS); 2197 /* And no difference for "still not valid but not -1". */ 2198 errno = 0; 2199 EXPECT_EQ(-1, syscall(-101)); 2200 EXPECT_EQ(errno, ENOSYS); 2201 } 2202 2203 TEST_F(TRACE_syscall, negative_ENOSYS) 2204 { 2205 negative_ENOSYS(_metadata); 2206 } 2207 2208 TEST_F(TRACE_syscall, syscall_allowed) 2209 { 2210 /* getppid works as expected (no changes). */ 2211 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 2212 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 2213 } 2214 2215 TEST_F(TRACE_syscall, syscall_redirected) 2216 { 2217 /* getpid has been redirected to getppid as expected. */ 2218 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 2219 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2220 } 2221 2222 TEST_F(TRACE_syscall, syscall_errno) 2223 { 2224 /* Tracer should skip the open syscall, resulting in ESRCH. */ 2225 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 2226 } 2227 2228 TEST_F(TRACE_syscall, syscall_faked) 2229 { 2230 /* Tracer skips the gettid syscall and store altered return value. 
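	 * (45000 is the magic value injected for gettid by tracer_seccomp()
	 * and tracer_ptrace() above.)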
*/ 2231 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 2232 } 2233 2234 TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS) 2235 { 2236 struct sock_filter filter[] = { 2237 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2238 offsetof(struct seccomp_data, nr)), 2239 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1), 2240 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 2241 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2242 }; 2243 struct sock_fprog prog = { 2244 .len = (unsigned short)ARRAY_SIZE(filter), 2245 .filter = filter, 2246 }; 2247 long ret; 2248 2249 /* Install "kill on mknodat" filter. */ 2250 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2251 ASSERT_EQ(0, ret); 2252 2253 /* This should immediately die with SIGSYS, regardless of tracer. */ 2254 EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0)); 2255 } 2256 2257 TEST_F(TRACE_syscall, skip_after) 2258 { 2259 struct sock_filter filter[] = { 2260 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2261 offsetof(struct seccomp_data, nr)), 2262 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2263 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2264 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2265 }; 2266 struct sock_fprog prog = { 2267 .len = (unsigned short)ARRAY_SIZE(filter), 2268 .filter = filter, 2269 }; 2270 long ret; 2271 2272 /* Install additional "errno on getppid" filter. */ 2273 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2274 ASSERT_EQ(0, ret); 2275 2276 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2277 errno = 0; 2278 EXPECT_EQ(-1, syscall(__NR_getpid)); 2279 EXPECT_EQ(EPERM, errno); 2280 } 2281 2282 TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) 2283 { 2284 struct sock_filter filter[] = { 2285 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2286 offsetof(struct seccomp_data, nr)), 2287 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2288 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2289 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2290 }; 2291 struct sock_fprog prog = { 2292 .len = (unsigned short)ARRAY_SIZE(filter), 2293 .filter = filter, 2294 }; 2295 long ret; 2296 2297 /* Install additional "death on getppid" filter. */ 2298 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2299 ASSERT_EQ(0, ret); 2300 2301 /* Tracer will redirect getpid to getppid, and we should die. */ 2302 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2303 } 2304 2305 TEST(seccomp_syscall) 2306 { 2307 struct sock_filter filter[] = { 2308 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2309 }; 2310 struct sock_fprog prog = { 2311 .len = (unsigned short)ARRAY_SIZE(filter), 2312 .filter = filter, 2313 }; 2314 long ret; 2315 2316 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2317 ASSERT_EQ(0, ret) { 2318 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2319 } 2320 2321 /* Reject insane operation. */ 2322 ret = seccomp(-1, 0, &prog); 2323 ASSERT_NE(ENOSYS, errno) { 2324 TH_LOG("Kernel does not support seccomp syscall!"); 2325 } 2326 EXPECT_EQ(EINVAL, errno) { 2327 TH_LOG("Did not reject crazy op value!"); 2328 } 2329 2330 /* Reject strict with flags or pointer. */ 2331 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2332 EXPECT_EQ(EINVAL, errno) { 2333 TH_LOG("Did not reject mode strict with flags!"); 2334 } 2335 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2336 EXPECT_EQ(EINVAL, errno) { 2337 TH_LOG("Did not reject mode strict with uargs!"); 2338 } 2339 2340 /* Reject insane args for filter. 
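	 * (Unknown flag bits must be rejected with EINVAL, and a NULL filter
	 * pointer with EFAULT.)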
*/ 2341 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2342 EXPECT_EQ(EINVAL, errno) { 2343 TH_LOG("Did not reject crazy filter flags!"); 2344 } 2345 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2346 EXPECT_EQ(EFAULT, errno) { 2347 TH_LOG("Did not reject NULL filter!"); 2348 } 2349 2350 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2351 EXPECT_EQ(0, errno) { 2352 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2353 strerror(errno)); 2354 } 2355 } 2356 2357 TEST(seccomp_syscall_mode_lock) 2358 { 2359 struct sock_filter filter[] = { 2360 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2361 }; 2362 struct sock_fprog prog = { 2363 .len = (unsigned short)ARRAY_SIZE(filter), 2364 .filter = filter, 2365 }; 2366 long ret; 2367 2368 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2369 ASSERT_EQ(0, ret) { 2370 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2371 } 2372 2373 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2374 ASSERT_NE(ENOSYS, errno) { 2375 TH_LOG("Kernel does not support seccomp syscall!"); 2376 } 2377 EXPECT_EQ(0, ret) { 2378 TH_LOG("Could not install filter!"); 2379 } 2380 2381 /* Make sure neither entry point will switch to strict. */ 2382 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2383 EXPECT_EQ(EINVAL, errno) { 2384 TH_LOG("Switched to mode strict!"); 2385 } 2386 2387 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2388 EXPECT_EQ(EINVAL, errno) { 2389 TH_LOG("Switched to mode strict!"); 2390 } 2391 } 2392 2393 /* 2394 * Test detection of known and unknown filter flags. Userspace needs to be able 2395 * to check if a filter flag is supported by the current kernel and a good way 2396 * of doing that is by attempting to enter filter mode, with the flag bit in 2397 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2398 * that the flag is valid and EINVAL indicates that the flag is invalid. 2399 */ 2400 TEST(detect_seccomp_filter_flags) 2401 { 2402 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2403 SECCOMP_FILTER_FLAG_LOG, 2404 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2405 SECCOMP_FILTER_FLAG_NEW_LISTENER, 2406 SECCOMP_FILTER_FLAG_TSYNC_ESRCH }; 2407 unsigned int exclusive[] = { 2408 SECCOMP_FILTER_FLAG_TSYNC, 2409 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2410 unsigned int flag, all_flags, exclusive_mask; 2411 int i; 2412 long ret; 2413 2414 /* Test detection of individual known-good filter flags */ 2415 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2416 int bits = 0; 2417 2418 flag = flags[i]; 2419 /* Make sure the flag is a single bit! */ 2420 while (flag) { 2421 if (flag & 0x1) 2422 bits ++; 2423 flag >>= 1; 2424 } 2425 ASSERT_EQ(1, bits); 2426 flag = flags[i]; 2427 2428 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2429 ASSERT_NE(ENOSYS, errno) { 2430 TH_LOG("Kernel does not support seccomp syscall!"); 2431 } 2432 EXPECT_EQ(-1, ret); 2433 EXPECT_EQ(EFAULT, errno) { 2434 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2435 flag); 2436 } 2437 2438 all_flags |= flag; 2439 } 2440 2441 /* 2442 * Test detection of all known-good filter flags combined. But 2443 * for the exclusive flags we need to mask them out and try them 2444 * individually for the "all flags" testing. 
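	 * (TSYNC and NEW_LISTENER cannot always be combined in a single call;
	 * see user_notification_with_tsync below.)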
2445 */ 2446 exclusive_mask = 0; 2447 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2448 exclusive_mask |= exclusive[i]; 2449 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2450 flag = all_flags & ~exclusive_mask; 2451 flag |= exclusive[i]; 2452 2453 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2454 EXPECT_EQ(-1, ret); 2455 EXPECT_EQ(EFAULT, errno) { 2456 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2457 flag); 2458 } 2459 } 2460 2461 /* Test detection of an unknown filter flags, without exclusives. */ 2462 flag = -1; 2463 flag &= ~exclusive_mask; 2464 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2465 EXPECT_EQ(-1, ret); 2466 EXPECT_EQ(EINVAL, errno) { 2467 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2468 flag); 2469 } 2470 2471 /* 2472 * Test detection of an unknown filter flag that may simply need to be 2473 * added to this test 2474 */ 2475 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2476 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2477 EXPECT_EQ(-1, ret); 2478 EXPECT_EQ(EINVAL, errno) { 2479 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", 2480 flag); 2481 } 2482 } 2483 2484 TEST(TSYNC_first) 2485 { 2486 struct sock_filter filter[] = { 2487 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2488 }; 2489 struct sock_fprog prog = { 2490 .len = (unsigned short)ARRAY_SIZE(filter), 2491 .filter = filter, 2492 }; 2493 long ret; 2494 2495 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2496 ASSERT_EQ(0, ret) { 2497 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2498 } 2499 2500 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2501 &prog); 2502 ASSERT_NE(ENOSYS, errno) { 2503 TH_LOG("Kernel does not support seccomp syscall!"); 2504 } 2505 EXPECT_EQ(0, ret) { 2506 TH_LOG("Could not install initial filter with TSYNC!"); 2507 } 2508 } 2509 2510 #define TSYNC_SIBLINGS 2 2511 struct tsync_sibling { 2512 pthread_t tid; 2513 pid_t system_tid; 2514 sem_t *started; 2515 pthread_cond_t *cond; 2516 pthread_mutex_t *mutex; 2517 int diverge; 2518 int num_waits; 2519 struct sock_fprog *prog; 2520 struct __test_metadata *metadata; 2521 }; 2522 2523 /* 2524 * To avoid joining joined threads (which is not allowed by Bionic), 2525 * make sure we both successfully join and clear the tid to skip a 2526 * later join attempt during fixture teardown. Any remaining threads 2527 * will be directly killed during teardown. 
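 * A tid of 0 therefore marks a sibling as already joined, which is what
 * FIXTURE_TEARDOWN(TSYNC) checks before resorting to pthread_kill().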
2528 */ 2529 #define PTHREAD_JOIN(tid, status) \ 2530 do { \ 2531 int _rc = pthread_join(tid, status); \ 2532 if (_rc) { \ 2533 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2534 (unsigned int)tid, _rc); \ 2535 } else { \ 2536 tid = 0; \ 2537 } \ 2538 } while (0) 2539 2540 FIXTURE(TSYNC) { 2541 struct sock_fprog root_prog, apply_prog; 2542 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2543 sem_t started; 2544 pthread_cond_t cond; 2545 pthread_mutex_t mutex; 2546 int sibling_count; 2547 }; 2548 2549 FIXTURE_SETUP(TSYNC) 2550 { 2551 struct sock_filter root_filter[] = { 2552 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2553 }; 2554 struct sock_filter apply_filter[] = { 2555 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2556 offsetof(struct seccomp_data, nr)), 2557 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2558 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2559 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2560 }; 2561 2562 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2563 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2564 memset(&self->sibling, 0, sizeof(self->sibling)); 2565 self->root_prog.filter = malloc(sizeof(root_filter)); 2566 ASSERT_NE(NULL, self->root_prog.filter); 2567 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2568 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2569 2570 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2571 ASSERT_NE(NULL, self->apply_prog.filter); 2572 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2573 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2574 2575 self->sibling_count = 0; 2576 pthread_mutex_init(&self->mutex, NULL); 2577 pthread_cond_init(&self->cond, NULL); 2578 sem_init(&self->started, 0, 0); 2579 self->sibling[0].tid = 0; 2580 self->sibling[0].cond = &self->cond; 2581 self->sibling[0].started = &self->started; 2582 self->sibling[0].mutex = &self->mutex; 2583 self->sibling[0].diverge = 0; 2584 self->sibling[0].num_waits = 1; 2585 self->sibling[0].prog = &self->root_prog; 2586 self->sibling[0].metadata = _metadata; 2587 self->sibling[1].tid = 0; 2588 self->sibling[1].cond = &self->cond; 2589 self->sibling[1].started = &self->started; 2590 self->sibling[1].mutex = &self->mutex; 2591 self->sibling[1].diverge = 0; 2592 self->sibling[1].prog = &self->root_prog; 2593 self->sibling[1].num_waits = 1; 2594 self->sibling[1].metadata = _metadata; 2595 } 2596 2597 FIXTURE_TEARDOWN(TSYNC) 2598 { 2599 int sib = 0; 2600 2601 if (self->root_prog.filter) 2602 free(self->root_prog.filter); 2603 if (self->apply_prog.filter) 2604 free(self->apply_prog.filter); 2605 2606 for ( ; sib < self->sibling_count; ++sib) { 2607 struct tsync_sibling *s = &self->sibling[sib]; 2608 2609 if (!s->tid) 2610 continue; 2611 /* 2612 * If a thread is still running, it may be stuck, so hit 2613 * it over the head really hard. 2614 */ 2615 pthread_kill(s->tid, 9); 2616 } 2617 pthread_mutex_destroy(&self->mutex); 2618 pthread_cond_destroy(&self->cond); 2619 sem_destroy(&self->started); 2620 } 2621 2622 void *tsync_sibling(void *data) 2623 { 2624 long ret = 0; 2625 struct tsync_sibling *me = data; 2626 2627 me->system_tid = syscall(__NR_gettid); 2628 2629 pthread_mutex_lock(me->mutex); 2630 if (me->diverge) { 2631 /* Just re-apply the root prog to fork the tree */ 2632 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2633 me->prog, 0, 0); 2634 } 2635 sem_post(me->started); 2636 /* Return outside of started so parent notices failures. 
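	 * (sem_post() happens first so the parent's sem_wait() loop can never
	 * hang, even when the prctl() above failed.)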
*/ 2637 if (ret) { 2638 pthread_mutex_unlock(me->mutex); 2639 return (void *)SIBLING_EXIT_FAILURE; 2640 } 2641 do { 2642 pthread_cond_wait(me->cond, me->mutex); 2643 me->num_waits = me->num_waits - 1; 2644 } while (me->num_waits); 2645 pthread_mutex_unlock(me->mutex); 2646 2647 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2648 if (!ret) 2649 return (void *)SIBLING_EXIT_NEWPRIVS; 2650 read(-1, NULL, 0); 2651 return (void *)SIBLING_EXIT_UNKILLED; 2652 } 2653 2654 void tsync_start_sibling(struct tsync_sibling *sibling) 2655 { 2656 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2657 } 2658 2659 TEST_F(TSYNC, siblings_fail_prctl) 2660 { 2661 long ret; 2662 void *status; 2663 struct sock_filter filter[] = { 2664 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2665 offsetof(struct seccomp_data, nr)), 2666 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2667 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2668 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2669 }; 2670 struct sock_fprog prog = { 2671 .len = (unsigned short)ARRAY_SIZE(filter), 2672 .filter = filter, 2673 }; 2674 2675 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2676 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2677 } 2678 2679 /* Check prctl failure detection by requesting sib 0 diverge. */ 2680 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2681 ASSERT_NE(ENOSYS, errno) { 2682 TH_LOG("Kernel does not support seccomp syscall!"); 2683 } 2684 ASSERT_EQ(0, ret) { 2685 TH_LOG("setting filter failed"); 2686 } 2687 2688 self->sibling[0].diverge = 1; 2689 tsync_start_sibling(&self->sibling[0]); 2690 tsync_start_sibling(&self->sibling[1]); 2691 2692 while (self->sibling_count < TSYNC_SIBLINGS) { 2693 sem_wait(&self->started); 2694 self->sibling_count++; 2695 } 2696 2697 /* Signal the threads to clean up*/ 2698 pthread_mutex_lock(&self->mutex); 2699 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2700 TH_LOG("cond broadcast non-zero"); 2701 } 2702 pthread_mutex_unlock(&self->mutex); 2703 2704 /* Ensure diverging sibling failed to call prctl. */ 2705 PTHREAD_JOIN(self->sibling[0].tid, &status); 2706 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2707 PTHREAD_JOIN(self->sibling[1].tid, &status); 2708 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2709 } 2710 2711 TEST_F(TSYNC, two_siblings_with_ancestor) 2712 { 2713 long ret; 2714 void *status; 2715 2716 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2717 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2718 } 2719 2720 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2721 ASSERT_NE(ENOSYS, errno) { 2722 TH_LOG("Kernel does not support seccomp syscall!"); 2723 } 2724 ASSERT_EQ(0, ret) { 2725 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2726 } 2727 tsync_start_sibling(&self->sibling[0]); 2728 tsync_start_sibling(&self->sibling[1]); 2729 2730 while (self->sibling_count < TSYNC_SIBLINGS) { 2731 sem_wait(&self->started); 2732 self->sibling_count++; 2733 } 2734 2735 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2736 &self->apply_prog); 2737 ASSERT_EQ(0, ret) { 2738 TH_LOG("Could install filter on all threads!"); 2739 } 2740 /* Tell the siblings to test the policy */ 2741 pthread_mutex_lock(&self->mutex); 2742 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2743 TH_LOG("cond broadcast non-zero"); 2744 } 2745 pthread_mutex_unlock(&self->mutex); 2746 /* Ensure they are both killed and don't exit cleanly. 
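	 * (A sibling killed by the new filter never reaches its normal return
	 * path, so the joined value reads as 0 rather than SIBLING_EXIT_*.)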
*/ 2747 PTHREAD_JOIN(self->sibling[0].tid, &status); 2748 EXPECT_EQ(0x0, (long)status); 2749 PTHREAD_JOIN(self->sibling[1].tid, &status); 2750 EXPECT_EQ(0x0, (long)status); 2751 } 2752 2753 TEST_F(TSYNC, two_sibling_want_nnp) 2754 { 2755 void *status; 2756 2757 /* start siblings before any prctl() operations */ 2758 tsync_start_sibling(&self->sibling[0]); 2759 tsync_start_sibling(&self->sibling[1]); 2760 while (self->sibling_count < TSYNC_SIBLINGS) { 2761 sem_wait(&self->started); 2762 self->sibling_count++; 2763 } 2764 2765 /* Tell the siblings to test no policy */ 2766 pthread_mutex_lock(&self->mutex); 2767 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2768 TH_LOG("cond broadcast non-zero"); 2769 } 2770 pthread_mutex_unlock(&self->mutex); 2771 2772 /* Ensure they are both upset about lacking nnp. */ 2773 PTHREAD_JOIN(self->sibling[0].tid, &status); 2774 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2775 PTHREAD_JOIN(self->sibling[1].tid, &status); 2776 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2777 } 2778 2779 TEST_F(TSYNC, two_siblings_with_no_filter) 2780 { 2781 long ret; 2782 void *status; 2783 2784 /* start siblings before any prctl() operations */ 2785 tsync_start_sibling(&self->sibling[0]); 2786 tsync_start_sibling(&self->sibling[1]); 2787 while (self->sibling_count < TSYNC_SIBLINGS) { 2788 sem_wait(&self->started); 2789 self->sibling_count++; 2790 } 2791 2792 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2793 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2794 } 2795 2796 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2797 &self->apply_prog); 2798 ASSERT_NE(ENOSYS, errno) { 2799 TH_LOG("Kernel does not support seccomp syscall!"); 2800 } 2801 ASSERT_EQ(0, ret) { 2802 TH_LOG("Could install filter on all threads!"); 2803 } 2804 2805 /* Tell the siblings to test the policy */ 2806 pthread_mutex_lock(&self->mutex); 2807 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2808 TH_LOG("cond broadcast non-zero"); 2809 } 2810 pthread_mutex_unlock(&self->mutex); 2811 2812 /* Ensure they are both killed and don't exit cleanly. */ 2813 PTHREAD_JOIN(self->sibling[0].tid, &status); 2814 EXPECT_EQ(0x0, (long)status); 2815 PTHREAD_JOIN(self->sibling[1].tid, &status); 2816 EXPECT_EQ(0x0, (long)status); 2817 } 2818 2819 TEST_F(TSYNC, two_siblings_with_one_divergence) 2820 { 2821 long ret; 2822 void *status; 2823 2824 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2825 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2826 } 2827 2828 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2829 ASSERT_NE(ENOSYS, errno) { 2830 TH_LOG("Kernel does not support seccomp syscall!"); 2831 } 2832 ASSERT_EQ(0, ret) { 2833 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2834 } 2835 self->sibling[0].diverge = 1; 2836 tsync_start_sibling(&self->sibling[0]); 2837 tsync_start_sibling(&self->sibling[1]); 2838 2839 while (self->sibling_count < TSYNC_SIBLINGS) { 2840 sem_wait(&self->started); 2841 self->sibling_count++; 2842 } 2843 2844 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2845 &self->apply_prog); 2846 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2847 TH_LOG("Did not fail on diverged sibling."); 2848 } 2849 2850 /* Wake the threads */ 2851 pthread_mutex_lock(&self->mutex); 2852 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2853 TH_LOG("cond broadcast non-zero"); 2854 } 2855 pthread_mutex_unlock(&self->mutex); 2856 2857 /* Ensure they are both unkilled. 
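	 * (The TSYNC attempt above failed, so neither sibling received the
	 * kill-on-read filter and both survive their read() probe.)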
*/ 2858 PTHREAD_JOIN(self->sibling[0].tid, &status); 2859 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2860 PTHREAD_JOIN(self->sibling[1].tid, &status); 2861 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2862 } 2863 2864 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) 2865 { 2866 long ret, flags; 2867 void *status; 2868 2869 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2870 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2871 } 2872 2873 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2874 ASSERT_NE(ENOSYS, errno) { 2875 TH_LOG("Kernel does not support seccomp syscall!"); 2876 } 2877 ASSERT_EQ(0, ret) { 2878 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2879 } 2880 self->sibling[0].diverge = 1; 2881 tsync_start_sibling(&self->sibling[0]); 2882 tsync_start_sibling(&self->sibling[1]); 2883 2884 while (self->sibling_count < TSYNC_SIBLINGS) { 2885 sem_wait(&self->started); 2886 self->sibling_count++; 2887 } 2888 2889 flags = SECCOMP_FILTER_FLAG_TSYNC | \ 2890 SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 2891 ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog); 2892 ASSERT_EQ(ESRCH, errno) { 2893 TH_LOG("Did not return ESRCH for diverged sibling."); 2894 } 2895 ASSERT_EQ(-1, ret) { 2896 TH_LOG("Did not fail on diverged sibling."); 2897 } 2898 2899 /* Wake the threads */ 2900 pthread_mutex_lock(&self->mutex); 2901 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2902 TH_LOG("cond broadcast non-zero"); 2903 } 2904 pthread_mutex_unlock(&self->mutex); 2905 2906 /* Ensure they are both unkilled. */ 2907 PTHREAD_JOIN(self->sibling[0].tid, &status); 2908 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2909 PTHREAD_JOIN(self->sibling[1].tid, &status); 2910 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2911 } 2912 2913 TEST_F(TSYNC, two_siblings_not_under_filter) 2914 { 2915 long ret, sib; 2916 void *status; 2917 struct timespec delay = { .tv_nsec = 100000000 }; 2918 2919 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2920 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2921 } 2922 2923 /* 2924 * Sibling 0 will have its own seccomp policy 2925 * and Sibling 1 will not be under seccomp at 2926 * all. Sibling 1 will enter seccomp and 0 2927 * will cause failure. 2928 */ 2929 self->sibling[0].diverge = 1; 2930 tsync_start_sibling(&self->sibling[0]); 2931 tsync_start_sibling(&self->sibling[1]); 2932 2933 while (self->sibling_count < TSYNC_SIBLINGS) { 2934 sem_wait(&self->started); 2935 self->sibling_count++; 2936 } 2937 2938 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2939 ASSERT_NE(ENOSYS, errno) { 2940 TH_LOG("Kernel does not support seccomp syscall!"); 2941 } 2942 ASSERT_EQ(0, ret) { 2943 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2944 } 2945 2946 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2947 &self->apply_prog); 2948 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2949 TH_LOG("Did not fail on diverged sibling."); 2950 } 2951 sib = 1; 2952 if (ret == self->sibling[0].system_tid) 2953 sib = 0; 2954 2955 pthread_mutex_lock(&self->mutex); 2956 2957 /* Increment the other siblings num_waits so we can clean up 2958 * the one we just saw. 
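	 * (The extra wait keeps the other sibling parked in pthread_cond_wait()
	 * across this broadcast, so only sibling[sib] wakes up and exits now.)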
2959 */ 2960 self->sibling[!sib].num_waits += 1; 2961 2962 /* Signal the thread to clean up*/ 2963 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2964 TH_LOG("cond broadcast non-zero"); 2965 } 2966 pthread_mutex_unlock(&self->mutex); 2967 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2968 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2969 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2970 while (!kill(self->sibling[sib].system_tid, 0)) 2971 nanosleep(&delay, NULL); 2972 /* Switch to the remaining sibling */ 2973 sib = !sib; 2974 2975 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2976 &self->apply_prog); 2977 ASSERT_EQ(0, ret) { 2978 TH_LOG("Expected the remaining sibling to sync"); 2979 }; 2980 2981 pthread_mutex_lock(&self->mutex); 2982 2983 /* If remaining sibling didn't have a chance to wake up during 2984 * the first broadcast, manually reduce the num_waits now. 2985 */ 2986 if (self->sibling[sib].num_waits > 1) 2987 self->sibling[sib].num_waits = 1; 2988 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2989 TH_LOG("cond broadcast non-zero"); 2990 } 2991 pthread_mutex_unlock(&self->mutex); 2992 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2993 EXPECT_EQ(0, (long)status); 2994 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2995 while (!kill(self->sibling[sib].system_tid, 0)) 2996 nanosleep(&delay, NULL); 2997 2998 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2999 &self->apply_prog); 3000 ASSERT_EQ(0, ret); /* just us chickens */ 3001 } 3002 3003 /* Make sure restarted syscalls are seen directly as "restart_syscall". */ 3004 TEST(syscall_restart) 3005 { 3006 long ret; 3007 unsigned long msg; 3008 pid_t child_pid; 3009 int pipefd[2]; 3010 int status; 3011 siginfo_t info = { }; 3012 struct sock_filter filter[] = { 3013 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3014 offsetof(struct seccomp_data, nr)), 3015 3016 #ifdef __NR_sigreturn 3017 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), 3018 #endif 3019 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), 3020 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), 3021 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), 3022 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), 3023 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), 3024 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 3025 3026 /* Allow __NR_write for easy logging. */ 3027 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 3028 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3029 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3030 /* The nanosleep jump target. */ 3031 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 3032 /* The restart_syscall jump target. */ 3033 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 3034 }; 3035 struct sock_fprog prog = { 3036 .len = (unsigned short)ARRAY_SIZE(filter), 3037 .filter = filter, 3038 }; 3039 #if defined(__arm__) 3040 struct utsname utsbuf; 3041 #endif 3042 3043 ASSERT_EQ(0, pipe(pipefd)); 3044 3045 child_pid = fork(); 3046 ASSERT_LE(0, child_pid); 3047 if (child_pid == 0) { 3048 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 3049 char buf = ' '; 3050 struct timespec timeout = { }; 3051 3052 /* Attach parent as tracer and stop. 
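		 * (The SIGSTOP gives the parent a window to set
		 * PTRACE_O_TRACESECCOMP before the filter below is installed.)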
*/ 3053 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 3054 EXPECT_EQ(0, raise(SIGSTOP)); 3055 3056 EXPECT_EQ(0, close(pipefd[1])); 3057 3058 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 3059 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3060 } 3061 3062 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 3063 EXPECT_EQ(0, ret) { 3064 TH_LOG("Failed to install filter!"); 3065 } 3066 3067 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3068 TH_LOG("Failed to read() sync from parent"); 3069 } 3070 EXPECT_EQ('.', buf) { 3071 TH_LOG("Failed to get sync data from read()"); 3072 } 3073 3074 /* Start nanosleep to be interrupted. */ 3075 timeout.tv_sec = 1; 3076 errno = 0; 3077 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 3078 TH_LOG("Call to nanosleep() failed (errno %d: %s)", 3079 errno, strerror(errno)); 3080 } 3081 3082 /* Read final sync from parent. */ 3083 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 3084 TH_LOG("Failed final read() from parent"); 3085 } 3086 EXPECT_EQ('!', buf) { 3087 TH_LOG("Failed to get final data from read()"); 3088 } 3089 3090 /* Directly report the status of our test harness results. */ 3091 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS 3092 : EXIT_FAILURE); 3093 } 3094 EXPECT_EQ(0, close(pipefd[0])); 3095 3096 /* Attach to child, setup options, and release. */ 3097 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3098 ASSERT_EQ(true, WIFSTOPPED(status)); 3099 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 3100 PTRACE_O_TRACESECCOMP)); 3101 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3102 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 3103 3104 /* Wait for nanosleep() to start. */ 3105 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3106 ASSERT_EQ(true, WIFSTOPPED(status)); 3107 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3108 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3109 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3110 ASSERT_EQ(0x100, msg); 3111 ret = get_syscall(_metadata, child_pid); 3112 EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); 3113 3114 /* Might as well check siginfo for sanity while we're here. */ 3115 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3116 ASSERT_EQ(SIGTRAP, info.si_signo); 3117 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 3118 EXPECT_EQ(0, info.si_errno); 3119 EXPECT_EQ(getuid(), info.si_uid); 3120 /* Verify signal delivery came from child (seccomp-triggered). */ 3121 EXPECT_EQ(child_pid, info.si_pid); 3122 3123 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 3124 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 3125 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3126 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3127 ASSERT_EQ(true, WIFSTOPPED(status)); 3128 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 3129 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 3130 /* 3131 * There is no siginfo on SIGSTOP any more, so we can't verify 3132 * signal delivery came from parent now (getpid() == info.si_pid). 3133 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 3134 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 3135 */ 3136 EXPECT_EQ(SIGSTOP, info.si_signo); 3137 3138 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. 
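	 * (The filter tags restart_syscall with SECCOMP_RET_TRACE|0x200, which
	 * is the event message checked for below.)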
*/ 3139 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 3140 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3141 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3142 ASSERT_EQ(true, WIFSTOPPED(status)); 3143 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 3144 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3145 3146 /* Wait for restart_syscall() to start. */ 3147 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3148 ASSERT_EQ(true, WIFSTOPPED(status)); 3149 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 3150 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 3151 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 3152 3153 ASSERT_EQ(0x200, msg); 3154 ret = get_syscall(_metadata, child_pid); 3155 #if defined(__arm__) 3156 /* 3157 * FIXME: 3158 * - native ARM registers do NOT expose true syscall. 3159 * - compat ARM registers on ARM64 DO expose true syscall. 3160 */ 3161 ASSERT_EQ(0, uname(&utsbuf)); 3162 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 3163 EXPECT_EQ(__NR_nanosleep, ret); 3164 } else 3165 #endif 3166 { 3167 EXPECT_EQ(__NR_restart_syscall, ret); 3168 } 3169 3170 /* Write again to end test. */ 3171 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 3172 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 3173 EXPECT_EQ(0, close(pipefd[1])); 3174 3175 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 3176 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 3177 _metadata->passed = 0; 3178 } 3179 3180 TEST_SIGNAL(filter_flag_log, SIGSYS) 3181 { 3182 struct sock_filter allow_filter[] = { 3183 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3184 }; 3185 struct sock_filter kill_filter[] = { 3186 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3187 offsetof(struct seccomp_data, nr)), 3188 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 3189 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 3190 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3191 }; 3192 struct sock_fprog allow_prog = { 3193 .len = (unsigned short)ARRAY_SIZE(allow_filter), 3194 .filter = allow_filter, 3195 }; 3196 struct sock_fprog kill_prog = { 3197 .len = (unsigned short)ARRAY_SIZE(kill_filter), 3198 .filter = kill_filter, 3199 }; 3200 long ret; 3201 pid_t parent = getppid(); 3202 3203 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3204 ASSERT_EQ(0, ret); 3205 3206 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 3207 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 3208 &allow_prog); 3209 ASSERT_NE(ENOSYS, errno) { 3210 TH_LOG("Kernel does not support seccomp syscall!"); 3211 } 3212 EXPECT_NE(0, ret) { 3213 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 3214 } 3215 EXPECT_EQ(EINVAL, errno) { 3216 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 3217 } 3218 3219 /* Verify that a simple, permissive filter can be added with no flags */ 3220 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 3221 EXPECT_EQ(0, ret); 3222 3223 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 3224 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3225 &allow_prog); 3226 ASSERT_NE(EINVAL, errno) { 3227 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 3228 } 3229 EXPECT_EQ(0, ret); 3230 3231 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 3232 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 3233 &kill_prog); 3234 EXPECT_EQ(0, ret); 3235 3236 EXPECT_EQ(parent, syscall(__NR_getppid)); 3237 /* getpid() should never return. 
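	 * (SECCOMP_RET_KILL terminates the task as if by SIGSYS, which is the
	 * signal TEST_SIGNAL() expects for this test.)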
*/ 3238 EXPECT_EQ(0, syscall(__NR_getpid)); 3239 } 3240 3241 TEST(get_action_avail) 3242 { 3243 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 3244 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 3245 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 3246 __u32 unknown_action = 0x10000000U; 3247 int i; 3248 long ret; 3249 3250 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 3251 ASSERT_NE(ENOSYS, errno) { 3252 TH_LOG("Kernel does not support seccomp syscall!"); 3253 } 3254 ASSERT_NE(EINVAL, errno) { 3255 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 3256 } 3257 EXPECT_EQ(ret, 0); 3258 3259 for (i = 0; i < ARRAY_SIZE(actions); i++) { 3260 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 3261 EXPECT_EQ(ret, 0) { 3262 TH_LOG("Expected action (0x%X) not available!", 3263 actions[i]); 3264 } 3265 } 3266 3267 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 3268 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 3269 EXPECT_EQ(ret, -1); 3270 EXPECT_EQ(errno, EOPNOTSUPP); 3271 } 3272 3273 TEST(get_metadata) 3274 { 3275 pid_t pid; 3276 int pipefd[2]; 3277 char buf; 3278 struct seccomp_metadata md; 3279 long ret; 3280 3281 /* Only real root can get metadata. */ 3282 if (geteuid()) { 3283 SKIP(return, "get_metadata requires real root"); 3284 return; 3285 } 3286 3287 ASSERT_EQ(0, pipe(pipefd)); 3288 3289 pid = fork(); 3290 ASSERT_GE(pid, 0); 3291 if (pid == 0) { 3292 struct sock_filter filter[] = { 3293 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3294 }; 3295 struct sock_fprog prog = { 3296 .len = (unsigned short)ARRAY_SIZE(filter), 3297 .filter = filter, 3298 }; 3299 3300 /* one with log, one without */ 3301 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3302 SECCOMP_FILTER_FLAG_LOG, &prog)); 3303 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3304 3305 EXPECT_EQ(0, close(pipefd[0])); 3306 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3307 ASSERT_EQ(0, close(pipefd[1])); 3308 3309 while (1) 3310 sleep(100); 3311 } 3312 3313 ASSERT_EQ(0, close(pipefd[1])); 3314 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3315 3316 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3317 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3318 3319 /* Past here must not use ASSERT or child process is never killed. 
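	 * (An ASSERT_*() failure returns from the test immediately, skipping
	 * the kill(SIGKILL) at the end and leaking the sleeping child.)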
*/ 3320 3321 md.filter_off = 0; 3322 errno = 0; 3323 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3324 EXPECT_EQ(sizeof(md), ret) { 3325 if (errno == EINVAL) 3326 SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3327 } 3328 3329 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3330 EXPECT_EQ(md.filter_off, 0); 3331 3332 md.filter_off = 1; 3333 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3334 EXPECT_EQ(sizeof(md), ret); 3335 EXPECT_EQ(md.flags, 0); 3336 EXPECT_EQ(md.filter_off, 1); 3337 3338 skip: 3339 ASSERT_EQ(0, kill(pid, SIGKILL)); 3340 } 3341 3342 static int user_notif_syscall(int nr, unsigned int flags) 3343 { 3344 struct sock_filter filter[] = { 3345 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 3346 offsetof(struct seccomp_data, nr)), 3347 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), 3348 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), 3349 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3350 }; 3351 3352 struct sock_fprog prog = { 3353 .len = (unsigned short)ARRAY_SIZE(filter), 3354 .filter = filter, 3355 }; 3356 3357 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3358 } 3359 3360 #define USER_NOTIF_MAGIC INT_MAX 3361 TEST(user_notification_basic) 3362 { 3363 pid_t pid; 3364 long ret; 3365 int status, listener; 3366 struct seccomp_notif req = {}; 3367 struct seccomp_notif_resp resp = {}; 3368 struct pollfd pollfd; 3369 3370 struct sock_filter filter[] = { 3371 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3372 }; 3373 struct sock_fprog prog = { 3374 .len = (unsigned short)ARRAY_SIZE(filter), 3375 .filter = filter, 3376 }; 3377 3378 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3379 ASSERT_EQ(0, ret) { 3380 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3381 } 3382 3383 pid = fork(); 3384 ASSERT_GE(pid, 0); 3385 3386 /* Check that we get -ENOSYS with no listener attached */ 3387 if (pid == 0) { 3388 if (user_notif_syscall(__NR_getppid, 0) < 0) 3389 exit(1); 3390 ret = syscall(__NR_getppid); 3391 exit(ret >= 0 || errno != ENOSYS); 3392 } 3393 3394 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3395 EXPECT_EQ(true, WIFEXITED(status)); 3396 EXPECT_EQ(0, WEXITSTATUS(status)); 3397 3398 /* Add some no-op filters for grins. */ 3399 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3400 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3401 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3402 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3403 3404 /* Check that the basic notification machinery works */ 3405 listener = user_notif_syscall(__NR_getppid, 3406 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3407 ASSERT_GE(listener, 0); 3408 3409 /* Installing a second listener in the chain should EBUSY */ 3410 EXPECT_EQ(user_notif_syscall(__NR_getppid, 3411 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3412 -1); 3413 EXPECT_EQ(errno, EBUSY); 3414 3415 pid = fork(); 3416 ASSERT_GE(pid, 0); 3417 3418 if (pid == 0) { 3419 ret = syscall(__NR_getppid); 3420 exit(ret != USER_NOTIF_MAGIC); 3421 } 3422 3423 pollfd.fd = listener; 3424 pollfd.events = POLLIN | POLLOUT; 3425 3426 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3427 EXPECT_EQ(pollfd.revents, POLLIN); 3428 3429 /* Test that we can't pass garbage to the kernel. 
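	 * (SECCOMP_IOCTL_NOTIF_RECV requires a zeroed struct seccomp_notif, so
	 * a nonzero pid must be rejected with EINVAL.)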
*/ 3430 memset(&req, 0, sizeof(req)); 3431 req.pid = -1; 3432 errno = 0; 3433 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 3434 EXPECT_EQ(-1, ret); 3435 EXPECT_EQ(EINVAL, errno); 3436 3437 if (ret) { 3438 req.pid = 0; 3439 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3440 } 3441 3442 pollfd.fd = listener; 3443 pollfd.events = POLLIN | POLLOUT; 3444 3445 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3446 EXPECT_EQ(pollfd.revents, POLLOUT); 3447 3448 EXPECT_EQ(req.data.nr, __NR_getppid); 3449 3450 resp.id = req.id; 3451 resp.error = 0; 3452 resp.val = USER_NOTIF_MAGIC; 3453 3454 /* check that we make sure flags == 0 */ 3455 resp.flags = 1; 3456 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3457 EXPECT_EQ(errno, EINVAL); 3458 3459 resp.flags = 0; 3460 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3461 3462 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3463 EXPECT_EQ(true, WIFEXITED(status)); 3464 EXPECT_EQ(0, WEXITSTATUS(status)); 3465 } 3466 3467 TEST(user_notification_with_tsync) 3468 { 3469 int ret; 3470 unsigned int flags; 3471 3472 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3473 ASSERT_EQ(0, ret) { 3474 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3475 } 3476 3477 /* these were exclusive */ 3478 flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | 3479 SECCOMP_FILTER_FLAG_TSYNC; 3480 ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); 3481 ASSERT_EQ(EINVAL, errno); 3482 3483 /* but now they're not */ 3484 flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; 3485 ret = user_notif_syscall(__NR_getppid, flags); 3486 close(ret); 3487 ASSERT_LE(0, ret); 3488 } 3489 3490 TEST(user_notification_kill_in_middle) 3491 { 3492 pid_t pid; 3493 long ret; 3494 int listener; 3495 struct seccomp_notif req = {}; 3496 struct seccomp_notif_resp resp = {}; 3497 3498 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3499 ASSERT_EQ(0, ret) { 3500 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3501 } 3502 3503 listener = user_notif_syscall(__NR_getppid, 3504 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3505 ASSERT_GE(listener, 0); 3506 3507 /* 3508 * Check that nothing bad happens when we kill the task in the middle 3509 * of a syscall. 
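	 * (Once the tracee is gone the notification id goes stale:
	 * NOTIF_ID_VALID starts failing and a late NOTIF_SEND returns ENOENT.)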
3510 */ 3511 pid = fork(); 3512 ASSERT_GE(pid, 0); 3513 3514 if (pid == 0) { 3515 ret = syscall(__NR_getppid); 3516 exit(ret != USER_NOTIF_MAGIC); 3517 } 3518 3519 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3520 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3521 3522 EXPECT_EQ(kill(pid, SIGKILL), 0); 3523 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3524 3525 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3526 3527 resp.id = req.id; 3528 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3529 EXPECT_EQ(ret, -1); 3530 EXPECT_EQ(errno, ENOENT); 3531 } 3532 3533 static int handled = -1; 3534 3535 static void signal_handler(int signal) 3536 { 3537 if (write(handled, "c", 1) != 1) 3538 perror("write from signal"); 3539 } 3540 3541 TEST(user_notification_signal) 3542 { 3543 pid_t pid; 3544 long ret; 3545 int status, listener, sk_pair[2]; 3546 struct seccomp_notif req = {}; 3547 struct seccomp_notif_resp resp = {}; 3548 char c; 3549 3550 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3551 ASSERT_EQ(0, ret) { 3552 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3553 } 3554 3555 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3556 3557 listener = user_notif_syscall(__NR_gettid, 3558 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3559 ASSERT_GE(listener, 0); 3560 3561 pid = fork(); 3562 ASSERT_GE(pid, 0); 3563 3564 if (pid == 0) { 3565 close(sk_pair[0]); 3566 handled = sk_pair[1]; 3567 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3568 perror("signal"); 3569 exit(1); 3570 } 3571 /* 3572 * ERESTARTSYS behavior is a bit hard to test, because we need 3573 * to rely on a signal that has not yet been handled. Let's at 3574 * least check that the error code gets propagated through, and 3575 * hope that it doesn't break when there is actually a signal :) 3576 */ 3577 ret = syscall(__NR_gettid); 3578 exit(!(ret == -1 && errno == 512)); 3579 } 3580 3581 close(sk_pair[1]); 3582 3583 memset(&req, 0, sizeof(req)); 3584 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3585 3586 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3587 3588 /* 3589 * Make sure the signal really is delivered, which means we're not 3590 * stuck in the user notification code any more and the notification 3591 * should be dead. 3592 */ 3593 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3594 3595 resp.id = req.id; 3596 resp.error = -EPERM; 3597 resp.val = 0; 3598 3599 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3600 EXPECT_EQ(errno, ENOENT); 3601 3602 memset(&req, 0, sizeof(req)); 3603 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3604 3605 resp.id = req.id; 3606 resp.error = -512; /* -ERESTARTSYS */ 3607 resp.val = 0; 3608 3609 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3610 3611 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3612 EXPECT_EQ(true, WIFEXITED(status)); 3613 EXPECT_EQ(0, WEXITSTATUS(status)); 3614 } 3615 3616 TEST(user_notification_closed_listener) 3617 { 3618 pid_t pid; 3619 long ret; 3620 int status, listener; 3621 3622 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3623 ASSERT_EQ(0, ret) { 3624 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3625 } 3626 3627 listener = user_notif_syscall(__NR_getppid, 3628 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3629 ASSERT_GE(listener, 0); 3630 3631 /* 3632 * Check that we get an ENOSYS when the listener is closed. 
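	 * (With no listener fd left to answer it, the filtered getppid() fails
	 * with ENOSYS instead of blocking forever.)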
3633 */ 3634 pid = fork(); 3635 ASSERT_GE(pid, 0); 3636 if (pid == 0) { 3637 close(listener); 3638 ret = syscall(__NR_getppid); 3639 exit(ret != -1 && errno != ENOSYS); 3640 } 3641 3642 close(listener); 3643 3644 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3645 EXPECT_EQ(true, WIFEXITED(status)); 3646 EXPECT_EQ(0, WEXITSTATUS(status)); 3647 } 3648 3649 /* 3650 * Check that a pid in a child namespace still shows up as valid in ours. 3651 */ 3652 TEST(user_notification_child_pid_ns) 3653 { 3654 pid_t pid; 3655 int status, listener; 3656 struct seccomp_notif req = {}; 3657 struct seccomp_notif_resp resp = {}; 3658 3659 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { 3660 if (errno == EINVAL) 3661 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3662 }; 3663 3664 listener = user_notif_syscall(__NR_getppid, 3665 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3666 ASSERT_GE(listener, 0); 3667 3668 pid = fork(); 3669 ASSERT_GE(pid, 0); 3670 3671 if (pid == 0) 3672 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3673 3674 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3675 EXPECT_EQ(req.pid, pid); 3676 3677 resp.id = req.id; 3678 resp.error = 0; 3679 resp.val = USER_NOTIF_MAGIC; 3680 3681 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3682 3683 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3684 EXPECT_EQ(true, WIFEXITED(status)); 3685 EXPECT_EQ(0, WEXITSTATUS(status)); 3686 close(listener); 3687 } 3688 3689 /* 3690 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3691 * invalid. 3692 */ 3693 TEST(user_notification_sibling_pid_ns) 3694 { 3695 pid_t pid, pid2; 3696 int status, listener; 3697 struct seccomp_notif req = {}; 3698 struct seccomp_notif_resp resp = {}; 3699 3700 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3701 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3702 } 3703 3704 listener = user_notif_syscall(__NR_getppid, 3705 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3706 ASSERT_GE(listener, 0); 3707 3708 pid = fork(); 3709 ASSERT_GE(pid, 0); 3710 3711 if (pid == 0) { 3712 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3713 if (errno == EPERM) 3714 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3715 else if (errno == EINVAL) 3716 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3717 } 3718 3719 pid2 = fork(); 3720 ASSERT_GE(pid2, 0); 3721 3722 if (pid2 == 0) 3723 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3724 3725 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3726 EXPECT_EQ(true, WIFEXITED(status)); 3727 EXPECT_EQ(0, WEXITSTATUS(status)); 3728 exit(WEXITSTATUS(status)); 3729 } 3730 3731 /* Create the sibling ns, and sibling in it. */ 3732 ASSERT_EQ(unshare(CLONE_NEWPID), 0) { 3733 if (errno == EPERM) 3734 SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); 3735 else if (errno == EINVAL) 3736 SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); 3737 } 3738 ASSERT_EQ(errno, 0); 3739 3740 pid2 = fork(); 3741 ASSERT_GE(pid2, 0); 3742 3743 if (pid2 == 0) { 3744 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3745 /* 3746 * The pid should be 0, i.e. the task is in some namespace that 3747 * we can't "see". 
3748 */ 3749 EXPECT_EQ(req.pid, 0); 3750 3751 resp.id = req.id; 3752 resp.error = 0; 3753 resp.val = USER_NOTIF_MAGIC; 3754 3755 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3756 exit(0); 3757 } 3758 3759 close(listener); 3760 3761 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3762 EXPECT_EQ(true, WIFEXITED(status)); 3763 EXPECT_EQ(0, WEXITSTATUS(status)); 3764 3765 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3766 EXPECT_EQ(true, WIFEXITED(status)); 3767 EXPECT_EQ(0, WEXITSTATUS(status)); 3768 } 3769 3770 TEST(user_notification_fault_recv) 3771 { 3772 pid_t pid; 3773 int status, listener; 3774 struct seccomp_notif req = {}; 3775 struct seccomp_notif_resp resp = {}; 3776 3777 ASSERT_EQ(unshare(CLONE_NEWUSER), 0) { 3778 if (errno == EINVAL) 3779 SKIP(return, "kernel missing CLONE_NEWUSER support"); 3780 } 3781 3782 listener = user_notif_syscall(__NR_getppid, 3783 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3784 ASSERT_GE(listener, 0); 3785 3786 pid = fork(); 3787 ASSERT_GE(pid, 0); 3788 3789 if (pid == 0) 3790 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3791 3792 /* Do a bad recv() */ 3793 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3794 EXPECT_EQ(errno, EFAULT); 3795 3796 /* We should still be able to receive this notification, though. */ 3797 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3798 EXPECT_EQ(req.pid, pid); 3799 3800 resp.id = req.id; 3801 resp.error = 0; 3802 resp.val = USER_NOTIF_MAGIC; 3803 3804 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3805 3806 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3807 EXPECT_EQ(true, WIFEXITED(status)); 3808 EXPECT_EQ(0, WEXITSTATUS(status)); 3809 } 3810 3811 TEST(seccomp_get_notif_sizes) 3812 { 3813 struct seccomp_notif_sizes sizes; 3814 3815 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3816 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3817 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3818 } 3819 3820 TEST(user_notification_continue) 3821 { 3822 pid_t pid; 3823 long ret; 3824 int status, listener; 3825 struct seccomp_notif req = {}; 3826 struct seccomp_notif_resp resp = {}; 3827 struct pollfd pollfd; 3828 3829 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3830 ASSERT_EQ(0, ret) { 3831 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3832 } 3833 3834 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3835 ASSERT_GE(listener, 0); 3836 3837 pid = fork(); 3838 ASSERT_GE(pid, 0); 3839 3840 if (pid == 0) { 3841 int dup_fd, pipe_fds[2]; 3842 pid_t self; 3843 3844 ASSERT_GE(pipe(pipe_fds), 0); 3845 3846 dup_fd = dup(pipe_fds[0]); 3847 ASSERT_GE(dup_fd, 0); 3848 EXPECT_NE(pipe_fds[0], dup_fd); 3849 3850 self = getpid(); 3851 ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); 3852 exit(0); 3853 } 3854 3855 pollfd.fd = listener; 3856 pollfd.events = POLLIN | POLLOUT; 3857 3858 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3859 EXPECT_EQ(pollfd.revents, POLLIN); 3860 3861 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3862 3863 pollfd.fd = listener; 3864 pollfd.events = POLLIN | POLLOUT; 3865 3866 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3867 EXPECT_EQ(pollfd.revents, POLLOUT); 3868 3869 EXPECT_EQ(req.data.nr, __NR_dup); 3870 3871 resp.id = req.id; 3872 resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; 3873 3874 /* 3875 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other 3876 * args be set to 0. 
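	 * (A CONTINUE response that also carries a nonzero error or val must
	 * be rejected with EINVAL.)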
3877 */ 3878 resp.error = 0; 3879 resp.val = USER_NOTIF_MAGIC; 3880 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3881 EXPECT_EQ(errno, EINVAL); 3882 3883 resp.error = USER_NOTIF_MAGIC; 3884 resp.val = 0; 3885 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3886 EXPECT_EQ(errno, EINVAL); 3887 3888 resp.error = 0; 3889 resp.val = 0; 3890 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { 3891 if (errno == EINVAL) 3892 SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); 3893 } 3894 3895 skip: 3896 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3897 EXPECT_EQ(true, WIFEXITED(status)); 3898 EXPECT_EQ(0, WEXITSTATUS(status)) { 3899 if (WEXITSTATUS(status) == 2) { 3900 SKIP(return, "Kernel does not support kcmp() syscall"); 3901 return; 3902 } 3903 } 3904 } 3905 3906 TEST(user_notification_filter_empty) 3907 { 3908 pid_t pid; 3909 long ret; 3910 int status; 3911 struct pollfd pollfd; 3912 struct __clone_args args = { 3913 .flags = CLONE_FILES, 3914 .exit_signal = SIGCHLD, 3915 }; 3916 3917 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3918 ASSERT_EQ(0, ret) { 3919 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3920 } 3921 3922 if (__NR_clone3 < 0) 3923 SKIP(return, "Test not built with clone3 support"); 3924 3925 pid = sys_clone3(&args, sizeof(args)); 3926 ASSERT_GE(pid, 0); 3927 3928 if (pid == 0) { 3929 int listener; 3930 3931 listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3932 if (listener < 0) 3933 _exit(EXIT_FAILURE); 3934 3935 if (dup2(listener, 200) != 200) 3936 _exit(EXIT_FAILURE); 3937 3938 close(listener); 3939 3940 _exit(EXIT_SUCCESS); 3941 } 3942 3943 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3944 EXPECT_EQ(true, WIFEXITED(status)); 3945 EXPECT_EQ(0, WEXITSTATUS(status)); 3946 3947 /* 3948 * The seccomp filter has become unused so we should be notified once 3949 * the kernel gets around to cleaning up task struct. 
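 * The child shares our file table (CLONE_FILES) and parked the listener
 * at fd 200, so the filter's release is visible here as POLLHUP on that
 * descriptor.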
3950 */ 3951 pollfd.fd = 200; 3952 pollfd.events = POLLHUP; 3953 3954 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 3955 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 3956 } 3957 3958 static void *do_thread(void *data) 3959 { 3960 return NULL; 3961 } 3962 3963 TEST(user_notification_filter_empty_threaded) 3964 { 3965 pid_t pid; 3966 long ret; 3967 int status; 3968 struct pollfd pollfd; 3969 struct __clone_args args = { 3970 .flags = CLONE_FILES, 3971 .exit_signal = SIGCHLD, 3972 }; 3973 3974 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3975 ASSERT_EQ(0, ret) { 3976 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3977 } 3978 3979 if (__NR_clone3 < 0) 3980 SKIP(return, "Test not built with clone3 support"); 3981 3982 pid = sys_clone3(&args, sizeof(args)); 3983 ASSERT_GE(pid, 0); 3984 3985 if (pid == 0) { 3986 pid_t pid1, pid2; 3987 int listener, status; 3988 pthread_t thread; 3989 3990 listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); 3991 if (listener < 0) 3992 _exit(EXIT_FAILURE); 3993 3994 if (dup2(listener, 200) != 200) 3995 _exit(EXIT_FAILURE); 3996 3997 close(listener); 3998 3999 pid1 = fork(); 4000 if (pid1 < 0) 4001 _exit(EXIT_FAILURE); 4002 4003 if (pid1 == 0) 4004 _exit(EXIT_SUCCESS); 4005 4006 pid2 = fork(); 4007 if (pid2 < 0) 4008 _exit(EXIT_FAILURE); 4009 4010 if (pid2 == 0) 4011 _exit(EXIT_SUCCESS); 4012 4013 if (pthread_create(&thread, NULL, do_thread, NULL) || 4014 pthread_join(thread, NULL)) 4015 _exit(EXIT_FAILURE); 4016 4017 if (pthread_create(&thread, NULL, do_thread, NULL) || 4018 pthread_join(thread, NULL)) 4019 _exit(EXIT_FAILURE); 4020 4021 if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || 4022 WEXITSTATUS(status)) 4023 _exit(EXIT_FAILURE); 4024 4025 if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || 4026 WEXITSTATUS(status)) 4027 _exit(EXIT_FAILURE); 4028 4029 exit(EXIT_SUCCESS); 4030 } 4031 4032 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4033 EXPECT_EQ(true, WIFEXITED(status)); 4034 EXPECT_EQ(0, WEXITSTATUS(status)); 4035 4036 /* 4037 * The seccomp filter has become unused so we should be notified once 4038 * the kernel gets around to cleaning up task struct. 4039 */ 4040 pollfd.fd = 200; 4041 pollfd.events = POLLHUP; 4042 4043 EXPECT_GT(poll(&pollfd, 1, 2000), 0); 4044 EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); 4045 } 4046 4047 4048 int get_next_fd(int prev_fd) 4049 { 4050 for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) { 4051 if (fcntl(i, F_GETFD) == -1) 4052 return i; 4053 } 4054 _exit(EXIT_FAILURE); 4055 } 4056 4057 TEST(user_notification_addfd) 4058 { 4059 pid_t pid; 4060 long ret; 4061 int status, listener, memfd, fd, nextfd; 4062 struct seccomp_notif_addfd addfd = {}; 4063 struct seccomp_notif_addfd_small small = {}; 4064 struct seccomp_notif_addfd_big big = {}; 4065 struct seccomp_notif req = {}; 4066 struct seccomp_notif_resp resp = {}; 4067 /* 100 ms */ 4068 struct timespec delay = { .tv_nsec = 100000000 }; 4069 4070 /* There may be arbitrary already-open fds at test start. 
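 * Rather than hard-coding fd numbers, track the next free slot with
 * get_next_fd() and compare each newly installed fd against it.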
*/ 4071 memfd = memfd_create("test", 0); 4072 ASSERT_GE(memfd, 0); 4073 nextfd = get_next_fd(memfd); 4074 4075 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4076 ASSERT_EQ(0, ret) { 4077 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4078 } 4079 4080 /* fd: 4 */ 4081 /* Check that the basic notification machinery works */ 4082 listener = user_notif_syscall(__NR_getppid, 4083 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4084 ASSERT_EQ(listener, nextfd); 4085 nextfd = get_next_fd(nextfd); 4086 4087 pid = fork(); 4088 ASSERT_GE(pid, 0); 4089 4090 if (pid == 0) { 4091 /* fds will be added and this value is expected */ 4092 if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) 4093 exit(1); 4094 4095 /* Atomic addfd+send is received here. Check it is a valid fd */ 4096 if (fcntl(syscall(__NR_getppid), F_GETFD) == -1) 4097 exit(1); 4098 4099 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4100 } 4101 4102 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4103 4104 addfd.srcfd = memfd; 4105 addfd.newfd = 0; 4106 addfd.id = req.id; 4107 addfd.flags = 0x0; 4108 4109 /* Verify bad newfd_flags cannot be set */ 4110 addfd.newfd_flags = ~O_CLOEXEC; 4111 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4112 EXPECT_EQ(errno, EINVAL); 4113 addfd.newfd_flags = O_CLOEXEC; 4114 4115 /* Verify bad flags cannot be set */ 4116 addfd.flags = 0xff; 4117 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4118 EXPECT_EQ(errno, EINVAL); 4119 addfd.flags = 0; 4120 4121 /* Verify that remote_fd cannot be set without setting flags */ 4122 addfd.newfd = 1; 4123 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4124 EXPECT_EQ(errno, EINVAL); 4125 addfd.newfd = 0; 4126 4127 /* Verify small size cannot be set */ 4128 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); 4129 EXPECT_EQ(errno, EINVAL); 4130 4131 /* Verify we can't send bits filled in unknown buffer area */ 4132 memset(&big, 0xAA, sizeof(big)); 4133 big.addfd = addfd; 4134 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); 4135 EXPECT_EQ(errno, E2BIG); 4136 4137 4138 /* Verify we can set an arbitrary remote fd */ 4139 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4140 EXPECT_EQ(fd, nextfd); 4141 nextfd = get_next_fd(nextfd); 4142 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4143 4144 /* Verify we can set an arbitrary remote fd with large size */ 4145 memset(&big, 0x0, sizeof(big)); 4146 big.addfd = addfd; 4147 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); 4148 EXPECT_EQ(fd, nextfd); 4149 nextfd = get_next_fd(nextfd); 4150 4151 /* Verify we can set a specific remote fd */ 4152 addfd.newfd = 42; 4153 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4154 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4155 EXPECT_EQ(fd, 42); 4156 EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4157 4158 /* Resume syscall */ 4159 resp.id = req.id; 4160 resp.error = 0; 4161 resp.val = USER_NOTIF_MAGIC; 4162 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4163 4164 /* 4165 * This sets the ID of the ADD FD to the last request plus 1. The 4166 * notification ID increments 1 per notification. 
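 * Targeting req.id + 1 therefore addresses the notification that the
 * child's next getppid() call is about to generate, before it has
 * actually been received.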
4167 */ 4168 addfd.id = req.id + 1; 4169 4170 /* This spins until the underlying notification is generated */ 4171 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 4172 errno != -EINPROGRESS) 4173 nanosleep(&delay, NULL); 4174 4175 memset(&req, 0, sizeof(req)); 4176 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4177 ASSERT_EQ(addfd.id, req.id); 4178 4179 /* Verify we can do an atomic addfd and send */ 4180 addfd.newfd = 0; 4181 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4182 fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); 4183 /* 4184 * Child has earlier "low" fds and now 42, so we expect the next 4185 * lowest available fd to be assigned here. 4186 */ 4187 EXPECT_EQ(fd, nextfd); 4188 nextfd = get_next_fd(nextfd); 4189 ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); 4190 4191 /* 4192 * This sets the ID of the ADD FD to the last request plus 1. The 4193 * notification ID increments 1 per notification. 4194 */ 4195 addfd.id = req.id + 1; 4196 4197 /* This spins until the underlying notification is generated */ 4198 while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && 4199 errno != -EINPROGRESS) 4200 nanosleep(&delay, NULL); 4201 4202 memset(&req, 0, sizeof(req)); 4203 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4204 ASSERT_EQ(addfd.id, req.id); 4205 4206 resp.id = req.id; 4207 resp.error = 0; 4208 resp.val = USER_NOTIF_MAGIC; 4209 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4210 4211 /* Wait for child to finish. */ 4212 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4213 EXPECT_EQ(true, WIFEXITED(status)); 4214 EXPECT_EQ(0, WEXITSTATUS(status)); 4215 4216 close(memfd); 4217 } 4218 4219 TEST(user_notification_addfd_rlimit) 4220 { 4221 pid_t pid; 4222 long ret; 4223 int status, listener, memfd; 4224 struct seccomp_notif_addfd addfd = {}; 4225 struct seccomp_notif req = {}; 4226 struct seccomp_notif_resp resp = {}; 4227 const struct rlimit lim = { 4228 .rlim_cur = 0, 4229 .rlim_max = 0, 4230 }; 4231 4232 memfd = memfd_create("test", 0); 4233 ASSERT_GE(memfd, 0); 4234 4235 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4236 ASSERT_EQ(0, ret) { 4237 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4238 } 4239 4240 /* Check that the basic notification machinery works */ 4241 listener = user_notif_syscall(__NR_getppid, 4242 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4243 ASSERT_GE(listener, 0); 4244 4245 pid = fork(); 4246 ASSERT_GE(pid, 0); 4247 4248 if (pid == 0) 4249 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 4250 4251 4252 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4253 4254 ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); 4255 4256 addfd.srcfd = memfd; 4257 addfd.newfd_flags = O_CLOEXEC; 4258 addfd.newfd = 0; 4259 addfd.id = req.id; 4260 addfd.flags = 0; 4261 4262 /* Should probably spot check /proc/sys/fs/file-nr */ 4263 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4264 EXPECT_EQ(errno, EMFILE); 4265 4266 addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4267 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4268 EXPECT_EQ(errno, EMFILE); 4269 4270 addfd.newfd = 100; 4271 addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; 4272 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); 4273 EXPECT_EQ(errno, EBADF); 4274 4275 resp.id = req.id; 4276 resp.error = 0; 4277 resp.val = USER_NOTIF_MAGIC; 4278 4279 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4280 4281 /* Wait for child to finish. 
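 * It exits 0 only if its single intercepted getppid() returned
 * USER_NOTIF_MAGIC.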
*/ 4282 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4283 EXPECT_EQ(true, WIFEXITED(status)); 4284 EXPECT_EQ(0, WEXITSTATUS(status)); 4285 4286 close(memfd); 4287 } 4288 4289 #ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP 4290 #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) 4291 #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) 4292 #endif 4293 4294 TEST(user_notification_sync) 4295 { 4296 struct seccomp_notif req = {}; 4297 struct seccomp_notif_resp resp = {}; 4298 int status, listener; 4299 pid_t pid; 4300 long ret; 4301 4302 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4303 ASSERT_EQ(0, ret) { 4304 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4305 } 4306 4307 listener = user_notif_syscall(__NR_getppid, 4308 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4309 ASSERT_GE(listener, 0); 4310 4311 /* Try to set invalid flags. */ 4312 EXPECT_SYSCALL_RETURN(-EINVAL, 4313 ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0)); 4314 4315 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 4316 SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0); 4317 4318 pid = fork(); 4319 ASSERT_GE(pid, 0); 4320 if (pid == 0) { 4321 ret = syscall(__NR_getppid); 4322 ASSERT_EQ(ret, USER_NOTIF_MAGIC) { 4323 _exit(1); 4324 } 4325 _exit(0); 4326 } 4327 4328 req.pid = 0; 4329 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4330 4331 ASSERT_EQ(req.data.nr, __NR_getppid); 4332 4333 resp.id = req.id; 4334 resp.error = 0; 4335 resp.val = USER_NOTIF_MAGIC; 4336 resp.flags = 0; 4337 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4338 4339 ASSERT_EQ(waitpid(pid, &status, 0), pid); 4340 ASSERT_EQ(status, 0); 4341 } 4342 4343 4344 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ 4345 FIXTURE(O_SUSPEND_SECCOMP) { 4346 pid_t pid; 4347 }; 4348 4349 FIXTURE_SETUP(O_SUSPEND_SECCOMP) 4350 { 4351 ERRNO_FILTER(block_read, E2BIG); 4352 cap_value_t cap_list[] = { CAP_SYS_ADMIN }; 4353 cap_t caps; 4354 4355 self->pid = 0; 4356 4357 /* make sure we don't have CAP_SYS_ADMIN */ 4358 caps = cap_get_proc(); 4359 ASSERT_NE(NULL, caps); 4360 ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); 4361 ASSERT_EQ(0, cap_set_proc(caps)); 4362 cap_free(caps); 4363 4364 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); 4365 ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read)); 4366 4367 self->pid = fork(); 4368 ASSERT_GE(self->pid, 0); 4369 4370 if (self->pid == 0) { 4371 while (1) 4372 pause(); 4373 _exit(127); 4374 } 4375 } 4376 4377 FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP) 4378 { 4379 if (self->pid) 4380 kill(self->pid, SIGKILL); 4381 } 4382 4383 TEST_F(O_SUSPEND_SECCOMP, setoptions) 4384 { 4385 int wstatus; 4386 4387 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0)); 4388 ASSERT_EQ(self->pid, wait(&wstatus)); 4389 ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP)); 4390 if (errno == EINVAL) 4391 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4392 ASSERT_EQ(EPERM, errno); 4393 } 4394 4395 TEST_F(O_SUSPEND_SECCOMP, seize) 4396 { 4397 int ret; 4398 4399 ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP); 4400 ASSERT_EQ(-1, ret); 4401 if (errno == EINVAL) 4402 SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)"); 4403 ASSERT_EQ(EPERM, errno); 4404 } 4405 4406 /* 4407 * get_nth - Get the nth, space separated entry in a file. 4408 * 4409 * Returns the length of the read field. 
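 * For example, get_nth(_metadata, "/proc/self/stat", 3, &line) reads the
 * third space-separated field of the stat file, the single-character
 * task state that get_proc_stat() below relies on.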
4410 * Fails the test if the field is zero-length. 4411 */ 4412 static ssize_t get_nth(struct __test_metadata *_metadata, const char *path, 4413 const unsigned int position, char **entry) 4414 { 4415 char *line = NULL; 4416 unsigned int i; 4417 ssize_t nread; 4418 size_t len = 0; 4419 FILE *f; 4420 4421 f = fopen(path, "r"); 4422 ASSERT_NE(f, NULL) { 4423 TH_LOG("Could not open %s: %s", path, strerror(errno)); 4424 } 4425 4426 for (i = 0; i < position; i++) { 4427 nread = getdelim(&line, &len, ' ', f); 4428 ASSERT_GE(nread, 0) { 4429 TH_LOG("Failed to read entry %u in file %s", i, path); 4430 } 4431 } 4432 fclose(f); 4433 4434 ASSERT_GT(nread, 0) { 4435 TH_LOG("Entry in file %s had zero length", path); 4436 } 4437 4438 *entry = line; 4439 return nread - 1; 4440 } 4441 4442 /* For a given PID, get the task state (D, R, etc.) */ 4443 static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid) 4444 { 4445 char proc_path[100] = {0}; 4446 char status; 4447 char *line; 4448 4449 snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid); 4450 ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1); 4451 4452 status = *line; 4453 free(line); 4454 4455 return status; 4456 } 4457 4458 TEST(user_notification_fifo) 4459 { 4460 struct seccomp_notif_resp resp = {}; 4461 struct seccomp_notif req = {}; 4462 int i, status, listener; 4463 pid_t pid, pids[3]; 4464 __u64 baseid; 4465 long ret; 4466 /* 100 ms */ 4467 struct timespec delay = { .tv_nsec = 100000000 }; 4468 4469 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4470 ASSERT_EQ(0, ret) { 4471 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4472 } 4473 4474 /* Set up a listener */ 4475 listener = user_notif_syscall(__NR_getppid, 4476 SECCOMP_FILTER_FLAG_NEW_LISTENER); 4477 ASSERT_GE(listener, 0); 4478 4479 pid = fork(); 4480 ASSERT_GE(pid, 0); 4481 4482 if (pid == 0) { 4483 ret = syscall(__NR_getppid); 4484 exit(ret != USER_NOTIF_MAGIC); 4485 } 4486 4487 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4488 baseid = req.id + 1; 4489 4490 resp.id = req.id; 4491 resp.error = 0; 4492 resp.val = USER_NOTIF_MAGIC; 4493 4494 /* Check that a response with flags == 0 is accepted */ 4495 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4496 4497 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4498 EXPECT_EQ(true, WIFEXITED(status)); 4499 EXPECT_EQ(0, WEXITSTATUS(status)); 4500 4501 /* Start children and generate notifications */ 4502 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4503 pid = fork(); 4504 if (pid == 0) { 4505 ret = syscall(__NR_getppid); 4506 exit(ret != USER_NOTIF_MAGIC); 4507 } 4508 pids[i] = pid; 4509 } 4510 4511 /* This spins until all of the children are sleeping */ 4512 restart_wait: 4513 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4514 if (get_proc_stat(_metadata, pids[i]) != 'S') { 4515 nanosleep(&delay, NULL); 4516 goto restart_wait; 4517 } 4518 } 4519 4520 /* Read the notifications in order (and respond) */ 4521 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4522 memset(&req, 0, sizeof(req)); 4523 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4524 EXPECT_EQ(req.id, baseid + i); 4525 resp.id = req.id; 4526 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4527 } 4528 4529 /* Make sure notifications were received */ 4530 for (i = 0; i < ARRAY_SIZE(pids); i++) { 4531 EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]); 4532 EXPECT_EQ(true, WIFEXITED(status)); 4533 EXPECT_EQ(0, WEXITSTATUS(status)); 4534 } 4535 } 4536 4537 /* get_proc_syscall - Get the syscall in progress for a given pid 4538 * 4539 *
Returns the current syscall number for a given process 4540 * Returns -1 if not in syscall (running or blocked) 4541 */ 4542 static long get_proc_syscall(struct __test_metadata *_metadata, int pid) 4543 { 4544 char proc_path[100] = {0}; 4545 long ret = -1; 4546 ssize_t nread; 4547 char *line; 4548 4549 snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid); 4550 nread = get_nth(_metadata, proc_path, 1, &line); 4551 ASSERT_GT(nread, 0); 4552 4553 if (!strncmp("running", line, MIN(7, nread))) 4554 ret = strtol(line, NULL, 16); 4555 4556 free(line); 4557 return ret; 4558 } 4559 4560 /* Ensure non-fatal signals prior to receive are unmodified */ 4561 TEST(user_notification_wait_killable_pre_notification) 4562 { 4563 struct sigaction new_action = { 4564 .sa_handler = signal_handler, 4565 }; 4566 int listener, status, sk_pair[2]; 4567 pid_t pid; 4568 long ret; 4569 char c; 4570 /* 100 ms */ 4571 struct timespec delay = { .tv_nsec = 100000000 }; 4572 4573 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4574 4575 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4576 ASSERT_EQ(0, ret) 4577 { 4578 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4579 } 4580 4581 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4582 4583 listener = user_notif_syscall( 4584 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4585 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4586 ASSERT_GE(listener, 0); 4587 4588 /* 4589 * Check that we can kill the process with SIGUSR1 prior to receiving 4590 * the notification. SIGUSR1 is wired up to a custom signal handler, 4591 * and make sure it gets called. 4592 */ 4593 pid = fork(); 4594 ASSERT_GE(pid, 0); 4595 4596 if (pid == 0) { 4597 close(sk_pair[0]); 4598 handled = sk_pair[1]; 4599 4600 /* Setup the non-fatal sigaction without SA_RESTART */ 4601 if (sigaction(SIGUSR1, &new_action, NULL)) { 4602 perror("sigaction"); 4603 exit(1); 4604 } 4605 4606 ret = syscall(__NR_getppid); 4607 /* Make sure we got a return from a signal interruption */ 4608 exit(ret != -1 || errno != EINTR); 4609 } 4610 4611 /* 4612 * Make sure we've gotten to the seccomp user notification wait 4613 * from getppid prior to sending any signals 4614 */ 4615 while (get_proc_syscall(_metadata, pid) != __NR_getppid && 4616 get_proc_stat(_metadata, pid) != 'S') 4617 nanosleep(&delay, NULL); 4618 4619 /* Send non-fatal kill signal */ 4620 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4621 4622 /* wait for process to exit (exit checks for EINTR) */ 4623 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4624 EXPECT_EQ(true, WIFEXITED(status)); 4625 EXPECT_EQ(0, WEXITSTATUS(status)); 4626 4627 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4628 } 4629 4630 /* Ensure non-fatal signals after receive are blocked */ 4631 TEST(user_notification_wait_killable) 4632 { 4633 struct sigaction new_action = { 4634 .sa_handler = signal_handler, 4635 }; 4636 struct seccomp_notif_resp resp = {}; 4637 struct seccomp_notif req = {}; 4638 int listener, status, sk_pair[2]; 4639 pid_t pid; 4640 long ret; 4641 char c; 4642 /* 100 ms */ 4643 struct timespec delay = { .tv_nsec = 100000000 }; 4644 4645 ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0); 4646 4647 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4648 ASSERT_EQ(0, ret) 4649 { 4650 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4651 } 4652 4653 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 4654 4655 listener = user_notif_syscall( 4656 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4657 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4658 ASSERT_GE(listener, 0); 4659 
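	/*
	 * Expected flow: the child blocks in getppid(); once the notification
	 * is received below, it waits in a killable (TASK_KILLABLE) sleep, so
	 * the SIGUSR1 sent later must not interrupt the syscall, and the
	 * signal handler may only run after we reply with USER_NOTIF_MAGIC.
	 */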
4660 pid = fork(); 4661 ASSERT_GE(pid, 0); 4662 4663 if (pid == 0) { 4664 close(sk_pair[0]); 4665 handled = sk_pair[1]; 4666 4667 /* Set up the sigaction without SA_RESTART */ 4668 if (sigaction(SIGUSR1, &new_action, NULL)) { 4669 perror("sigaction"); 4670 exit(1); 4671 } 4672 4673 /* Make sure that the syscall is completed (no EINTR) */ 4674 ret = syscall(__NR_getppid); 4675 exit(ret != USER_NOTIF_MAGIC); 4676 } 4677 4678 /* 4679 * Get the notification to move the notifying process into an 4680 * uninterruptible (TASK_KILLABLE) sleep. 4681 */ 4682 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4683 /* Send non-fatal kill signal */ 4684 EXPECT_EQ(kill(pid, SIGUSR1), 0); 4685 4686 /* 4687 * Make sure the task moves to TASK_KILLABLE by waiting for the 4688 * D (disk sleep) state after receiving the non-fatal signal. 4689 */ 4690 while (get_proc_stat(_metadata, pid) != 'D') 4691 nanosleep(&delay, NULL); 4692 4693 resp.id = req.id; 4694 resp.val = USER_NOTIF_MAGIC; 4695 /* Make sure the notification is still there and can be replied to */ 4696 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 4697 4698 /* 4699 * Make sure that the signal handler does get called once we're back in 4700 * userspace. 4701 */ 4702 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 4703 /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */ 4704 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4705 EXPECT_EQ(true, WIFEXITED(status)); 4706 EXPECT_EQ(0, WEXITSTATUS(status)); 4707 } 4708 4709 /* Ensure fatal signals after receive are not blocked */ 4710 TEST(user_notification_wait_killable_fatal) 4711 { 4712 struct seccomp_notif req = {}; 4713 int listener, status; 4714 pid_t pid; 4715 long ret; 4716 /* 100 ms */ 4717 struct timespec delay = { .tv_nsec = 100000000 }; 4718 4719 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4720 ASSERT_EQ(0, ret) 4721 { 4722 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4723 } 4724 4725 listener = user_notif_syscall( 4726 __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4727 SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4728 ASSERT_GE(listener, 0); 4729 4730 pid = fork(); 4731 ASSERT_GE(pid, 0); 4732 4733 if (pid == 0) { 4734 /* This should never complete as it should get a SIGTERM */ 4735 syscall(__NR_getppid); 4736 exit(1); 4737 } 4738 4739 while (get_proc_stat(_metadata, pid) != 'S') 4740 nanosleep(&delay, NULL); 4741 4742 /* 4743 * Get the notification to move the notifying process into an 4744 * uninterruptible (TASK_KILLABLE) sleep. 4745 */ 4746 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 4747 /* Kill the process with a fatal signal */ 4748 EXPECT_EQ(kill(pid, SIGTERM), 0); 4749 4750 /* 4751 * Wait for the process to exit and make sure it was terminated by 4752 * the SIGTERM signal. 4753 */ 4754 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4755 EXPECT_EQ(true, WIFSIGNALED(status)); 4756 EXPECT_EQ(SIGTERM, WTERMSIG(status)); 4757 } 4758 4759 /* 4760 * TODO: 4761 * - expand NNP testing 4762 * - better arch-specific TRACE and TRAP handlers 4763 * - endianness checking when appropriate 4764 * - 64-bit arg prodding 4765 * - arch value testing (x86 modes especially) 4766 * - verify that FILTER_FLAG_LOG filters generate log messages 4767 * - verify that RET_LOG generates log messages 4768 */ 4769 4770 TEST_HARNESS_MAIN 4771
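/*
 * Illustrative sketch only, not invoked by any test above: the basic
 * receive/validate/respond cycle that the user_notification_* tests are
 * built around. The helper name is made up for illustration; the listener
 * is an fd returned by user_notif_syscall(), USER_NOTIF_MAGIC is the magic
 * return value defined earlier in this file, and error handling is
 * collapsed to early returns.
 */
static int __attribute__((unused))
example_user_notif_handle_one(int listener)
{
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	/* Block until the filtered syscall generates a notification. */
	if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req) < 0)
		return -1;

	/* Optionally confirm the notifying task has not died meanwhile. */
	if (ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id) < 0)
		return -1;

	/* Emulate the syscall: hand USER_NOTIF_MAGIC back to the tracee. */
	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;
	resp.flags = 0;

	return ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
}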