// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 *
 * Test code for seccomp bpf.
 */

#define _GNU_SOURCE
#include <sys/types.h>

/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 */
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/times.h>
#include <sys/socket.h>
#include <sys/ioctl.h>

#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>

#include "../kselftest_harness.h"

#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif

#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
#endif
#ifndef SECCOMP_RET_KILL
#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
#endif

#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

#ifndef SECCOMP_GET_ACTION_AVAIL
#define SECCOMP_GET_ACTION_AVAIL 2
#endif

#ifndef SECCOMP_GET_NOTIF_SIZES
#define SECCOMP_GET_NOTIF_SIZES 3
#endif

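/*
 * The fallback definitions in this area are local copies so the test still
 * builds against older kernel/libc headers; with current headers they should
 * match include/uapi/linux/seccomp.h. As a rough sketch, the
 * SECCOMP_GET_ACTION_AVAIL operation is probed along the lines of:
 *
 *	__u32 action = SECCOMP_RET_KILL_PROCESS;
 *	seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action);	// 0 if recognized
 *
 * using the seccomp() wrapper defined further down in this file.
 */
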
#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_LOG
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif

#ifndef PTRACE_SECCOMP_GET_METADATA
#define PTRACE_SECCOMP_GET_METADATA 0x420d

struct seccomp_metadata {
	__u64 filter_off;	/* Input: which filter */
	__u64 flags;		/* Output: filter's flags */
};
#endif

#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)

#define SECCOMP_RET_USER_NOTIF 0x7fc00000U

#define SECCOMP_IOC_MAGIC		'!'
#define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
#define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)

/* Flags for seccomp notification fd ioctl. */
#define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
						struct seccomp_notif_resp)
#define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOR(2, __u64)

struct seccomp_notif {
	__u64 id;
	__u32 pid;
	__u32 flags;
	struct seccomp_data data;
};

struct seccomp_notif_resp {
	__u64 id;
	__s64 val;
	__s32 error;
	__u32 flags;
};

struct seccomp_notif_sizes {
	__u16 seccomp_notif;
	__u16 seccomp_notif_resp;
	__u16 seccomp_data;
};
#endif

#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
#define PTRACE_EVENTMSG_SYSCALL_EXIT	2
#endif

#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	errno = 0;
	return syscall(__NR_seccomp, op, flags, args);
}
#endif

/*
 * The seccomp_data args are __u64, but BPF_LD|BPF_W loads only 32 bits; on
 * big-endian machines the low word of each argument is 4 bytes further in,
 * which is what the offset adjustment below accounts for.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
#error "wut? Unknown __BYTE_ORDER?!"
#endif

#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed

TEST(mode_strict_support)
{
	long ret;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}
	syscall(__NR_exit, 0);
}

TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
{
	long ret;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}
	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
		NULL, NULL, NULL);
	EXPECT_FALSE(true) {
		TH_LOG("Unreachable!");
	}
}

/* Note! This doesn't test no new privs behavior */
TEST(no_new_privs_support)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	EXPECT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}
}

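/*
 * A quick sketch of the probe below: with CONFIG_SECCOMP_FILTER built in, a
 * NULL sock_fprog pointer should make the kernel fault in copy_from_user()
 * and return EFAULT, while a kernel without filter support is expected to
 * reject SECCOMP_MODE_FILTER itself with EINVAL before touching the pointer.
 */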
/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
TEST(mode_filter_support)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EFAULT, errno) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
	}
}

TEST(mode_filter_without_nnp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
	ASSERT_LE(0, ret) {
		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
	}
	errno = 0;
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	/* Succeeds with CAP_SYS_ADMIN, fails without */
	/* TODO(wad) check caps not euid */
	if (geteuid()) {
		EXPECT_EQ(-1, ret);
		EXPECT_EQ(EACCES, errno);
	} else {
		EXPECT_EQ(0, ret);
	}
}

#define MAX_INSNS_PER_PATH 32768

TEST(filter_size_limits)
{
	int i;
	int count = BPF_MAXINSNS + 1;
	struct sock_filter allow[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter *filter;
	struct sock_fprog prog = { };
	long ret;

	filter = calloc(count, sizeof(*filter));
	ASSERT_NE(NULL, filter);

	for (i = 0; i < count; i++)
		filter[i] = allow[0];

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	prog.filter = filter;
	prog.len = count;

	/* Too many filter instructions in a single filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_NE(0, ret) {
		TH_LOG("Installing %d insn filter was allowed", prog.len);
	}

	/* One less is okay, though. */
	prog.len -= 1;
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
	}
}

TEST(filter_chain_limits)
{
	int i;
	int count = BPF_MAXINSNS;
	struct sock_filter allow[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter *filter;
	struct sock_fprog prog = { };
	long ret;

	filter = calloc(count, sizeof(*filter));
	ASSERT_NE(NULL, filter);

	for (i = 0; i < count; i++)
		filter[i] = allow[0];

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	prog.filter = filter;
	prog.len = 1;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	prog.len = count;

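	/*
	 * For context: the kernel caps the *total* number of BPF instructions
	 * attached to a task (MAX_INSNS_PER_PATH above), and each attached
	 * filter appears to be charged a small per-filter penalty (about four
	 * instructions, matching the TH_LOG math below) on top of its length,
	 * so repeatedly attaching BPF_MAXINSNS-sized filters must eventually
	 * fail.
	 */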
	/* Too many total filter instructions. */
	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		if (ret != 0)
			break;
	}
	ASSERT_NE(0, ret) {
		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
		       i, count, i * (count + 4));
	}
}

TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}


TEST(mode_filter_get_seccomp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(2, ret);
}


TEST(ALLOW_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}

TEST(empty_prog)
{
	struct sock_filter filter[] = {
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}

TEST(log_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	/* getppid() should succeed and be logged (no check for logging) */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}

TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}

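/*
 * A note on the two tests around this point: an action value the kernel does
 * not recognize is expected to be treated as the most restrictive action
 * (kill), whether it sorts below or above SECCOMP_RET_ALLOW numerically, so
 * both the 0x10000000U filter above and the 0x90000000U filter below should
 * end their test with SIGSYS.
 */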
/* return code >= 0x80000000 is unused. */
TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}

TEST_SIGNAL(KILL_all, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}

TEST_SIGNAL(KILL_one, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
{
	void *fatal_address;
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32 bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
			(unsigned long)&fatal_address, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	struct tms timebuf;
	clock_t clock = times(&timebuf);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
	/* times() should never return. */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}

TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
#ifndef __NR_mmap2
	int sysno = __NR_mmap;
#else
	int sysno = __NR_mmap2;
#endif
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32 bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;
	pid_t parent = getppid();
	int fd;
	void *map1, *map2;
	int page_size = sysconf(_SC_PAGESIZE);

	ASSERT_LT(0, page_size);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	fd = open("/dev/zero", O_RDONLY);
	ASSERT_NE(-1, fd);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	map1 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
	EXPECT_NE(MAP_FAILED, map1);
	/* mmap2() should never return. */
	map2 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
	EXPECT_EQ(MAP_FAILED, map2);

	/* The test failed, so clean up the resources. */
	munmap(map1, page_size);
	munmap(map2, page_size);
	close(fd);
}

/* This is a thread task to die via seccomp filter violation. */
void *kill_thread(void *data)
{
	bool die = (bool)data;

	if (die) {
		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
		return (void *)SIBLING_EXIT_FAILURE;
	}

	return (void *)SIBLING_EXIT_UNKILLED;
}

/* Prepare a thread that will kill itself or both of us. */
void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
{
	pthread_t thread;
	void *status;
	/* Kill only when calling __NR_prctl. */
	struct sock_filter filter_thread[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_thread = {
		.len = (unsigned short)ARRAY_SIZE(filter_thread),
		.filter = filter_thread,
	};
	struct sock_filter filter_process[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog_process = {
		.len = (unsigned short)ARRAY_SIZE(filter_process),
		.filter = filter_process,
	};

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
			     kill_process ? &prog_process : &prog_thread));

	/*
	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
	 * action cannot be downgraded by a new filter.
	 */
	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));

	/* Start a thread that will exit immediately. */
	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
	ASSERT_EQ(0, pthread_join(thread, &status));
	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);

	/* Start a thread that will die immediately. */
	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
	ASSERT_EQ(0, pthread_join(thread, &status));
	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);

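	/*
	 * For reference: SECCOMP_RET_KILL_THREAD only terminates the thread
	 * that made the filtered call, while SECCOMP_RET_KILL_PROCESS (added
	 * in Linux 4.14) is expected to take down the whole thread group with
	 * SIGSYS, so in the kill_process case the exit(42) below should never
	 * be reached.
	 */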
	/*
	 * If we get here, only the spawned thread died. Let the parent know
	 * the whole process didn't die (i.e. this thread, the spawner,
	 * stayed running).
	 */
	exit(42);
}

TEST(KILL_thread)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, false);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If only the thread was killed, we'll see exit 42. */
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(42, WEXITSTATUS(status));
}

TEST(KILL_process)
{
	int status;
	pid_t child_pid;

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, true);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If the entire process was killed, we'll see SIGSYS. */
	ASSERT_TRUE(WIFSIGNALED(status));
	ASSERT_EQ(SIGSYS, WTERMSIG(status));
}

/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}

#define ERRNO_FILTER(name, errno)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}

/* Make sure basic errno values are correctly passed through a filter. */
TEST(ERRNO_valid)
{
	ERRNO_FILTER(valid, E2BIG);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(0, NULL, 0));
	EXPECT_EQ(E2BIG, errno);
}

/* Make sure an errno of zero is correctly handled by the arch code. */
TEST(ERRNO_zero)
{
	ERRNO_FILTER(zero, 0);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* "errno" of 0 is ok. */
	EXPECT_EQ(0, read(0, NULL, 0));
}

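/*
 * A reminder of how a filter's return value is laid out: the action lives in
 * the upper bits and the low 16 bits are SECCOMP_RET_DATA, which is what
 * ERRNO_FILTER() ORs the errno value into, e.g.
 * BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM).
 */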
/*
 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
 * This tests that the errno value gets capped correctly, fixed by
 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
 */
TEST(ERRNO_capped)
{
	ERRNO_FILTER(capped, 4096);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(0, NULL, 0));
	EXPECT_EQ(4095, errno);
}

/*
 * Filters are processed in reverse order: last applied is executed first.
 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
 * SECCOMP_RET_DATA mask results will follow the most recently applied
 * matching filter return (and not the lowest or highest value).
 */
TEST(ERRNO_order)
{
	ERRNO_FILTER(first, 11);
	ERRNO_FILTER(second, 13);
	ERRNO_FILTER(third, 12);
	long ret;
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(0, NULL, 0));
	EXPECT_EQ(12, errno);
}

FIXTURE_DATA(TRAP) {
	struct sock_fprog prog;
};

FIXTURE_SETUP(TRAP)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
}

FIXTURE_TEARDOWN(TRAP)
{
	if (self->prog.filter)
		free(self->prog.filter);
}

TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	syscall(__NR_getpid);
}

/* Ensure that SIGSYS overrides SIG_IGN */
TEST_F_SIGNAL(TRAP, ign, SIGSYS)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	signal(SIGSYS, SIG_IGN);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	syscall(__NR_getpid);
}

static siginfo_t TRAP_info;
static volatile int TRAP_nr;
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	memcpy(&TRAP_info, info, sizeof(TRAP_info));
	TRAP_nr = nr;
}

TEST_F(TRAP, handler)
{
	int ret, test;
	struct sigaction act;
	sigset_t mask;

	memset(&act, 0, sizeof(act));
	sigemptyset(&mask);
	sigaddset(&mask, SIGSYS);

	act.sa_sigaction = &TRAP_action;
	act.sa_flags = SA_SIGINFO;
	ret = sigaction(SIGSYS, &act, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("sigaction failed");
	}
	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
	ASSERT_EQ(0, ret) {
		TH_LOG("sigprocmask failed");
	}

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);
	TRAP_nr = 0;
	memset(&TRAP_info, 0, sizeof(TRAP_info));
	/* Expect the registers to be rolled back. (nr = error) may vary
	 * based on arch. */
	ret = syscall(__NR_getpid);
	/* Silence gcc warning about volatile. */
	test = TRAP_nr;
	EXPECT_EQ(SIGSYS, test);
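	/*
	 * The SIGSYS siginfo should carry si_call_addr, si_syscall and
	 * si_arch, but glibc headers older than 2.26 do not expose those
	 * fields, so overlay a local struct on the same storage (falling
	 * back to &si_pid when si_syscall is unavailable).
	 */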
	struct local_sigsys {
		void *_call_addr;	/* calling user insn */
		int _syscall;		/* triggering system call number */
		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
	} *sigsys = (struct local_sigsys *)
#ifdef si_syscall
		&(TRAP_info.si_call_addr);
#else
		&TRAP_info.si_pid;
#endif
	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
	/* Make sure arch is non-zero. */
	EXPECT_NE(0, sigsys->_arch);
	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
}

FIXTURE_DATA(precedence) {
	struct sock_fprog allow;
	struct sock_fprog log;
	struct sock_fprog trace;
	struct sock_fprog error;
	struct sock_fprog trap;
	struct sock_fprog kill;
};

FIXTURE_SETUP(precedence)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_filter log_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_filter trace_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
	};
	struct sock_filter error_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
	};
	struct sock_filter trap_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
	};
	struct sock_filter kill_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};

	memset(self, 0, sizeof(*self));
#define FILTER_ALLOC(_x) \
	self->_x.filter = malloc(sizeof(_x##_insns)); \
	ASSERT_NE(NULL, self->_x.filter); \
	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
	FILTER_ALLOC(allow);
	FILTER_ALLOC(log);
	FILTER_ALLOC(trace);
	FILTER_ALLOC(error);
	FILTER_ALLOC(trap);
	FILTER_ALLOC(kill);
}

FIXTURE_TEARDOWN(precedence)
{
#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
	FILTER_FREE(allow);
	FILTER_FREE(log);
	FILTER_FREE(trace);
	FILTER_FREE(error);
	FILTER_FREE(trap);
	FILTER_FREE(kill);
}

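/*
 * What the precedence tests below exercise, in short: every attached filter
 * is evaluated and the most restrictive result wins, roughly
 * KILL > TRAP > ERRNO > TRACE > LOG > ALLOW, regardless of the order in
 * which the filters were installed. Each fixture filter above only diverges
 * from ALLOW for getpid(), so getppid() keeps working in every test.
 */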
TEST_F(precedence, allow_ok)
{
	pid_t parent, res = 0;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
}

TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
{
	pid_t parent, res = 0;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
	/* getpid() should never return. */
	res = syscall(__NR_getpid);
	EXPECT_EQ(0, res);
}

TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F(precedence, errno_is_third)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F(precedence, errno_is_third_in_any_order)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(0, syscall(__NR_getpid));
}

TEST_F(precedence, trace_is_fourth)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* No ptracer attached, so RET_TRACE fails the syscall. */
	EXPECT_EQ(-1, syscall(__NR_getpid));
}

TEST_F(precedence, trace_is_fourth_in_any_order)
{
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* No ptracer attached, so RET_TRACE fails the syscall. */
	EXPECT_EQ(-1, syscall(__NR_getpid));
}

TEST_F(precedence, log_is_fifth)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}

TEST_F(precedence, log_is_fifth_in_any_order)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}

#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
bool tracer_running;
void tracer_stop(int sig)
{
	tracer_running = false;
}

typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);

void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
		  tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
	int ret = -1;
	struct sigaction action = {
		.sa_handler = tracer_stop,
	};

	/* Allow external shutdown. */
	tracer_running = true;
	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

	errno = 0;
	while (ret == -1 && errno != EINVAL)
		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
	ASSERT_EQ(0, ret) {
		kill(tracee, SIGKILL);
	}
	/* Wait for attach stop */
	wait(NULL);

	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
						      PTRACE_O_TRACESYSGOOD :
						      PTRACE_O_TRACESECCOMP);
	ASSERT_EQ(0, ret) {
		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
		kill(tracee, SIGKILL);
	}
	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
		     tracee, NULL, 0);
	ASSERT_EQ(0, ret);

	/* Unblock the tracee */
	ASSERT_EQ(1, write(fd, "A", 1));
	ASSERT_EQ(0, close(fd));

	/* Run until we're shut down. Must assert to stop execution. */
	while (tracer_running) {
		int status;

		if (wait(&status) != tracee)
			continue;
		if (WIFSIGNALED(status) || WIFEXITED(status))
			/* Child is dead. Time to go. */
			return;

		/* Check if this is a seccomp event. */
		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));

		tracer_func(_metadata, tracee, status, args);

		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
			     tracee, NULL, 0);
		ASSERT_EQ(0, ret);
	}
	/* Directly report the status of our test harness results. */
	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
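/*
 * A rough outline of the tracer handshake used below: the test process forks
 * a child and promotes it to tracer; the tracer PTRACE_ATTACHes, selects
 * either PTRACE_O_TRACESECCOMP (to receive PTRACE_EVENT_SECCOMP stops for
 * SECCOMP_RET_TRACE) or PTRACE_O_TRACESYSGOOD (plain syscall stops), and then
 * writes one byte into the pipe so the tracee can continue. For seccomp
 * events, the low 16 bits of the filter's return value are retrieved with
 * PTRACE_GETEVENTMSG.
 */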
/* Common tracer setup/teardown functions. */
void cont_handler(int num)
{ }
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	char sync;
	int pipefd[2];
	pid_t tracer_pid;
	pid_t tracee = getpid();

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	signal(SIGALRM, cont_handler);
	if (tracer_pid == 0) {
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], tracee, func, args,
			     ptrace_syscall);
		syscall(__NR_exit, 0);
	}
	close(pipefd[1]);
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
	read(pipefd[0], &sync, 1);
	close(pipefd[0]);

	return tracer_pid;
}
void teardown_trace_fixture(struct __test_metadata *_metadata,
			    pid_t tracer)
{
	if (tracer) {
		int status;
		/*
		 * Extract the exit code from the other process and
		 * adopt it for ourselves in case its asserts failed.
		 */
		ASSERT_EQ(0, kill(tracer, SIGUSR1));
		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
		if (WEXITSTATUS(status))
			_metadata->passed = 0;
	}
}

/* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
	unsigned long poke_addr;
};

void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
		 void *args)
{
	int ret;
	unsigned long msg;
	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;

	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);
	/* If this fails, don't try to recover. */
	ASSERT_EQ(0x1001, msg) {
		kill(tracee, SIGKILL);
	}
	/*
	 * Poke in the message.
	 * Registers are not touched to try to keep this relatively arch
	 * agnostic.
	 */
	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
	EXPECT_EQ(0, ret);
}

FIXTURE_DATA(TRACE_poke) {
	struct sock_fprog prog;
	pid_t tracer;
	long poked;
	struct tracer_args_poke_t tracer_args;
};

FIXTURE_SETUP(TRACE_poke)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	self->poked = 0;
	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);

	/* Set up tracer args. */
	self->tracer_args.poke_addr = (unsigned long)&self->poked;

	/* Launch tracer. */
	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
					   &self->tracer_args, false);
}

FIXTURE_TEARDOWN(TRACE_poke)
{
	teardown_trace_fixture(_metadata, self->tracer);
	if (self->prog.filter)
		free(self->prog.filter);
}

TEST_F(TRACE_poke, read_has_side_effects)
{
	ssize_t ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, self->poked);
	ret = read(-1, NULL, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(0x1001, self->poked);
}

TEST_F(TRACE_poke, getpid_runs_normally)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, self->poked);
	EXPECT_NE(0, syscall(__NR_getpid));
	EXPECT_EQ(0, self->poked);
}

#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS	s390_regs
# define SYSCALL_NUM	gprs[2]
# define SYSCALL_RET	gprs[2]
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM	regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif

/* When the syscall return can't be changed, stub out the tests for it. */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if (val < 0) {				\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif

/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif

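/*
 * Two quirks worth keeping in mind for the helpers below: on MIPS the syscall
 * number and return value share a register, so a skipped syscall cannot also
 * have its return value faked (hence SYSCALL_NUM_RET_SHARE_REG and the
 * simplified EXPECT_SYSCALL_RETURN above), and an O32 tracee may use the
 * indirect syscall(__NR_O32_Linux), in which case the real syscall number
 * sits in the first argument register.
 */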
/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
	ARCH_REGS regs;
#ifdef HAVE_GETREGS
	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
		TH_LOG("PTRACE_GETREGS failed");
		return -1;
	}
#else
	struct iovec iov;

	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
		TH_LOG("PTRACE_GETREGSET failed");
		return -1;
	}
#endif

#if defined(__mips__)
	if (regs.SYSCALL_NUM == __NR_O32_Linux)
		return regs.SYSCALL_SYSCALL_NUM;
#endif
	return regs.SYSCALL_NUM;
}

/* Architecture-specific syscall changing routine. */
void change_syscall(struct __test_metadata *_metadata,
		    pid_t tracee, int syscall, int result)
{
	int ret;
	ARCH_REGS regs;
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
	struct iovec iov;
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret) {}

#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
    defined(__s390__) || defined(__hppa__)
	{
		regs.SYSCALL_NUM = syscall;
	}
#elif defined(__mips__)
	{
		if (regs.SYSCALL_NUM == __NR_O32_Linux)
			regs.SYSCALL_SYSCALL_NUM = syscall;
		else
			regs.SYSCALL_NUM = syscall;
	}

#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
# define PTRACE_SET_SYSCALL 23
# endif
	{
		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
		EXPECT_EQ(0, ret);
	}

#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
# define NT_ARM_SYSTEM_CALL 0x404
# endif
	{
		iov.iov_base = &syscall;
		iov.iov_len = sizeof(syscall);
		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
			     &iov);
		EXPECT_EQ(0, ret);
	}

#else
	ASSERT_EQ(1, 0) {
		TH_LOG("How is the syscall changed on this architecture?");
	}
#endif

	/* If syscall is skipped, change return value. */
	if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
		TH_LOG("Can't modify syscall return on this architecture");
#else
		regs.SYSCALL_RET = result;
#endif

#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret);
}

void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
		    int status, void *args)
{
	int ret;
	unsigned long msg;

	/* Make sure we got the right message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);

	/* Validate and take action on expected syscalls. */
	switch (msg) {
	case 0x1002:
		/* change getpid to getppid. */
		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
		change_syscall(_metadata, tracee, __NR_getppid, 0);
		break;
	case 0x1003:
		/* skip gettid with valid return code. */
		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
		change_syscall(_metadata, tracee, -1, 45000);
		break;
	case 0x1004:
		/* skip openat with error. */
		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
		change_syscall(_metadata, tracee, -1, -ESRCH);
		break;
	case 0x1005:
		/* do nothing (allow getppid) */
		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
		break;
	default:
		EXPECT_EQ(0, msg) {
			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
			kill(tracee, SIGKILL);
		}
	}

}

void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
		   int status, void *args)
{
	int ret, nr;
	unsigned long msg;
	static bool entry;

	/*
	 * The traditional way to tell PTRACE_SYSCALL entry/exit
	 * is by counting.
	 */
	entry = !entry;

	/* Make sure we got an appropriate message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);
	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);

	if (!entry)
		return;

	nr = get_syscall(_metadata, tracee);

	if (nr == __NR_getpid)
		change_syscall(_metadata, tracee, __NR_getppid, 0);
	if (nr == __NR_gettid)
		change_syscall(_metadata, tracee, -1, 45000);
	if (nr == __NR_openat)
		change_syscall(_metadata, tracee, -1, -ESRCH);
}

FIXTURE_DATA(TRACE_syscall) {
	struct sock_fprog prog;
	pid_t tracer, mytid, mypid, parent;
};

FIXTURE_SETUP(TRACE_syscall)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);

	/* Prepare some testable syscall results. */
	self->mytid = syscall(__NR_gettid);
	ASSERT_GT(self->mytid, 0);
	ASSERT_NE(self->mytid, 1) {
		TH_LOG("Running this test as init is not supported. :)");
	}

	self->mypid = getpid();
	ASSERT_GT(self->mypid, 0);
	ASSERT_EQ(self->mytid, self->mypid);

	self->parent = getppid();
	ASSERT_GT(self->parent, 0);
	ASSERT_NE(self->parent, self->mypid);

	/* Launch tracer. */
	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
					   false);
}

FIXTURE_TEARDOWN(TRACE_syscall)
{
	teardown_trace_fixture(_metadata, self->tracer);
	if (self->prog.filter)
		free(self->prog.filter);
}

TEST_F(TRACE_syscall, ptrace_syscall_redirected)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer will redirect getpid to getppid. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

TEST_F(TRACE_syscall, ptrace_syscall_errno)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the open syscall, resulting in ESRCH. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}

TEST_F(TRACE_syscall, ptrace_syscall_faked)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the gettid syscall, resulting in a fake pid. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}

TEST_F(TRACE_syscall, syscall_allowed)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* getppid works as expected (no changes). */
	EXPECT_EQ(self->parent, syscall(__NR_getppid));
	EXPECT_NE(self->mypid, syscall(__NR_getppid));
}

TEST_F(TRACE_syscall, syscall_redirected)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* getpid has been redirected to getppid as expected. */
	EXPECT_EQ(self->parent, syscall(__NR_getpid));
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

TEST_F(TRACE_syscall, syscall_errno)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* openat has been skipped and an errno returned. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}

TEST_F(TRACE_syscall, syscall_faked)
{
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* gettid has been skipped and an altered return value stored. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}

TEST_F(TRACE_syscall, skip_after_RET_TRACE)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install fixture filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install "errno on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

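	/*
	 * The point of this test (and kill_after_RET_TRACE below): after the
	 * tracer rewrites the syscall in response to SECCOMP_RET_TRACE, the
	 * kernel is expected to re-evaluate the filters against the new
	 * syscall number, so the "errno on getppid" filter should apply to
	 * the redirected call instead of being bypassed.
	 */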
*/ 1980 errno = 0; 1981 EXPECT_EQ(-1, syscall(__NR_getpid)); 1982 EXPECT_EQ(EPERM, errno); 1983 } 1984 1985 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) 1986 { 1987 struct sock_filter filter[] = { 1988 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1989 offsetof(struct seccomp_data, nr)), 1990 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 1991 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1992 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1993 }; 1994 struct sock_fprog prog = { 1995 .len = (unsigned short)ARRAY_SIZE(filter), 1996 .filter = filter, 1997 }; 1998 long ret; 1999 2000 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2001 ASSERT_EQ(0, ret); 2002 2003 /* Install fixture filter. */ 2004 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 2005 ASSERT_EQ(0, ret); 2006 2007 /* Install "death on getppid" filter. */ 2008 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2009 ASSERT_EQ(0, ret); 2010 2011 /* Tracer will redirect getpid to getppid, and we should die. */ 2012 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2013 } 2014 2015 TEST_F(TRACE_syscall, skip_after_ptrace) 2016 { 2017 struct sock_filter filter[] = { 2018 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2019 offsetof(struct seccomp_data, nr)), 2020 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2021 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2022 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2023 }; 2024 struct sock_fprog prog = { 2025 .len = (unsigned short)ARRAY_SIZE(filter), 2026 .filter = filter, 2027 }; 2028 long ret; 2029 2030 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 2031 teardown_trace_fixture(_metadata, self->tracer); 2032 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 2033 true); 2034 2035 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2036 ASSERT_EQ(0, ret); 2037 2038 /* Install "errno on getppid" filter. */ 2039 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2040 ASSERT_EQ(0, ret); 2041 2042 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2043 EXPECT_EQ(-1, syscall(__NR_getpid)); 2044 EXPECT_EQ(EPERM, errno); 2045 } 2046 2047 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) 2048 { 2049 struct sock_filter filter[] = { 2050 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2051 offsetof(struct seccomp_data, nr)), 2052 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2053 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2054 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2055 }; 2056 struct sock_fprog prog = { 2057 .len = (unsigned short)ARRAY_SIZE(filter), 2058 .filter = filter, 2059 }; 2060 long ret; 2061 2062 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 2063 teardown_trace_fixture(_metadata, self->tracer); 2064 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 2065 true); 2066 2067 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2068 ASSERT_EQ(0, ret); 2069 2070 /* Install "death on getppid" filter. */ 2071 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2072 ASSERT_EQ(0, ret); 2073 2074 /* Tracer will redirect getpid to getppid, and we should die. 
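 * This relies on seccomp being evaluated after the ptrace
 * syscall-entry stop: the PTRACE_SYSCALL tracer rewrites getpid to
 * getppid, the "death on getppid" filter then sees the new syscall
 * number, and the thread dies with the SIGSYS this TEST_F_SIGNAL()
 * expects.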
*/ 2075 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2076 } 2077 2078 TEST(seccomp_syscall) 2079 { 2080 struct sock_filter filter[] = { 2081 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2082 }; 2083 struct sock_fprog prog = { 2084 .len = (unsigned short)ARRAY_SIZE(filter), 2085 .filter = filter, 2086 }; 2087 long ret; 2088 2089 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2090 ASSERT_EQ(0, ret) { 2091 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2092 } 2093 2094 /* Reject insane operation. */ 2095 ret = seccomp(-1, 0, &prog); 2096 ASSERT_NE(ENOSYS, errno) { 2097 TH_LOG("Kernel does not support seccomp syscall!"); 2098 } 2099 EXPECT_EQ(EINVAL, errno) { 2100 TH_LOG("Did not reject crazy op value!"); 2101 } 2102 2103 /* Reject strict with flags or pointer. */ 2104 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2105 EXPECT_EQ(EINVAL, errno) { 2106 TH_LOG("Did not reject mode strict with flags!"); 2107 } 2108 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2109 EXPECT_EQ(EINVAL, errno) { 2110 TH_LOG("Did not reject mode strict with uargs!"); 2111 } 2112 2113 /* Reject insane args for filter. */ 2114 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2115 EXPECT_EQ(EINVAL, errno) { 2116 TH_LOG("Did not reject crazy filter flags!"); 2117 } 2118 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2119 EXPECT_EQ(EFAULT, errno) { 2120 TH_LOG("Did not reject NULL filter!"); 2121 } 2122 2123 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2124 EXPECT_EQ(0, errno) { 2125 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2126 strerror(errno)); 2127 } 2128 } 2129 2130 TEST(seccomp_syscall_mode_lock) 2131 { 2132 struct sock_filter filter[] = { 2133 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2134 }; 2135 struct sock_fprog prog = { 2136 .len = (unsigned short)ARRAY_SIZE(filter), 2137 .filter = filter, 2138 }; 2139 long ret; 2140 2141 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2142 ASSERT_EQ(0, ret) { 2143 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2144 } 2145 2146 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2147 ASSERT_NE(ENOSYS, errno) { 2148 TH_LOG("Kernel does not support seccomp syscall!"); 2149 } 2150 EXPECT_EQ(0, ret) { 2151 TH_LOG("Could not install filter!"); 2152 } 2153 2154 /* Make sure neither entry point will switch to strict. */ 2155 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2156 EXPECT_EQ(EINVAL, errno) { 2157 TH_LOG("Switched to mode strict!"); 2158 } 2159 2160 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2161 EXPECT_EQ(EINVAL, errno) { 2162 TH_LOG("Switched to mode strict!"); 2163 } 2164 } 2165 2166 /* 2167 * Test detection of known and unknown filter flags. Userspace needs to be able 2168 * to check if a filter flag is supported by the current kernel and a good way 2169 * of doing that is by attempting to enter filter mode, with the flag bit in 2170 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2171 * that the flag is valid and EINVAL indicates that the flag is invalid. 
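 *
 * A minimal userspace probe built on that behavior might look like the
 * sketch below (illustrative only; the helper name is made up and it is
 * not used by this test):
 *
 *	static bool filter_flag_supported(unsigned long flag)
 *	{
 *		errno = 0;
 *		seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
 *		return errno == EFAULT;
 *	}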
2172 */ 2173 TEST(detect_seccomp_filter_flags) 2174 { 2175 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2176 SECCOMP_FILTER_FLAG_LOG, 2177 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2178 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2179 unsigned int exclusive[] = { 2180 SECCOMP_FILTER_FLAG_TSYNC, 2181 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2182 unsigned int flag, all_flags, exclusive_mask; 2183 int i; 2184 long ret; 2185 2186 /* Test detection of individual known-good filter flags */ 2187 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2188 int bits = 0; 2189 2190 flag = flags[i]; 2191 /* Make sure the flag is a single bit! */ 2192 while (flag) { 2193 if (flag & 0x1) 2194 bits ++; 2195 flag >>= 1; 2196 } 2197 ASSERT_EQ(1, bits); 2198 flag = flags[i]; 2199 2200 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2201 ASSERT_NE(ENOSYS, errno) { 2202 TH_LOG("Kernel does not support seccomp syscall!"); 2203 } 2204 EXPECT_EQ(-1, ret); 2205 EXPECT_EQ(EFAULT, errno) { 2206 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2207 flag); 2208 } 2209 2210 all_flags |= flag; 2211 } 2212 2213 /* 2214 * Test detection of all known-good filter flags combined. But 2215 * for the exclusive flags we need to mask them out and try them 2216 * individually for the "all flags" testing. 2217 */ 2218 exclusive_mask = 0; 2219 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2220 exclusive_mask |= exclusive[i]; 2221 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2222 flag = all_flags & ~exclusive_mask; 2223 flag |= exclusive[i]; 2224 2225 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2226 EXPECT_EQ(-1, ret); 2227 EXPECT_EQ(EFAULT, errno) { 2228 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2229 flag); 2230 } 2231 } 2232 2233 /* Test detection of an unknown filter flags, without exclusives. */ 2234 flag = -1; 2235 flag &= ~exclusive_mask; 2236 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2237 EXPECT_EQ(-1, ret); 2238 EXPECT_EQ(EINVAL, errno) { 2239 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2240 flag); 2241 } 2242 2243 /* 2244 * Test detection of an unknown filter flag that may simply need to be 2245 * added to this test 2246 */ 2247 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2248 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2249 EXPECT_EQ(-1, ret); 2250 EXPECT_EQ(EINVAL, errno) { 2251 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! 
Does a new flag need to be added to this test?", 2252 flag); 2253 } 2254 } 2255 2256 TEST(TSYNC_first) 2257 { 2258 struct sock_filter filter[] = { 2259 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2260 }; 2261 struct sock_fprog prog = { 2262 .len = (unsigned short)ARRAY_SIZE(filter), 2263 .filter = filter, 2264 }; 2265 long ret; 2266 2267 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2268 ASSERT_EQ(0, ret) { 2269 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2270 } 2271 2272 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2273 &prog); 2274 ASSERT_NE(ENOSYS, errno) { 2275 TH_LOG("Kernel does not support seccomp syscall!"); 2276 } 2277 EXPECT_EQ(0, ret) { 2278 TH_LOG("Could not install initial filter with TSYNC!"); 2279 } 2280 } 2281 2282 #define TSYNC_SIBLINGS 2 2283 struct tsync_sibling { 2284 pthread_t tid; 2285 pid_t system_tid; 2286 sem_t *started; 2287 pthread_cond_t *cond; 2288 pthread_mutex_t *mutex; 2289 int diverge; 2290 int num_waits; 2291 struct sock_fprog *prog; 2292 struct __test_metadata *metadata; 2293 }; 2294 2295 /* 2296 * To avoid joining joined threads (which is not allowed by Bionic), 2297 * make sure we both successfully join and clear the tid to skip a 2298 * later join attempt during fixture teardown. Any remaining threads 2299 * will be directly killed during teardown. 2300 */ 2301 #define PTHREAD_JOIN(tid, status) \ 2302 do { \ 2303 int _rc = pthread_join(tid, status); \ 2304 if (_rc) { \ 2305 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2306 (unsigned int)tid, _rc); \ 2307 } else { \ 2308 tid = 0; \ 2309 } \ 2310 } while (0) 2311 2312 FIXTURE_DATA(TSYNC) { 2313 struct sock_fprog root_prog, apply_prog; 2314 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2315 sem_t started; 2316 pthread_cond_t cond; 2317 pthread_mutex_t mutex; 2318 int sibling_count; 2319 }; 2320 2321 FIXTURE_SETUP(TSYNC) 2322 { 2323 struct sock_filter root_filter[] = { 2324 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2325 }; 2326 struct sock_filter apply_filter[] = { 2327 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2328 offsetof(struct seccomp_data, nr)), 2329 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2330 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2331 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2332 }; 2333 2334 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2335 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2336 memset(&self->sibling, 0, sizeof(self->sibling)); 2337 self->root_prog.filter = malloc(sizeof(root_filter)); 2338 ASSERT_NE(NULL, self->root_prog.filter); 2339 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2340 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2341 2342 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2343 ASSERT_NE(NULL, self->apply_prog.filter); 2344 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2345 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2346 2347 self->sibling_count = 0; 2348 pthread_mutex_init(&self->mutex, NULL); 2349 pthread_cond_init(&self->cond, NULL); 2350 sem_init(&self->started, 0, 0); 2351 self->sibling[0].tid = 0; 2352 self->sibling[0].cond = &self->cond; 2353 self->sibling[0].started = &self->started; 2354 self->sibling[0].mutex = &self->mutex; 2355 self->sibling[0].diverge = 0; 2356 self->sibling[0].num_waits = 1; 2357 self->sibling[0].prog = &self->root_prog; 2358 self->sibling[0].metadata = _metadata; 2359 self->sibling[1].tid = 0; 2360 self->sibling[1].cond = &self->cond; 2361 self->sibling[1].started = &self->started; 
2362 self->sibling[1].mutex = &self->mutex; 2363 self->sibling[1].diverge = 0; 2364 self->sibling[1].prog = &self->root_prog; 2365 self->sibling[1].num_waits = 1; 2366 self->sibling[1].metadata = _metadata; 2367 } 2368 2369 FIXTURE_TEARDOWN(TSYNC) 2370 { 2371 int sib = 0; 2372 2373 if (self->root_prog.filter) 2374 free(self->root_prog.filter); 2375 if (self->apply_prog.filter) 2376 free(self->apply_prog.filter); 2377 2378 for ( ; sib < self->sibling_count; ++sib) { 2379 struct tsync_sibling *s = &self->sibling[sib]; 2380 2381 if (!s->tid) 2382 continue; 2383 /* 2384 * If a thread is still running, it may be stuck, so hit 2385 * it over the head really hard. 2386 */ 2387 pthread_kill(s->tid, 9); 2388 } 2389 pthread_mutex_destroy(&self->mutex); 2390 pthread_cond_destroy(&self->cond); 2391 sem_destroy(&self->started); 2392 } 2393 2394 void *tsync_sibling(void *data) 2395 { 2396 long ret = 0; 2397 struct tsync_sibling *me = data; 2398 2399 me->system_tid = syscall(__NR_gettid); 2400 2401 pthread_mutex_lock(me->mutex); 2402 if (me->diverge) { 2403 /* Just re-apply the root prog to fork the tree */ 2404 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2405 me->prog, 0, 0); 2406 } 2407 sem_post(me->started); 2408 /* Return outside of started so parent notices failures. */ 2409 if (ret) { 2410 pthread_mutex_unlock(me->mutex); 2411 return (void *)SIBLING_EXIT_FAILURE; 2412 } 2413 do { 2414 pthread_cond_wait(me->cond, me->mutex); 2415 me->num_waits = me->num_waits - 1; 2416 } while (me->num_waits); 2417 pthread_mutex_unlock(me->mutex); 2418 2419 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2420 if (!ret) 2421 return (void *)SIBLING_EXIT_NEWPRIVS; 2422 read(0, NULL, 0); 2423 return (void *)SIBLING_EXIT_UNKILLED; 2424 } 2425 2426 void tsync_start_sibling(struct tsync_sibling *sibling) 2427 { 2428 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2429 } 2430 2431 TEST_F(TSYNC, siblings_fail_prctl) 2432 { 2433 long ret; 2434 void *status; 2435 struct sock_filter filter[] = { 2436 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2437 offsetof(struct seccomp_data, nr)), 2438 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2439 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2440 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2441 }; 2442 struct sock_fprog prog = { 2443 .len = (unsigned short)ARRAY_SIZE(filter), 2444 .filter = filter, 2445 }; 2446 2447 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2448 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2449 } 2450 2451 /* Check prctl failure detection by requesting sib 0 diverge. */ 2452 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2453 ASSERT_NE(ENOSYS, errno) { 2454 TH_LOG("Kernel does not support seccomp syscall!"); 2455 } 2456 ASSERT_EQ(0, ret) { 2457 TH_LOG("setting filter failed"); 2458 } 2459 2460 self->sibling[0].diverge = 1; 2461 tsync_start_sibling(&self->sibling[0]); 2462 tsync_start_sibling(&self->sibling[1]); 2463 2464 while (self->sibling_count < TSYNC_SIBLINGS) { 2465 sem_wait(&self->started); 2466 self->sibling_count++; 2467 } 2468 2469 /* Signal the threads to clean up*/ 2470 pthread_mutex_lock(&self->mutex); 2471 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2472 TH_LOG("cond broadcast non-zero"); 2473 } 2474 pthread_mutex_unlock(&self->mutex); 2475 2476 /* Ensure diverging sibling failed to call prctl. 
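 * Sibling 0 diverged, so its prctl(PR_SET_SECCOMP) call was rejected
 * with EINVAL by the "EINVAL on prctl" filter above and it should
 * report SIBLING_EXIT_FAILURE; sibling 1 never calls prctl and should
 * still be running normally (SIBLING_EXIT_UNKILLED).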
*/ 2477 PTHREAD_JOIN(self->sibling[0].tid, &status); 2478 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2479 PTHREAD_JOIN(self->sibling[1].tid, &status); 2480 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2481 } 2482 2483 TEST_F(TSYNC, two_siblings_with_ancestor) 2484 { 2485 long ret; 2486 void *status; 2487 2488 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2489 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2490 } 2491 2492 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2493 ASSERT_NE(ENOSYS, errno) { 2494 TH_LOG("Kernel does not support seccomp syscall!"); 2495 } 2496 ASSERT_EQ(0, ret) { 2497 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2498 } 2499 tsync_start_sibling(&self->sibling[0]); 2500 tsync_start_sibling(&self->sibling[1]); 2501 2502 while (self->sibling_count < TSYNC_SIBLINGS) { 2503 sem_wait(&self->started); 2504 self->sibling_count++; 2505 } 2506 2507 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2508 &self->apply_prog); 2509 ASSERT_EQ(0, ret) { 2510 TH_LOG("Could not install filter on all threads!"); 2511 } 2512 /* Tell the siblings to test the policy */ 2513 pthread_mutex_lock(&self->mutex); 2514 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2515 TH_LOG("cond broadcast non-zero"); 2516 } 2517 pthread_mutex_unlock(&self->mutex); 2518 /* Ensure they are both killed and don't exit cleanly. */ 2519 PTHREAD_JOIN(self->sibling[0].tid, &status); 2520 EXPECT_EQ(0x0, (long)status); 2521 PTHREAD_JOIN(self->sibling[1].tid, &status); 2522 EXPECT_EQ(0x0, (long)status); 2523 } 2524 2525 TEST_F(TSYNC, two_sibling_want_nnp) 2526 { 2527 void *status; 2528 2529 /* start siblings before any prctl() operations */ 2530 tsync_start_sibling(&self->sibling[0]); 2531 tsync_start_sibling(&self->sibling[1]); 2532 while (self->sibling_count < TSYNC_SIBLINGS) { 2533 sem_wait(&self->started); 2534 self->sibling_count++; 2535 } 2536 2537 /* Tell the siblings to test no policy */ 2538 pthread_mutex_lock(&self->mutex); 2539 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2540 TH_LOG("cond broadcast non-zero"); 2541 } 2542 pthread_mutex_unlock(&self->mutex); 2543 2544 /* Ensure they are both upset about lacking nnp. */ 2545 PTHREAD_JOIN(self->sibling[0].tid, &status); 2546 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2547 PTHREAD_JOIN(self->sibling[1].tid, &status); 2548 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2549 } 2550 2551 TEST_F(TSYNC, two_siblings_with_no_filter) 2552 { 2553 long ret; 2554 void *status; 2555 2556 /* start siblings before any prctl() operations */ 2557 tsync_start_sibling(&self->sibling[0]); 2558 tsync_start_sibling(&self->sibling[1]); 2559 while (self->sibling_count < TSYNC_SIBLINGS) { 2560 sem_wait(&self->started); 2561 self->sibling_count++; 2562 } 2563 2564 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2565 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2566 } 2567 2568 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2569 &self->apply_prog); 2570 ASSERT_NE(ENOSYS, errno) { 2571 TH_LOG("Kernel does not support seccomp syscall!"); 2572 } 2573 ASSERT_EQ(0, ret) { 2574 TH_LOG("Could not install filter on all threads!"); 2575 } 2576 2577 /* Tell the siblings to test the policy */ 2578 pthread_mutex_lock(&self->mutex); 2579 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2580 TH_LOG("cond broadcast non-zero"); 2581 } 2582 pthread_mutex_unlock(&self->mutex); 2583 2584 /* Ensure they are both killed and don't exit cleanly.
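 * The TSYNC'd filter kills on __NR_read, so both siblings die in their
 * final read() and never reach their normal return statement; the
 * joined value observed here is 0 rather than one of the
 * SIBLING_EXIT_* codes.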
*/ 2585 PTHREAD_JOIN(self->sibling[0].tid, &status); 2586 EXPECT_EQ(0x0, (long)status); 2587 PTHREAD_JOIN(self->sibling[1].tid, &status); 2588 EXPECT_EQ(0x0, (long)status); 2589 } 2590 2591 TEST_F(TSYNC, two_siblings_with_one_divergence) 2592 { 2593 long ret; 2594 void *status; 2595 2596 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2597 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2598 } 2599 2600 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2601 ASSERT_NE(ENOSYS, errno) { 2602 TH_LOG("Kernel does not support seccomp syscall!"); 2603 } 2604 ASSERT_EQ(0, ret) { 2605 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2606 } 2607 self->sibling[0].diverge = 1; 2608 tsync_start_sibling(&self->sibling[0]); 2609 tsync_start_sibling(&self->sibling[1]); 2610 2611 while (self->sibling_count < TSYNC_SIBLINGS) { 2612 sem_wait(&self->started); 2613 self->sibling_count++; 2614 } 2615 2616 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2617 &self->apply_prog); 2618 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2619 TH_LOG("Did not fail on diverged sibling."); 2620 } 2621 2622 /* Wake the threads */ 2623 pthread_mutex_lock(&self->mutex); 2624 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2625 TH_LOG("cond broadcast non-zero"); 2626 } 2627 pthread_mutex_unlock(&self->mutex); 2628 2629 /* Ensure they are both unkilled. */ 2630 PTHREAD_JOIN(self->sibling[0].tid, &status); 2631 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2632 PTHREAD_JOIN(self->sibling[1].tid, &status); 2633 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2634 } 2635 2636 TEST_F(TSYNC, two_siblings_not_under_filter) 2637 { 2638 long ret, sib; 2639 void *status; 2640 struct timespec delay = { .tv_nsec = 100000000 }; 2641 2642 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2643 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2644 } 2645 2646 /* 2647 * Sibling 0 will have its own seccomp policy 2648 * and Sibling 1 will not be under seccomp at 2649 * all. Sibling 1 will enter seccomp and 0 2650 * will cause failure. 2651 */ 2652 self->sibling[0].diverge = 1; 2653 tsync_start_sibling(&self->sibling[0]); 2654 tsync_start_sibling(&self->sibling[1]); 2655 2656 while (self->sibling_count < TSYNC_SIBLINGS) { 2657 sem_wait(&self->started); 2658 self->sibling_count++; 2659 } 2660 2661 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2662 ASSERT_NE(ENOSYS, errno) { 2663 TH_LOG("Kernel does not support seccomp syscall!"); 2664 } 2665 ASSERT_EQ(0, ret) { 2666 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2667 } 2668 2669 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2670 &self->apply_prog); 2671 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2672 TH_LOG("Did not fail on diverged sibling."); 2673 } 2674 sib = 1; 2675 if (ret == self->sibling[0].system_tid) 2676 sib = 0; 2677 2678 pthread_mutex_lock(&self->mutex); 2679 2680 /* Increment the other siblings num_waits so we can clean up 2681 * the one we just saw. 2682 */ 2683 self->sibling[!sib].num_waits += 1; 2684 2685 /* Signal the thread to clean up*/ 2686 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2687 TH_LOG("cond broadcast non-zero"); 2688 } 2689 pthread_mutex_unlock(&self->mutex); 2690 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2691 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2692 /* Poll for actual task death. pthread_join doesn't guarantee it. 
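 * kill() with signal 0 only performs the existence check, so this loop
 * sleeps 100ms per iteration until the kernel has actually reaped the
 * thread and the tid lookup starts failing.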
*/ 2693 while (!kill(self->sibling[sib].system_tid, 0)) 2694 nanosleep(&delay, NULL); 2695 /* Switch to the remaining sibling */ 2696 sib = !sib; 2697 2698 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2699 &self->apply_prog); 2700 ASSERT_EQ(0, ret) { 2701 TH_LOG("Expected the remaining sibling to sync"); 2702 }; 2703 2704 pthread_mutex_lock(&self->mutex); 2705 2706 /* If remaining sibling didn't have a chance to wake up during 2707 * the first broadcast, manually reduce the num_waits now. 2708 */ 2709 if (self->sibling[sib].num_waits > 1) 2710 self->sibling[sib].num_waits = 1; 2711 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2712 TH_LOG("cond broadcast non-zero"); 2713 } 2714 pthread_mutex_unlock(&self->mutex); 2715 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2716 EXPECT_EQ(0, (long)status); 2717 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2718 while (!kill(self->sibling[sib].system_tid, 0)) 2719 nanosleep(&delay, NULL); 2720 2721 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2722 &self->apply_prog); 2723 ASSERT_EQ(0, ret); /* just us chickens */ 2724 } 2725 2726 /* Make sure restarted syscalls are seen directly as "restart_syscall". */ 2727 TEST(syscall_restart) 2728 { 2729 long ret; 2730 unsigned long msg; 2731 pid_t child_pid; 2732 int pipefd[2]; 2733 int status; 2734 siginfo_t info = { }; 2735 struct sock_filter filter[] = { 2736 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2737 offsetof(struct seccomp_data, nr)), 2738 2739 #ifdef __NR_sigreturn 2740 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), 2741 #endif 2742 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), 2743 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), 2744 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), 2745 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0), 2746 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 2747 2748 /* Allow __NR_write for easy logging. */ 2749 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 2750 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2751 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2752 /* The nanosleep jump target. */ 2753 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 2754 /* The restart_syscall jump target. */ 2755 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 2756 }; 2757 struct sock_fprog prog = { 2758 .len = (unsigned short)ARRAY_SIZE(filter), 2759 .filter = filter, 2760 }; 2761 #if defined(__arm__) 2762 struct utsname utsbuf; 2763 #endif 2764 2765 ASSERT_EQ(0, pipe(pipefd)); 2766 2767 child_pid = fork(); 2768 ASSERT_LE(0, child_pid); 2769 if (child_pid == 0) { 2770 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 2771 char buf = ' '; 2772 struct timespec timeout = { }; 2773 2774 /* Attach parent as tracer and stop. */ 2775 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 2776 EXPECT_EQ(0, raise(SIGSTOP)); 2777 2778 EXPECT_EQ(0, close(pipefd[1])); 2779 2780 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2781 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2782 } 2783 2784 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2785 EXPECT_EQ(0, ret) { 2786 TH_LOG("Failed to install filter!"); 2787 } 2788 2789 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2790 TH_LOG("Failed to read() sync from parent"); 2791 } 2792 EXPECT_EQ('.', buf) { 2793 TH_LOG("Failed to get sync data from read()"); 2794 } 2795 2796 /* Start nanosleep to be interrupted. 
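 * nanosleep() is marked SECCOMP_RET_TRACE|0x100 by the filter above,
 * so the parent will see a PTRACE_EVENT_SECCOMP stop here, interrupt
 * the sleep with SIGSTOP/SIGCONT, and then expect the kernel to come
 * back through restart_syscall() (RET_TRACE|0x200).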
*/ 2797 timeout.tv_sec = 1; 2798 errno = 0; 2799 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 2800 TH_LOG("Call to nanosleep() failed (errno %d)", errno); 2801 } 2802 2803 /* Read final sync from parent. */ 2804 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2805 TH_LOG("Failed final read() from parent"); 2806 } 2807 EXPECT_EQ('!', buf) { 2808 TH_LOG("Failed to get final data from read()"); 2809 } 2810 2811 /* Directly report the status of our test harness results. */ 2812 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS 2813 : EXIT_FAILURE); 2814 } 2815 EXPECT_EQ(0, close(pipefd[0])); 2816 2817 /* Attach to child, setup options, and release. */ 2818 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2819 ASSERT_EQ(true, WIFSTOPPED(status)); 2820 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 2821 PTRACE_O_TRACESECCOMP)); 2822 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2823 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 2824 2825 /* Wait for nanosleep() to start. */ 2826 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2827 ASSERT_EQ(true, WIFSTOPPED(status)); 2828 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2829 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2830 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2831 ASSERT_EQ(0x100, msg); 2832 EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid)); 2833 2834 /* Might as well check siginfo for sanity while we're here. */ 2835 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2836 ASSERT_EQ(SIGTRAP, info.si_signo); 2837 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 2838 EXPECT_EQ(0, info.si_errno); 2839 EXPECT_EQ(getuid(), info.si_uid); 2840 /* Verify signal delivery came from child (seccomp-triggered). */ 2841 EXPECT_EQ(child_pid, info.si_pid); 2842 2843 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 2844 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 2845 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2846 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2847 ASSERT_EQ(true, WIFSTOPPED(status)); 2848 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 2849 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2850 /* 2851 * There is no siginfo on SIGSTOP any more, so we can't verify 2852 * signal delivery came from parent now (getpid() == info.si_pid). 2853 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 2854 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 2855 */ 2856 EXPECT_EQ(SIGSTOP, info.si_signo); 2857 2858 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ 2859 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 2860 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2861 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2862 ASSERT_EQ(true, WIFSTOPPED(status)); 2863 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 2864 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2865 2866 /* Wait for restart_syscall() to start. */ 2867 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2868 ASSERT_EQ(true, WIFSTOPPED(status)); 2869 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2870 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2871 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2872 2873 ASSERT_EQ(0x200, msg); 2874 ret = get_syscall(_metadata, child_pid); 2875 #if defined(__arm__) 2876 /* 2877 * FIXME: 2878 * - native ARM registers do NOT expose true syscall. 2879 * - compat ARM registers on ARM64 DO expose true syscall. 
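 * In practice that means an arm binary on a native arm kernel
 * (utsname machine "arm...") still reports __NR_nanosleep after the
 * restart, while the same binary running compat on an arm64 kernel
 * ("aarch64") reports __NR_restart_syscall, hence the uname() check
 * below.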
2880 */ 2881 ASSERT_EQ(0, uname(&utsbuf)); 2882 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 2883 EXPECT_EQ(__NR_nanosleep, ret); 2884 } else 2885 #endif 2886 { 2887 EXPECT_EQ(__NR_restart_syscall, ret); 2888 } 2889 2890 /* Write again to end test. */ 2891 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2892 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 2893 EXPECT_EQ(0, close(pipefd[1])); 2894 2895 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2896 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 2897 _metadata->passed = 0; 2898 } 2899 2900 TEST_SIGNAL(filter_flag_log, SIGSYS) 2901 { 2902 struct sock_filter allow_filter[] = { 2903 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2904 }; 2905 struct sock_filter kill_filter[] = { 2906 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2907 offsetof(struct seccomp_data, nr)), 2908 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2909 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2910 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2911 }; 2912 struct sock_fprog allow_prog = { 2913 .len = (unsigned short)ARRAY_SIZE(allow_filter), 2914 .filter = allow_filter, 2915 }; 2916 struct sock_fprog kill_prog = { 2917 .len = (unsigned short)ARRAY_SIZE(kill_filter), 2918 .filter = kill_filter, 2919 }; 2920 long ret; 2921 pid_t parent = getppid(); 2922 2923 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2924 ASSERT_EQ(0, ret); 2925 2926 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 2927 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 2928 &allow_prog); 2929 ASSERT_NE(ENOSYS, errno) { 2930 TH_LOG("Kernel does not support seccomp syscall!"); 2931 } 2932 EXPECT_NE(0, ret) { 2933 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 2934 } 2935 EXPECT_EQ(EINVAL, errno) { 2936 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 2937 } 2938 2939 /* Verify that a simple, permissive filter can be added with no flags */ 2940 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 2941 EXPECT_EQ(0, ret); 2942 2943 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 2944 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 2945 &allow_prog); 2946 ASSERT_NE(EINVAL, errno) { 2947 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 2948 } 2949 EXPECT_EQ(0, ret); 2950 2951 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 2952 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 2953 &kill_prog); 2954 EXPECT_EQ(0, ret); 2955 2956 EXPECT_EQ(parent, syscall(__NR_getppid)); 2957 /* getpid() should never return. 
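 * The kill filter installed above returns SECCOMP_RET_KILL for
 * __NR_getpid, so the call below should die with the SIGSYS this
 * TEST_SIGNAL() expects rather than ever reaching the EXPECT_EQ()
 * comparison.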
*/ 2958 EXPECT_EQ(0, syscall(__NR_getpid)); 2959 } 2960 2961 TEST(get_action_avail) 2962 { 2963 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 2964 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 2965 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 2966 __u32 unknown_action = 0x10000000U; 2967 int i; 2968 long ret; 2969 2970 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 2971 ASSERT_NE(ENOSYS, errno) { 2972 TH_LOG("Kernel does not support seccomp syscall!"); 2973 } 2974 ASSERT_NE(EINVAL, errno) { 2975 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 2976 } 2977 EXPECT_EQ(ret, 0); 2978 2979 for (i = 0; i < ARRAY_SIZE(actions); i++) { 2980 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 2981 EXPECT_EQ(ret, 0) { 2982 TH_LOG("Expected action (0x%X) not available!", 2983 actions[i]); 2984 } 2985 } 2986 2987 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 2988 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 2989 EXPECT_EQ(ret, -1); 2990 EXPECT_EQ(errno, EOPNOTSUPP); 2991 } 2992 2993 TEST(get_metadata) 2994 { 2995 pid_t pid; 2996 int pipefd[2]; 2997 char buf; 2998 struct seccomp_metadata md; 2999 long ret; 3000 3001 /* Only real root can get metadata. */ 3002 if (geteuid()) { 3003 XFAIL(return, "get_metadata requires real root"); 3004 return; 3005 } 3006 3007 ASSERT_EQ(0, pipe(pipefd)); 3008 3009 pid = fork(); 3010 ASSERT_GE(pid, 0); 3011 if (pid == 0) { 3012 struct sock_filter filter[] = { 3013 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3014 }; 3015 struct sock_fprog prog = { 3016 .len = (unsigned short)ARRAY_SIZE(filter), 3017 .filter = filter, 3018 }; 3019 3020 /* one with log, one without */ 3021 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3022 SECCOMP_FILTER_FLAG_LOG, &prog)); 3023 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3024 3025 EXPECT_EQ(0, close(pipefd[0])); 3026 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3027 ASSERT_EQ(0, close(pipefd[1])); 3028 3029 while (1) 3030 sleep(100); 3031 } 3032 3033 ASSERT_EQ(0, close(pipefd[1])); 3034 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3035 3036 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3037 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3038 3039 /* Past here must not use ASSERT or child process is never killed. 
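 * (An ASSERT_* failure aborts the test function immediately, which
 * would skip the kill(pid, SIGKILL) at the end and leave the forked
 * child sleeping forever; EXPECT_* records the failure but keeps
 * going.)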
*/ 3040 3041 md.filter_off = 0; 3042 errno = 0; 3043 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3044 EXPECT_EQ(sizeof(md), ret) { 3045 if (errno == EINVAL) 3046 XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3047 } 3048 3049 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3050 EXPECT_EQ(md.filter_off, 0); 3051 3052 md.filter_off = 1; 3053 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3054 EXPECT_EQ(sizeof(md), ret); 3055 EXPECT_EQ(md.flags, 0); 3056 EXPECT_EQ(md.filter_off, 1); 3057 3058 skip: 3059 ASSERT_EQ(0, kill(pid, SIGKILL)); 3060 } 3061 3062 static int user_trap_syscall(int nr, unsigned int flags) 3063 { 3064 struct sock_filter filter[] = { 3065 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 3066 offsetof(struct seccomp_data, nr)), 3067 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), 3068 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), 3069 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), 3070 }; 3071 3072 struct sock_fprog prog = { 3073 .len = (unsigned short)ARRAY_SIZE(filter), 3074 .filter = filter, 3075 }; 3076 3077 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3078 } 3079 3080 #define USER_NOTIF_MAGIC 116983961184613L 3081 TEST(user_notification_basic) 3082 { 3083 pid_t pid; 3084 long ret; 3085 int status, listener; 3086 struct seccomp_notif req = {}; 3087 struct seccomp_notif_resp resp = {}; 3088 struct pollfd pollfd; 3089 3090 struct sock_filter filter[] = { 3091 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3092 }; 3093 struct sock_fprog prog = { 3094 .len = (unsigned short)ARRAY_SIZE(filter), 3095 .filter = filter, 3096 }; 3097 3098 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3099 ASSERT_EQ(0, ret) { 3100 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3101 } 3102 3103 pid = fork(); 3104 ASSERT_GE(pid, 0); 3105 3106 /* Check that we get -ENOSYS with no listener attached */ 3107 if (pid == 0) { 3108 if (user_trap_syscall(__NR_getppid, 0) < 0) 3109 exit(1); 3110 ret = syscall(__NR_getppid); 3111 exit(ret >= 0 || errno != ENOSYS); 3112 } 3113 3114 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3115 EXPECT_EQ(true, WIFEXITED(status)); 3116 EXPECT_EQ(0, WEXITSTATUS(status)); 3117 3118 /* Add some no-op filters for grins. 
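 * (These extra SECCOMP_RET_ALLOW filters simply deepen the filter
 * stack, so the listener added next is exercised while not being the
 * only attached filter.)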
*/ 3119 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3120 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3121 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3122 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3123 3124 /* Check that the basic notification machinery works */ 3125 listener = user_trap_syscall(__NR_getppid, 3126 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3127 ASSERT_GE(listener, 0); 3128 3129 /* Installing a second listener in the chain should EBUSY */ 3130 EXPECT_EQ(user_trap_syscall(__NR_getppid, 3131 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3132 -1); 3133 EXPECT_EQ(errno, EBUSY); 3134 3135 pid = fork(); 3136 ASSERT_GE(pid, 0); 3137 3138 if (pid == 0) { 3139 ret = syscall(__NR_getppid); 3140 exit(ret != USER_NOTIF_MAGIC); 3141 } 3142 3143 pollfd.fd = listener; 3144 pollfd.events = POLLIN | POLLOUT; 3145 3146 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3147 EXPECT_EQ(pollfd.revents, POLLIN); 3148 3149 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3150 3151 pollfd.fd = listener; 3152 pollfd.events = POLLIN | POLLOUT; 3153 3154 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3155 EXPECT_EQ(pollfd.revents, POLLOUT); 3156 3157 EXPECT_EQ(req.data.nr, __NR_getppid); 3158 3159 resp.id = req.id; 3160 resp.error = 0; 3161 resp.val = USER_NOTIF_MAGIC; 3162 3163 /* check that we make sure flags == 0 */ 3164 resp.flags = 1; 3165 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3166 EXPECT_EQ(errno, EINVAL); 3167 3168 resp.flags = 0; 3169 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3170 3171 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3172 EXPECT_EQ(true, WIFEXITED(status)); 3173 EXPECT_EQ(0, WEXITSTATUS(status)); 3174 } 3175 3176 TEST(user_notification_kill_in_middle) 3177 { 3178 pid_t pid; 3179 long ret; 3180 int listener; 3181 struct seccomp_notif req = {}; 3182 struct seccomp_notif_resp resp = {}; 3183 3184 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3185 ASSERT_EQ(0, ret) { 3186 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3187 } 3188 3189 listener = user_trap_syscall(__NR_getppid, 3190 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3191 ASSERT_GE(listener, 0); 3192 3193 /* 3194 * Check that nothing bad happens when we kill the task in the middle 3195 * of a syscall. 
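 * Specifically: once the notifying task has been SIGKILLed, the
 * previously received notification id should stop validating
 * (SECCOMP_IOCTL_NOTIF_ID_VALID fails) and responding to it should
 * fail with ENOENT rather than misbehaving.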
3196 */ 3197 pid = fork(); 3198 ASSERT_GE(pid, 0); 3199 3200 if (pid == 0) { 3201 ret = syscall(__NR_getppid); 3202 exit(ret != USER_NOTIF_MAGIC); 3203 } 3204 3205 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3206 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3207 3208 EXPECT_EQ(kill(pid, SIGKILL), 0); 3209 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3210 3211 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3212 3213 resp.id = req.id; 3214 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3215 EXPECT_EQ(ret, -1); 3216 EXPECT_EQ(errno, ENOENT); 3217 } 3218 3219 static int handled = -1; 3220 3221 static void signal_handler(int signal) 3222 { 3223 if (write(handled, "c", 1) != 1) 3224 perror("write from signal"); 3225 } 3226 3227 TEST(user_notification_signal) 3228 { 3229 pid_t pid; 3230 long ret; 3231 int status, listener, sk_pair[2]; 3232 struct seccomp_notif req = {}; 3233 struct seccomp_notif_resp resp = {}; 3234 char c; 3235 3236 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3237 ASSERT_EQ(0, ret) { 3238 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3239 } 3240 3241 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3242 3243 listener = user_trap_syscall(__NR_gettid, 3244 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3245 ASSERT_GE(listener, 0); 3246 3247 pid = fork(); 3248 ASSERT_GE(pid, 0); 3249 3250 if (pid == 0) { 3251 close(sk_pair[0]); 3252 handled = sk_pair[1]; 3253 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3254 perror("signal"); 3255 exit(1); 3256 } 3257 /* 3258 * ERESTARTSYS behavior is a bit hard to test, because we need 3259 * to rely on a signal that has not yet been handled. Let's at 3260 * least check that the error code gets propagated through, and 3261 * hope that it doesn't break when there is actually a signal :) 3262 */ 3263 ret = syscall(__NR_gettid); 3264 exit(!(ret == -1 && errno == 512)); 3265 } 3266 3267 close(sk_pair[1]); 3268 3269 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3270 3271 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3272 3273 /* 3274 * Make sure the signal really is delivered, which means we're not 3275 * stuck in the user notification code any more and the notification 3276 * should be dead. 3277 */ 3278 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3279 3280 resp.id = req.id; 3281 resp.error = -EPERM; 3282 resp.val = 0; 3283 3284 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3285 EXPECT_EQ(errno, ENOENT); 3286 3287 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3288 3289 resp.id = req.id; 3290 resp.error = -512; /* -ERESTARTSYS */ 3291 resp.val = 0; 3292 3293 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3294 3295 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3296 EXPECT_EQ(true, WIFEXITED(status)); 3297 EXPECT_EQ(0, WEXITSTATUS(status)); 3298 } 3299 3300 TEST(user_notification_closed_listener) 3301 { 3302 pid_t pid; 3303 long ret; 3304 int status, listener; 3305 3306 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3307 ASSERT_EQ(0, ret) { 3308 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3309 } 3310 3311 listener = user_trap_syscall(__NR_getppid, 3312 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3313 ASSERT_GE(listener, 0); 3314 3315 /* 3316 * Check that we get an ENOSYS when the listener is closed. 
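 * With no listener fd left open, a syscall that hits
 * SECCOMP_RET_USER_NOTIF has nobody to answer it, so the child should
 * see the call fail with -1/ENOSYS instead of blocking forever.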
3317 */ 3318 pid = fork(); 3319 ASSERT_GE(pid, 0); 3320 if (pid == 0) { 3321 close(listener); 3322 ret = syscall(__NR_getppid); 3323 exit(ret != -1 && errno != ENOSYS); 3324 } 3325 3326 close(listener); 3327 3328 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3329 EXPECT_EQ(true, WIFEXITED(status)); 3330 EXPECT_EQ(0, WEXITSTATUS(status)); 3331 } 3332 3333 /* 3334 * Check that a pid in a child namespace still shows up as valid in ours. 3335 */ 3336 TEST(user_notification_child_pid_ns) 3337 { 3338 pid_t pid; 3339 int status, listener; 3340 struct seccomp_notif req = {}; 3341 struct seccomp_notif_resp resp = {}; 3342 3343 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0); 3344 3345 listener = user_trap_syscall(__NR_getppid, 3346 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3347 ASSERT_GE(listener, 0); 3348 3349 pid = fork(); 3350 ASSERT_GE(pid, 0); 3351 3352 if (pid == 0) 3353 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3354 3355 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3356 EXPECT_EQ(req.pid, pid); 3357 3358 resp.id = req.id; 3359 resp.error = 0; 3360 resp.val = USER_NOTIF_MAGIC; 3361 3362 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3363 3364 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3365 EXPECT_EQ(true, WIFEXITED(status)); 3366 EXPECT_EQ(0, WEXITSTATUS(status)); 3367 close(listener); 3368 } 3369 3370 /* 3371 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3372 * invalid. 3373 */ 3374 TEST(user_notification_sibling_pid_ns) 3375 { 3376 pid_t pid, pid2; 3377 int status, listener; 3378 struct seccomp_notif req = {}; 3379 struct seccomp_notif_resp resp = {}; 3380 3381 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3382 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3383 } 3384 3385 listener = user_trap_syscall(__NR_getppid, 3386 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3387 ASSERT_GE(listener, 0); 3388 3389 pid = fork(); 3390 ASSERT_GE(pid, 0); 3391 3392 if (pid == 0) { 3393 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3394 3395 pid2 = fork(); 3396 ASSERT_GE(pid2, 0); 3397 3398 if (pid2 == 0) 3399 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3400 3401 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3402 EXPECT_EQ(true, WIFEXITED(status)); 3403 EXPECT_EQ(0, WEXITSTATUS(status)); 3404 exit(WEXITSTATUS(status)); 3405 } 3406 3407 /* Create the sibling ns, and sibling in it. */ 3408 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3409 ASSERT_EQ(errno, 0); 3410 3411 pid2 = fork(); 3412 ASSERT_GE(pid2, 0); 3413 3414 if (pid2 == 0) { 3415 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3416 /* 3417 * The pid should be 0, i.e. the task is in some namespace that 3418 * we can't "see". 
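 * (A pid of 0 is how the kernel reports a task whose pid has no
 * mapping in the reader's pid namespace.)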
3419 */ 3420 EXPECT_EQ(req.pid, 0); 3421 3422 resp.id = req.id; 3423 resp.error = 0; 3424 resp.val = USER_NOTIF_MAGIC; 3425 3426 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3427 exit(0); 3428 } 3429 3430 close(listener); 3431 3432 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3433 EXPECT_EQ(true, WIFEXITED(status)); 3434 EXPECT_EQ(0, WEXITSTATUS(status)); 3435 3436 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3437 EXPECT_EQ(true, WIFEXITED(status)); 3438 EXPECT_EQ(0, WEXITSTATUS(status)); 3439 } 3440 3441 TEST(user_notification_fault_recv) 3442 { 3443 pid_t pid; 3444 int status, listener; 3445 struct seccomp_notif req = {}; 3446 struct seccomp_notif_resp resp = {}; 3447 3448 ASSERT_EQ(unshare(CLONE_NEWUSER), 0); 3449 3450 listener = user_trap_syscall(__NR_getppid, 3451 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3452 ASSERT_GE(listener, 0); 3453 3454 pid = fork(); 3455 ASSERT_GE(pid, 0); 3456 3457 if (pid == 0) 3458 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3459 3460 /* Do a bad recv() */ 3461 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3462 EXPECT_EQ(errno, EFAULT); 3463 3464 /* We should still be able to receive this notification, though. */ 3465 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3466 EXPECT_EQ(req.pid, pid); 3467 3468 resp.id = req.id; 3469 resp.error = 0; 3470 resp.val = USER_NOTIF_MAGIC; 3471 3472 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3473 3474 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3475 EXPECT_EQ(true, WIFEXITED(status)); 3476 EXPECT_EQ(0, WEXITSTATUS(status)); 3477 } 3478 3479 TEST(seccomp_get_notif_sizes) 3480 { 3481 struct seccomp_notif_sizes sizes; 3482 3483 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3484 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3485 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3486 } 3487 3488 /* 3489 * TODO: 3490 * - add microbenchmarks 3491 * - expand NNP testing 3492 * - better arch-specific TRACE and TRAP handlers. 3493 * - endianness checking when appropriate 3494 * - 64-bit arg prodding 3495 * - arch value testing (x86 modes especially) 3496 * - verify that FILTER_FLAG_LOG filters generate log messages 3497 * - verify that RET_LOG generates log messages 3498 * - ... 3499 */ 3500 3501 TEST_HARNESS_MAIN 3502