xref: /openbmc/linux/tools/testing/selftests/seccomp/seccomp_bpf.c (revision 81464192839de0b5bc84c5739381101e04d94f62)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10 
/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 *
 * __GLIBC_PREREQ is tested for separately: when the macro is not defined
 * at all, the function-like invocation below would be a preprocessor
 * syntax error, so the workaround is simply skipped in that case.
 */
#if defined(__GLIBC_PREREQ)
# if !__GLIBC_PREREQ(2, 26)
#  include <asm/siginfo.h>
#  define __have_siginfo_t 1
#  define __have_sigval_t 1
#  define __have_sigevent_t 1
# endif
#endif
22 
23 #include <errno.h>
24 #include <linux/filter.h>
25 #include <sys/prctl.h>
26 #include <sys/ptrace.h>
27 #include <sys/user.h>
28 #include <linux/prctl.h>
29 #include <linux/ptrace.h>
30 #include <linux/seccomp.h>
31 #include <pthread.h>
32 #include <semaphore.h>
33 #include <signal.h>
34 #include <stddef.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <time.h>
38 #include <limits.h>
39 #include <linux/elf.h>
40 #include <sys/uio.h>
41 #include <sys/utsname.h>
42 #include <sys/fcntl.h>
43 #include <sys/mman.h>
44 #include <sys/times.h>
45 #include <sys/socket.h>
46 #include <sys/ioctl.h>
47 #include <linux/kcmp.h>
48 
49 #include <unistd.h>
50 #include <sys/syscall.h>
51 #include <poll.h>
52 
53 #include "../kselftest_harness.h"
54 
/*
 * Fallback values for prctl() options and seccomp modes, used only when
 * the installed headers do not already define them.
 */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER		0x59616d61
#endif

#ifndef PR_SET_NO_NEW_PRIVS
# define PR_SET_NO_NEW_PRIVS	38
# define PR_GET_NO_NEW_PRIVS	39
#endif

#ifndef PR_SECCOMP_EXT
# define PR_SECCOMP_EXT		43
#endif

#ifndef SECCOMP_EXT_ACT
# define SECCOMP_EXT_ACT	1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
# define SECCOMP_EXT_ACT_TSYNC	1
#endif

#ifndef SECCOMP_MODE_STRICT
# define SECCOMP_MODE_STRICT	1
#endif

#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER	2
#endif
83 
/*
 * Local definition of the record a seccomp filter inspects, provided only
 * when the headers did not supply SECCOMP_RET_ALLOW. The filters in this
 * file load "nr" to match syscall numbers and "args[]" to match arguments.
 */
#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int	nr;
	__u32	arch;
	__u64	instruction_pointer;
	__u64	args[6];
};
#endif
92 
/* Fallback filter return actions for headers that predate them. */
#ifndef SECCOMP_RET_KILL_PROCESS
# define SECCOMP_RET_KILL_PROCESS	0x80000000U /* kill the process */
# define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
#endif

#ifndef SECCOMP_RET_KILL
# define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
# define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
#endif

#ifndef SECCOMP_RET_LOG
# define SECCOMP_RET_LOG	0x7ffc0000U /* allow after logging */
#endif
107 
/*
 * Per-architecture number of the seccomp syscall, for headers that do not
 * define it. Unknown architectures get a warning and a placeholder value.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp	354
# elif defined(__x86_64__)
#  define __NR_seccomp	317
# elif defined(__arm__)
#  define __NR_seccomp	383
# elif defined(__aarch64__)
#  define __NR_seccomp	277
# elif defined(__riscv)
#  define __NR_seccomp	277
# elif defined(__hppa__)
#  define __NR_seccomp	338
# elif defined(__powerpc__)
#  define __NR_seccomp	358
# elif defined(__s390__)
#  define __NR_seccomp	348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp	0xffff
# endif
#endif
130 
/* Fallback operation codes for the seccomp() syscall. */
#ifndef SECCOMP_SET_MODE_STRICT
# define SECCOMP_SET_MODE_STRICT	0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
# define SECCOMP_SET_MODE_FILTER	1
#endif

#ifndef SECCOMP_GET_ACTION_AVAIL
# define SECCOMP_GET_ACTION_AVAIL	2
#endif

#ifndef SECCOMP_GET_NOTIF_SIZES
# define SECCOMP_GET_NOTIF_SIZES	3
#endif

/* Fallback flag bits accepted together with SECCOMP_SET_MODE_FILTER. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC
# define SECCOMP_FILTER_FLAG_TSYNC	(1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_LOG
# define SECCOMP_FILTER_FLAG_LOG	(1UL << 1)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
# define SECCOMP_FILTER_FLAG_SPEC_ALLOW	(1UL << 2)
#endif
158 
/*
 * Fallback for the ptrace request that reads a filter's metadata, along
 * with the structure exchanged by that request.
 */
#ifndef PTRACE_SECCOMP_GET_METADATA
# define PTRACE_SECCOMP_GET_METADATA	0x420d

struct seccomp_metadata {
	__u64	filter_off;	/* Input: which filter */
	__u64	flags;		/* Output: filter's flags */
};
#endif
167 
/*
 * Fallback definitions for the seccomp user-notification interface: the
 * listener flag, the USER_NOTIF return action, the notification-fd ioctls
 * and the structures they carry. All of it is keyed on the listener flag
 * being absent from the installed headers.
 */
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
# define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)

# define SECCOMP_RET_USER_NOTIF		0x7fc00000U

# define SECCOMP_IOC_MAGIC		'!'
# define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
# define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
# define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
# define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)

/* Flags for seccomp notification fd ioctl. */
# define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
# define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
						struct seccomp_notif_resp)
# define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOR(2, __u64)

/* Payload of SECCOMP_IOCTL_NOTIF_RECV. */
struct seccomp_notif {
	__u64			id;
	__u32			pid;
	__u32			flags;
	struct seccomp_data	data;
};

/* Payload of SECCOMP_IOCTL_NOTIF_SEND. */
struct seccomp_notif_resp {
	__u64	id;
	__s64	val;
	__s32	error;
	__u32	flags;
};

/* One 16-bit size field per notification-related structure. */
struct seccomp_notif_sizes {
	__u16	seccomp_notif;
	__u16	seccomp_notif_resp;
	__u16	seccomp_data;
};
#endif
205 
/* Fallback ptrace event-message values for syscall entry and exit. */
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
# define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
# define PTRACE_EVENTMSG_SYSCALL_EXIT	2
#endif

/* Fallback flag for a user-notification response. */
#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
# define SECCOMP_USER_NOTIF_FLAG_CONTINUE	0x00000001
#endif

/* Fallback filter-install flag. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
# define SECCOMP_FILTER_FLAG_TSYNC_ESRCH	(1UL << 4)
#endif
218 
#ifndef seccomp
/*
 * Invoke the seccomp syscall directly through syscall().
 *
 * errno is reset to 0 before the call. The value returned by syscall() is
 * handed back to the caller converted to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);

	return rc;
}
#endif
226 
/*
 * syscall_arg(_n): byte offset inside struct seccomp_data used by the
 * 32-bit BPF loads in this file to read syscall argument _n. On big-endian
 * builds the offset is moved forward by sizeof(__u32) within the 64-bit
 * slot.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
# error "wut? Unknown __BYTE_ORDER?!"
#endif
234 
/* Marker values used as thread exit statuses by the tests below. */
#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
238 
239 TEST(mode_strict_support)
240 {
241 	long ret;
242 
243 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
244 	ASSERT_EQ(0, ret) {
245 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
246 	}
247 	syscall(__NR_exit, 0);
248 }
249 
250 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
251 {
252 	long ret;
253 
254 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
255 	ASSERT_EQ(0, ret) {
256 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
257 	}
258 	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
259 		NULL, NULL, NULL);
260 	EXPECT_FALSE(true) {
261 		TH_LOG("Unreachable!");
262 	}
263 }
264 
265 /* Note! This doesn't test no new privs behavior */
266 TEST(no_new_privs_support)
267 {
268 	long ret;
269 
270 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
271 	EXPECT_EQ(0, ret) {
272 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
273 	}
274 }
275 
276 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
277 TEST(mode_filter_support)
278 {
279 	long ret;
280 
281 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
282 	ASSERT_EQ(0, ret) {
283 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
284 	}
285 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
286 	EXPECT_EQ(-1, ret);
287 	EXPECT_EQ(EFAULT, errno) {
288 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
289 	}
290 }
291 
292 TEST(mode_filter_without_nnp)
293 {
294 	struct sock_filter filter[] = {
295 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
296 	};
297 	struct sock_fprog prog = {
298 		.len = (unsigned short)ARRAY_SIZE(filter),
299 		.filter = filter,
300 	};
301 	long ret;
302 
303 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
304 	ASSERT_LE(0, ret) {
305 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
306 	}
307 	errno = 0;
308 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
309 	/* Succeeds with CAP_SYS_ADMIN, fails without */
310 	/* TODO(wad) check caps not euid */
311 	if (geteuid()) {
312 		EXPECT_EQ(-1, ret);
313 		EXPECT_EQ(EACCES, errno);
314 	} else {
315 		EXPECT_EQ(0, ret);
316 	}
317 }
318 
319 #define MAX_INSNS_PER_PATH 32768
320 
321 TEST(filter_size_limits)
322 {
323 	int i;
324 	int count = BPF_MAXINSNS + 1;
325 	struct sock_filter allow[] = {
326 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
327 	};
328 	struct sock_filter *filter;
329 	struct sock_fprog prog = { };
330 	long ret;
331 
332 	filter = calloc(count, sizeof(*filter));
333 	ASSERT_NE(NULL, filter);
334 
335 	for (i = 0; i < count; i++)
336 		filter[i] = allow[0];
337 
338 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
339 	ASSERT_EQ(0, ret);
340 
341 	prog.filter = filter;
342 	prog.len = count;
343 
344 	/* Too many filter instructions in a single filter. */
345 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
346 	ASSERT_NE(0, ret) {
347 		TH_LOG("Installing %d insn filter was allowed", prog.len);
348 	}
349 
350 	/* One less is okay, though. */
351 	prog.len -= 1;
352 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
353 	ASSERT_EQ(0, ret) {
354 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
355 	}
356 }
357 
358 TEST(filter_chain_limits)
359 {
360 	int i;
361 	int count = BPF_MAXINSNS;
362 	struct sock_filter allow[] = {
363 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
364 	};
365 	struct sock_filter *filter;
366 	struct sock_fprog prog = { };
367 	long ret;
368 
369 	filter = calloc(count, sizeof(*filter));
370 	ASSERT_NE(NULL, filter);
371 
372 	for (i = 0; i < count; i++)
373 		filter[i] = allow[0];
374 
375 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
376 	ASSERT_EQ(0, ret);
377 
378 	prog.filter = filter;
379 	prog.len = 1;
380 
381 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
382 	ASSERT_EQ(0, ret);
383 
384 	prog.len = count;
385 
386 	/* Too many total filter instructions. */
387 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
388 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
389 		if (ret != 0)
390 			break;
391 	}
392 	ASSERT_NE(0, ret) {
393 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
394 		       i, count, i * (count + 4));
395 	}
396 }
397 
/* With a filter installed, a request for strict mode must fail with EINVAL. */
TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_insns,
		.len = (unsigned short)ARRAY_SIZE(allow_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
419 
420 
/* PR_GET_SECCOMP must report 0 before a filter is installed and 2 afterwards. */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_insns,
		.len = (unsigned short)ARRAY_SIZE(allow_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	/* Before installing anything. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, ret);

	/* After the filter is in place. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(2, ret);
}
444 
445 
/* A one-instruction filter that returns SECCOMP_RET_ALLOW must install. */
TEST(ALLOW_all)
{
	struct sock_filter allow_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_insns,
		.len = (unsigned short)ARRAY_SIZE(allow_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);
}
463 
/* A program with no instructions must be rejected with EINVAL. */
TEST(empty_prog)
{
	struct sock_filter no_insns[] = {
	};
	struct sock_fprog fprog = {
		.filter = no_insns,
		.len = (unsigned short)ARRAY_SIZE(no_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
481 
/* With every syscall returning SECCOMP_RET_LOG, getppid() must still work. */
TEST(log_all)
{
	struct sock_filter log_insns[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog fprog = {
		.filter = log_insns,
		.len = (unsigned short)ARRAY_SIZE(log_insns),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	/* getppid() should succeed and be logged (no check for logging) */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}
503 
504 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
505 {
506 	struct sock_filter filter[] = {
507 		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
508 	};
509 	struct sock_fprog prog = {
510 		.len = (unsigned short)ARRAY_SIZE(filter),
511 		.filter = filter,
512 	};
513 	long ret;
514 
515 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
516 	ASSERT_EQ(0, ret);
517 
518 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
519 	ASSERT_EQ(0, ret);
520 	EXPECT_EQ(0, syscall(__NR_getpid)) {
521 		TH_LOG("getpid() shouldn't ever return");
522 	}
523 }
524 
525 /* return code >= 0x80000000 is unused. */
526 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
527 {
528 	struct sock_filter filter[] = {
529 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
530 	};
531 	struct sock_fprog prog = {
532 		.len = (unsigned short)ARRAY_SIZE(filter),
533 		.filter = filter,
534 	};
535 	long ret;
536 
537 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
538 	ASSERT_EQ(0, ret);
539 
540 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
541 	ASSERT_EQ(0, ret);
542 	EXPECT_EQ(0, syscall(__NR_getpid)) {
543 		TH_LOG("getpid() shouldn't ever return");
544 	}
545 }
546 
547 TEST_SIGNAL(KILL_all, SIGSYS)
548 {
549 	struct sock_filter filter[] = {
550 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
551 	};
552 	struct sock_fprog prog = {
553 		.len = (unsigned short)ARRAY_SIZE(filter),
554 		.filter = filter,
555 	};
556 	long ret;
557 
558 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
559 	ASSERT_EQ(0, ret);
560 
561 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
562 	ASSERT_EQ(0, ret);
563 }
564 
565 TEST_SIGNAL(KILL_one, SIGSYS)
566 {
567 	struct sock_filter filter[] = {
568 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
569 			offsetof(struct seccomp_data, nr)),
570 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
571 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
572 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
573 	};
574 	struct sock_fprog prog = {
575 		.len = (unsigned short)ARRAY_SIZE(filter),
576 		.filter = filter,
577 	};
578 	long ret;
579 	pid_t parent = getppid();
580 
581 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
582 	ASSERT_EQ(0, ret);
583 
584 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
585 	ASSERT_EQ(0, ret);
586 
587 	EXPECT_EQ(parent, syscall(__NR_getppid));
588 	/* getpid() should never return. */
589 	EXPECT_EQ(0, syscall(__NR_getpid));
590 }
591 
592 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
593 {
594 	void *fatal_address;
595 	struct sock_filter filter[] = {
596 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
597 			offsetof(struct seccomp_data, nr)),
598 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
599 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
600 		/* Only both with lower 32-bit for now. */
601 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
602 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
603 			(unsigned long)&fatal_address, 0, 1),
604 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
605 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
606 	};
607 	struct sock_fprog prog = {
608 		.len = (unsigned short)ARRAY_SIZE(filter),
609 		.filter = filter,
610 	};
611 	long ret;
612 	pid_t parent = getppid();
613 	struct tms timebuf;
614 	clock_t clock = times(&timebuf);
615 
616 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
617 	ASSERT_EQ(0, ret);
618 
619 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
620 	ASSERT_EQ(0, ret);
621 
622 	EXPECT_EQ(parent, syscall(__NR_getppid));
623 	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
624 	/* times() should never return. */
625 	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
626 }
627 
628 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
629 {
630 #ifndef __NR_mmap2
631 	int sysno = __NR_mmap;
632 #else
633 	int sysno = __NR_mmap2;
634 #endif
635 	struct sock_filter filter[] = {
636 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
637 			offsetof(struct seccomp_data, nr)),
638 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
639 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
640 		/* Only both with lower 32-bit for now. */
641 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
642 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
643 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
644 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
645 	};
646 	struct sock_fprog prog = {
647 		.len = (unsigned short)ARRAY_SIZE(filter),
648 		.filter = filter,
649 	};
650 	long ret;
651 	pid_t parent = getppid();
652 	int fd;
653 	void *map1, *map2;
654 	int page_size = sysconf(_SC_PAGESIZE);
655 
656 	ASSERT_LT(0, page_size);
657 
658 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
659 	ASSERT_EQ(0, ret);
660 
661 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
662 	ASSERT_EQ(0, ret);
663 
664 	fd = open("/dev/zero", O_RDONLY);
665 	ASSERT_NE(-1, fd);
666 
667 	EXPECT_EQ(parent, syscall(__NR_getppid));
668 	map1 = (void *)syscall(sysno,
669 		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
670 	EXPECT_NE(MAP_FAILED, map1);
671 	/* mmap2() should never return. */
672 	map2 = (void *)syscall(sysno,
673 		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
674 	EXPECT_EQ(MAP_FAILED, map2);
675 
676 	/* The test failed, so clean up the resources. */
677 	munmap(map1, page_size);
678 	munmap(map2, page_size);
679 	close(fd);
680 }
681 
682 /* This is a thread task to die via seccomp filter violation. */
683 void *kill_thread(void *data)
684 {
685 	bool die = (bool)data;
686 
687 	if (die) {
688 		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
689 		return (void *)SIBLING_EXIT_FAILURE;
690 	}
691 
692 	return (void *)SIBLING_EXIT_UNKILLED;
693 }
694 
695 /* Prepare a thread that will kill itself or both of us. */
696 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
697 {
698 	pthread_t thread;
699 	void *status;
700 	/* Kill only when calling __NR_prctl. */
701 	struct sock_filter filter_thread[] = {
702 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
703 			offsetof(struct seccomp_data, nr)),
704 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
705 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
706 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
707 	};
708 	struct sock_fprog prog_thread = {
709 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
710 		.filter = filter_thread,
711 	};
712 	struct sock_filter filter_process[] = {
713 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
714 			offsetof(struct seccomp_data, nr)),
715 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
716 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
717 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
718 	};
719 	struct sock_fprog prog_process = {
720 		.len = (unsigned short)ARRAY_SIZE(filter_process),
721 		.filter = filter_process,
722 	};
723 
724 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
725 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
726 	}
727 
728 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
729 			     kill_process ? &prog_process : &prog_thread));
730 
731 	/*
732 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
733 	 * flag cannot be downgraded by a new filter.
734 	 */
735 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
736 
737 	/* Start a thread that will exit immediately. */
738 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
739 	ASSERT_EQ(0, pthread_join(thread, &status));
740 	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
741 
742 	/* Start a thread that will die immediately. */
743 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
744 	ASSERT_EQ(0, pthread_join(thread, &status));
745 	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
746 
747 	/*
748 	 * If we get here, only the spawned thread died. Let the parent know
749 	 * the whole process didn't die (i.e. this thread, the spawner,
750 	 * stayed running).
751 	 */
752 	exit(42);
753 }
754 
755 TEST(KILL_thread)
756 {
757 	int status;
758 	pid_t child_pid;
759 
760 	child_pid = fork();
761 	ASSERT_LE(0, child_pid);
762 	if (child_pid == 0) {
763 		kill_thread_or_group(_metadata, false);
764 		_exit(38);
765 	}
766 
767 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
768 
769 	/* If only the thread was killed, we'll see exit 42. */
770 	ASSERT_TRUE(WIFEXITED(status));
771 	ASSERT_EQ(42, WEXITSTATUS(status));
772 }
773 
774 TEST(KILL_process)
775 {
776 	int status;
777 	pid_t child_pid;
778 
779 	child_pid = fork();
780 	ASSERT_LE(0, child_pid);
781 	if (child_pid == 0) {
782 		kill_thread_or_group(_metadata, true);
783 		_exit(38);
784 	}
785 
786 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
787 
788 	/* If the entire process was killed, we'll see SIGSYS. */
789 	ASSERT_TRUE(WIFSIGNALED(status));
790 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
791 }
792 
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
/* A filter that loads from syscall_arg(6) must be refused with EINVAL. */
TEST(arg_out_of_range)
{
	struct sock_filter bad_load_insns[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = bad_load_insns,
		.len = (unsigned short)ARRAY_SIZE(bad_load_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
813 
/*
 * ERRNO_FILTER(name, errno_val) declares, in the current scope:
 *   _read_filter_<name>: instructions returning SECCOMP_RET_ERRNO with
 *                        errno_val as data for read(), ALLOW otherwise;
 *   prog_<name>:         the struct sock_fprog wrapping those instructions.
 *
 * The value parameter is deliberately not called "errno", so it cannot be
 * confused with the C library's errno, and it is parenthesized so that an
 * expression argument combines correctly with the action bits.
 */
#define ERRNO_FILTER(name, errno_val)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K,					\
			 SECCOMP_RET_ERRNO | (errno_val)),		\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
826 
827 /* Make sure basic errno values are correctly passed through a filter. */
828 TEST(ERRNO_valid)
829 {
830 	ERRNO_FILTER(valid, E2BIG);
831 	long ret;
832 	pid_t parent = getppid();
833 
834 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
835 	ASSERT_EQ(0, ret);
836 
837 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
838 	ASSERT_EQ(0, ret);
839 
840 	EXPECT_EQ(parent, syscall(__NR_getppid));
841 	EXPECT_EQ(-1, read(0, NULL, 0));
842 	EXPECT_EQ(E2BIG, errno);
843 }
844 
845 /* Make sure an errno of zero is correctly handled by the arch code. */
846 TEST(ERRNO_zero)
847 {
848 	ERRNO_FILTER(zero, 0);
849 	long ret;
850 	pid_t parent = getppid();
851 
852 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
853 	ASSERT_EQ(0, ret);
854 
855 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
856 	ASSERT_EQ(0, ret);
857 
858 	EXPECT_EQ(parent, syscall(__NR_getppid));
859 	/* "errno" of 0 is ok. */
860 	EXPECT_EQ(0, read(0, NULL, 0));
861 }
862 
863 /*
864  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
865  * This tests that the errno value gets capped correctly, fixed by
866  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
867  */
868 TEST(ERRNO_capped)
869 {
870 	ERRNO_FILTER(capped, 4096);
871 	long ret;
872 	pid_t parent = getppid();
873 
874 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
875 	ASSERT_EQ(0, ret);
876 
877 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
878 	ASSERT_EQ(0, ret);
879 
880 	EXPECT_EQ(parent, syscall(__NR_getppid));
881 	EXPECT_EQ(-1, read(0, NULL, 0));
882 	EXPECT_EQ(4095, errno);
883 }
884 
885 /*
886  * Filters are processed in reverse order: last applied is executed first.
887  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
888  * SECCOMP_RET_DATA mask results will follow the most recently applied
889  * matching filter return (and not the lowest or highest value).
890  */
891 TEST(ERRNO_order)
892 {
893 	ERRNO_FILTER(first,  11);
894 	ERRNO_FILTER(second, 13);
895 	ERRNO_FILTER(third,  12);
896 	long ret;
897 	pid_t parent = getppid();
898 
899 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
900 	ASSERT_EQ(0, ret);
901 
902 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
903 	ASSERT_EQ(0, ret);
904 
905 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
906 	ASSERT_EQ(0, ret);
907 
908 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
909 	ASSERT_EQ(0, ret);
910 
911 	EXPECT_EQ(parent, syscall(__NR_getppid));
912 	EXPECT_EQ(-1, read(0, NULL, 0));
913 	EXPECT_EQ(12, errno);
914 }
915 
916 FIXTURE(TRAP) {
917 	struct sock_fprog prog;
918 };
919 
920 FIXTURE_SETUP(TRAP)
921 {
922 	struct sock_filter filter[] = {
923 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
924 			offsetof(struct seccomp_data, nr)),
925 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
926 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
927 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
928 	};
929 
930 	memset(&self->prog, 0, sizeof(self->prog));
931 	self->prog.filter = malloc(sizeof(filter));
932 	ASSERT_NE(NULL, self->prog.filter);
933 	memcpy(self->prog.filter, filter, sizeof(filter));
934 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
935 }
936 
937 FIXTURE_TEARDOWN(TRAP)
938 {
939 	if (self->prog.filter)
940 		free(self->prog.filter);
941 }
942 
943 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
944 {
945 	long ret;
946 
947 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
948 	ASSERT_EQ(0, ret);
949 
950 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
951 	ASSERT_EQ(0, ret);
952 	syscall(__NR_getpid);
953 }
954 
955 /* Ensure that SIGSYS overrides SIG_IGN */
956 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
957 {
958 	long ret;
959 
960 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
961 	ASSERT_EQ(0, ret);
962 
963 	signal(SIGSYS, SIG_IGN);
964 
965 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
966 	ASSERT_EQ(0, ret);
967 	syscall(__NR_getpid);
968 }
969 
970 static siginfo_t TRAP_info;
971 static volatile int TRAP_nr;
972 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
973 {
974 	memcpy(&TRAP_info, info, sizeof(TRAP_info));
975 	TRAP_nr = nr;
976 }
977 
978 TEST_F(TRAP, handler)
979 {
980 	int ret, test;
981 	struct sigaction act;
982 	sigset_t mask;
983 
984 	memset(&act, 0, sizeof(act));
985 	sigemptyset(&mask);
986 	sigaddset(&mask, SIGSYS);
987 
988 	act.sa_sigaction = &TRAP_action;
989 	act.sa_flags = SA_SIGINFO;
990 	ret = sigaction(SIGSYS, &act, NULL);
991 	ASSERT_EQ(0, ret) {
992 		TH_LOG("sigaction failed");
993 	}
994 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
995 	ASSERT_EQ(0, ret) {
996 		TH_LOG("sigprocmask failed");
997 	}
998 
999 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1000 	ASSERT_EQ(0, ret);
1001 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1002 	ASSERT_EQ(0, ret);
1003 	TRAP_nr = 0;
1004 	memset(&TRAP_info, 0, sizeof(TRAP_info));
1005 	/* Expect the registers to be rolled back. (nr = error) may vary
1006 	 * based on arch. */
1007 	ret = syscall(__NR_getpid);
1008 	/* Silence gcc warning about volatile. */
1009 	test = TRAP_nr;
1010 	EXPECT_EQ(SIGSYS, test);
1011 	struct local_sigsys {
1012 		void *_call_addr;	/* calling user insn */
1013 		int _syscall;		/* triggering system call number */
1014 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
1015 	} *sigsys = (struct local_sigsys *)
1016 #ifdef si_syscall
1017 		&(TRAP_info.si_call_addr);
1018 #else
1019 		&TRAP_info.si_pid;
1020 #endif
1021 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1022 	/* Make sure arch is non-zero. */
1023 	EXPECT_NE(0, sigsys->_arch);
1024 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1025 }
1026 
/* Fixture state: one program per filter return action exercised by the precedence tests. */
FIXTURE(precedence) {
	struct sock_fprog allow;	/* SECCOMP_RET_ALLOW */
	struct sock_fprog log;		/* SECCOMP_RET_LOG */
	struct sock_fprog trace;	/* SECCOMP_RET_TRACE */
	struct sock_fprog error;	/* SECCOMP_RET_ERRNO */
	struct sock_fprog trap;		/* SECCOMP_RET_TRAP */
	struct sock_fprog kill;		/* SECCOMP_RET_KILL */
};
1035 
1036 FIXTURE_SETUP(precedence)
1037 {
1038 	struct sock_filter allow_insns[] = {
1039 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1040 	};
1041 	struct sock_filter log_insns[] = {
1042 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1043 			offsetof(struct seccomp_data, nr)),
1044 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1045 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1046 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1047 	};
1048 	struct sock_filter trace_insns[] = {
1049 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1050 			offsetof(struct seccomp_data, nr)),
1051 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1052 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1053 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1054 	};
1055 	struct sock_filter error_insns[] = {
1056 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1057 			offsetof(struct seccomp_data, nr)),
1058 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1059 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1060 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1061 	};
1062 	struct sock_filter trap_insns[] = {
1063 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1064 			offsetof(struct seccomp_data, nr)),
1065 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1066 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1067 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1068 	};
1069 	struct sock_filter kill_insns[] = {
1070 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1071 			offsetof(struct seccomp_data, nr)),
1072 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1073 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1074 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1075 	};
1076 
1077 	memset(self, 0, sizeof(*self));
1078 #define FILTER_ALLOC(_x) \
1079 	self->_x.filter = malloc(sizeof(_x##_insns)); \
1080 	ASSERT_NE(NULL, self->_x.filter); \
1081 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1082 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1083 	FILTER_ALLOC(allow);
1084 	FILTER_ALLOC(log);
1085 	FILTER_ALLOC(trace);
1086 	FILTER_ALLOC(error);
1087 	FILTER_ALLOC(trap);
1088 	FILTER_ALLOC(kill);
1089 }
1090 
1091 FIXTURE_TEARDOWN(precedence)
1092 {
1093 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1094 	FILTER_FREE(allow);
1095 	FILTER_FREE(log);
1096 	FILTER_FREE(trace);
1097 	FILTER_FREE(error);
1098 	FILTER_FREE(trap);
1099 	FILTER_FREE(kill);
1100 }
1101 
1102 TEST_F(precedence, allow_ok)
1103 {
1104 	pid_t parent, res = 0;
1105 	long ret;
1106 
1107 	parent = getppid();
1108 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1109 	ASSERT_EQ(0, ret);
1110 
1111 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1112 	ASSERT_EQ(0, ret);
1113 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1114 	ASSERT_EQ(0, ret);
1115 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1116 	ASSERT_EQ(0, ret);
1117 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1118 	ASSERT_EQ(0, ret);
1119 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1120 	ASSERT_EQ(0, ret);
1121 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1122 	ASSERT_EQ(0, ret);
1123 	/* Should work just fine. */
1124 	res = syscall(__NR_getppid);
1125 	EXPECT_EQ(parent, res);
1126 }
1127 
1128 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1129 {
1130 	pid_t parent, res = 0;
1131 	long ret;
1132 
1133 	parent = getppid();
1134 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1135 	ASSERT_EQ(0, ret);
1136 
1137 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1138 	ASSERT_EQ(0, ret);
1139 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1140 	ASSERT_EQ(0, ret);
1141 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1142 	ASSERT_EQ(0, ret);
1143 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1144 	ASSERT_EQ(0, ret);
1145 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1146 	ASSERT_EQ(0, ret);
1147 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1148 	ASSERT_EQ(0, ret);
1149 	/* Should work just fine. */
1150 	res = syscall(__NR_getppid);
1151 	EXPECT_EQ(parent, res);
1152 	/* getpid() should never return. */
1153 	res = syscall(__NR_getpid);
1154 	EXPECT_EQ(0, res);
1155 }
1156 
1157 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1158 {
1159 	pid_t parent;
1160 	long ret;
1161 
1162 	parent = getppid();
1163 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1164 	ASSERT_EQ(0, ret);
1165 
1166 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1167 	ASSERT_EQ(0, ret);
1168 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1169 	ASSERT_EQ(0, ret);
1170 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1171 	ASSERT_EQ(0, ret);
1172 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1173 	ASSERT_EQ(0, ret);
1174 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1175 	ASSERT_EQ(0, ret);
1176 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1177 	ASSERT_EQ(0, ret);
1178 	/* Should work just fine. */
1179 	EXPECT_EQ(parent, syscall(__NR_getppid));
1180 	/* getpid() should never return. */
1181 	EXPECT_EQ(0, syscall(__NR_getpid));
1182 }
1183 
1184 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1185 {
1186 	pid_t parent;
1187 	long ret;
1188 
1189 	parent = getppid();
1190 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1191 	ASSERT_EQ(0, ret);
1192 
1193 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1194 	ASSERT_EQ(0, ret);
1195 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1196 	ASSERT_EQ(0, ret);
1197 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1198 	ASSERT_EQ(0, ret);
1199 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1200 	ASSERT_EQ(0, ret);
1201 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1202 	ASSERT_EQ(0, ret);
1203 	/* Should work just fine. */
1204 	EXPECT_EQ(parent, syscall(__NR_getppid));
1205 	/* getpid() should never return. */
1206 	EXPECT_EQ(0, syscall(__NR_getpid));
1207 }
1208 
1209 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1210 {
1211 	pid_t parent;
1212 	long ret;
1213 
1214 	parent = getppid();
1215 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1216 	ASSERT_EQ(0, ret);
1217 
1218 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1219 	ASSERT_EQ(0, ret);
1220 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1221 	ASSERT_EQ(0, ret);
1222 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1223 	ASSERT_EQ(0, ret);
1224 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1225 	ASSERT_EQ(0, ret);
1226 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1227 	ASSERT_EQ(0, ret);
1228 	/* Should work just fine. */
1229 	EXPECT_EQ(parent, syscall(__NR_getppid));
1230 	/* getpid() should never return. */
1231 	EXPECT_EQ(0, syscall(__NR_getpid));
1232 }
1233 
1234 TEST_F(precedence, errno_is_third)
1235 {
1236 	pid_t parent;
1237 	long ret;
1238 
1239 	parent = getppid();
1240 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1241 	ASSERT_EQ(0, ret);
1242 
1243 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1244 	ASSERT_EQ(0, ret);
1245 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1246 	ASSERT_EQ(0, ret);
1247 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1248 	ASSERT_EQ(0, ret);
1249 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1250 	ASSERT_EQ(0, ret);
1251 	/* Should work just fine. */
1252 	EXPECT_EQ(parent, syscall(__NR_getppid));
1253 	EXPECT_EQ(0, syscall(__NR_getpid));
1254 }
1255 
1256 TEST_F(precedence, errno_is_third_in_any_order)
1257 {
1258 	pid_t parent;
1259 	long ret;
1260 
1261 	parent = getppid();
1262 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1263 	ASSERT_EQ(0, ret);
1264 
1265 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1266 	ASSERT_EQ(0, ret);
1267 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1268 	ASSERT_EQ(0, ret);
1269 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1270 	ASSERT_EQ(0, ret);
1271 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1272 	ASSERT_EQ(0, ret);
1273 	/* Should work just fine. */
1274 	EXPECT_EQ(parent, syscall(__NR_getppid));
1275 	EXPECT_EQ(0, syscall(__NR_getpid));
1276 }
1277 
1278 TEST_F(precedence, trace_is_fourth)
1279 {
1280 	pid_t parent;
1281 	long ret;
1282 
1283 	parent = getppid();
1284 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1285 	ASSERT_EQ(0, ret);
1286 
1287 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1288 	ASSERT_EQ(0, ret);
1289 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1290 	ASSERT_EQ(0, ret);
1291 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1292 	ASSERT_EQ(0, ret);
1293 	/* Should work just fine. */
1294 	EXPECT_EQ(parent, syscall(__NR_getppid));
1295 	/* No ptracer */
1296 	EXPECT_EQ(-1, syscall(__NR_getpid));
1297 }
1298 
1299 TEST_F(precedence, trace_is_fourth_in_any_order)
1300 {
1301 	pid_t parent;
1302 	long ret;
1303 
1304 	parent = getppid();
1305 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1306 	ASSERT_EQ(0, ret);
1307 
1308 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1309 	ASSERT_EQ(0, ret);
1310 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1311 	ASSERT_EQ(0, ret);
1312 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1313 	ASSERT_EQ(0, ret);
1314 	/* Should work just fine. */
1315 	EXPECT_EQ(parent, syscall(__NR_getppid));
1316 	/* No ptracer */
1317 	EXPECT_EQ(-1, syscall(__NR_getpid));
1318 }
1319 
1320 TEST_F(precedence, log_is_fifth)
1321 {
1322 	pid_t mypid, parent;
1323 	long ret;
1324 
1325 	mypid = getpid();
1326 	parent = getppid();
1327 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1328 	ASSERT_EQ(0, ret);
1329 
1330 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1331 	ASSERT_EQ(0, ret);
1332 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1333 	ASSERT_EQ(0, ret);
1334 	/* Should work just fine. */
1335 	EXPECT_EQ(parent, syscall(__NR_getppid));
1336 	/* Should also work just fine */
1337 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1338 }
1339 
1340 TEST_F(precedence, log_is_fifth_in_any_order)
1341 {
1342 	pid_t mypid, parent;
1343 	long ret;
1344 
1345 	mypid = getpid();
1346 	parent = getppid();
1347 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1348 	ASSERT_EQ(0, ret);
1349 
1350 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1351 	ASSERT_EQ(0, ret);
1352 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1353 	ASSERT_EQ(0, ret);
1354 	/* Should work just fine. */
1355 	EXPECT_EQ(parent, syscall(__NR_getppid));
1356 	/* Should also work just fine */
1357 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1358 }
1359 
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits above bit 15 of a wait status equal
 * PTRACE_EVENT_SECCOMP.  The argument is parenthesized so that compound
 * expressions such as "a | b" are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop.  It is cleared from a signal handler, so
 * it is declared volatile sig_atomic_t: that is the object type the C
 * standard allows a handler to write, and volatile keeps the polling
 * loop from caching the value.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to stop. @sig is unused. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
1379 
1380 typedef void tracer_func_t(struct __test_metadata *_metadata,
1381 			   pid_t tracee, int status, void *args);
1382 
1383 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1384 	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1385 {
1386 	int ret = -1;
1387 	struct sigaction action = {
1388 		.sa_handler = tracer_stop,
1389 	};
1390 
1391 	/* Allow external shutdown. */
1392 	tracer_running = true;
1393 	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1394 
1395 	errno = 0;
1396 	while (ret == -1 && errno != EINVAL)
1397 		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1398 	ASSERT_EQ(0, ret) {
1399 		kill(tracee, SIGKILL);
1400 	}
1401 	/* Wait for attach stop */
1402 	wait(NULL);
1403 
1404 	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1405 						      PTRACE_O_TRACESYSGOOD :
1406 						      PTRACE_O_TRACESECCOMP);
1407 	ASSERT_EQ(0, ret) {
1408 		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1409 		kill(tracee, SIGKILL);
1410 	}
1411 	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1412 		     tracee, NULL, 0);
1413 	ASSERT_EQ(0, ret);
1414 
1415 	/* Unblock the tracee */
1416 	ASSERT_EQ(1, write(fd, "A", 1));
1417 	ASSERT_EQ(0, close(fd));
1418 
1419 	/* Run until we're shut down. Must assert to stop execution. */
1420 	while (tracer_running) {
1421 		int status;
1422 
1423 		if (wait(&status) != tracee)
1424 			continue;
1425 		if (WIFSIGNALED(status) || WIFEXITED(status))
1426 			/* Child is dead. Time to go. */
1427 			return;
1428 
1429 		/* Check if this is a seccomp event. */
1430 		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1431 
1432 		tracer_func(_metadata, tracee, status, args);
1433 
1434 		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1435 			     tracee, NULL, 0);
1436 		ASSERT_EQ(0, ret);
1437 	}
1438 	/* Directly report the status of our test harness results. */
1439 	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1440 }
1441 
1442 /* Common tracer setup/teardown functions. */
1443 void cont_handler(int num)
1444 { }
1445 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1446 			  tracer_func_t func, void *args, bool ptrace_syscall)
1447 {
1448 	char sync;
1449 	int pipefd[2];
1450 	pid_t tracer_pid;
1451 	pid_t tracee = getpid();
1452 
1453 	/* Setup a pipe for clean synchronization. */
1454 	ASSERT_EQ(0, pipe(pipefd));
1455 
1456 	/* Fork a child which we'll promote to tracer */
1457 	tracer_pid = fork();
1458 	ASSERT_LE(0, tracer_pid);
1459 	signal(SIGALRM, cont_handler);
1460 	if (tracer_pid == 0) {
1461 		close(pipefd[0]);
1462 		start_tracer(_metadata, pipefd[1], tracee, func, args,
1463 			     ptrace_syscall);
1464 		syscall(__NR_exit, 0);
1465 	}
1466 	close(pipefd[1]);
1467 	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1468 	read(pipefd[0], &sync, 1);
1469 	close(pipefd[0]);
1470 
1471 	return tracer_pid;
1472 }
1473 void teardown_trace_fixture(struct __test_metadata *_metadata,
1474 			    pid_t tracer)
1475 {
1476 	if (tracer) {
1477 		int status;
1478 		/*
1479 		 * Extract the exit code from the other process and
1480 		 * adopt it for ourselves in case its asserts failed.
1481 		 */
1482 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1483 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1484 		if (WEXITSTATUS(status))
1485 			_metadata->passed = 0;
1486 	}
1487 }
1488 
1489 /* "poke" tracer arguments and function. */
1490 struct tracer_args_poke_t {
1491 	unsigned long poke_addr;
1492 };
1493 
1494 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1495 		 void *args)
1496 {
1497 	int ret;
1498 	unsigned long msg;
1499 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1500 
1501 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1502 	EXPECT_EQ(0, ret);
1503 	/* If this fails, don't try to recover. */
1504 	ASSERT_EQ(0x1001, msg) {
1505 		kill(tracee, SIGKILL);
1506 	}
1507 	/*
1508 	 * Poke in the message.
1509 	 * Registers are not touched to try to keep this relatively arch
1510 	 * agnostic.
1511 	 */
1512 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1513 	EXPECT_EQ(0, ret);
1514 }
1515 
1516 FIXTURE(TRACE_poke) {
1517 	struct sock_fprog prog;
1518 	pid_t tracer;
1519 	long poked;
1520 	struct tracer_args_poke_t tracer_args;
1521 };
1522 
1523 FIXTURE_SETUP(TRACE_poke)
1524 {
1525 	struct sock_filter filter[] = {
1526 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1527 			offsetof(struct seccomp_data, nr)),
1528 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1529 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1530 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1531 	};
1532 
1533 	self->poked = 0;
1534 	memset(&self->prog, 0, sizeof(self->prog));
1535 	self->prog.filter = malloc(sizeof(filter));
1536 	ASSERT_NE(NULL, self->prog.filter);
1537 	memcpy(self->prog.filter, filter, sizeof(filter));
1538 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1539 
1540 	/* Set up tracer args. */
1541 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1542 
1543 	/* Launch tracer. */
1544 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1545 					   &self->tracer_args, false);
1546 }
1547 
1548 FIXTURE_TEARDOWN(TRACE_poke)
1549 {
1550 	teardown_trace_fixture(_metadata, self->tracer);
1551 	if (self->prog.filter)
1552 		free(self->prog.filter);
1553 }
1554 
1555 TEST_F(TRACE_poke, read_has_side_effects)
1556 {
1557 	ssize_t ret;
1558 
1559 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1560 	ASSERT_EQ(0, ret);
1561 
1562 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1563 	ASSERT_EQ(0, ret);
1564 
1565 	EXPECT_EQ(0, self->poked);
1566 	ret = read(-1, NULL, 0);
1567 	EXPECT_EQ(-1, ret);
1568 	EXPECT_EQ(0x1001, self->poked);
1569 }
1570 
1571 TEST_F(TRACE_poke, getpid_runs_normally)
1572 {
1573 	long ret;
1574 
1575 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1576 	ASSERT_EQ(0, ret);
1577 
1578 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1579 	ASSERT_EQ(0, ret);
1580 
1581 	EXPECT_EQ(0, self->poked);
1582 	EXPECT_NE(0, syscall(__NR_getpid));
1583 	EXPECT_EQ(0, self->poked);
1584 }
1585 
/*
 * Per-architecture register access used by get_syscall() and
 * change_syscall():
 *
 *   ARCH_REGS                 type of the register set read via ptrace
 *   SYSCALL_NUM               member holding the syscall number
 *   SYSCALL_RET               member holding the syscall return value
 *   SYSCALL_SYSCALL_NUM       (mips) member used instead of SYSCALL_NUM
 *                             when SYSCALL_NUM is __NR_O32_Linux
 *   SYSCALL_NUM_RET_SHARE_REG defined when number and return value live
 *                             in the same register
 */
#if defined(__x86_64__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM		orig_rax
# define SYSCALL_RET		rax
#elif defined(__i386__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM		orig_eax
# define SYSCALL_RET		eax
#elif defined(__arm__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM		ARM_r7
# define SYSCALL_RET		ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM		regs[8]
# define SYSCALL_RET		regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM		a7
# define SYSCALL_RET		a0
#elif defined(__hppa__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM		gr[20]
# define SYSCALL_RET		gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM		gpr[0]
# define SYSCALL_RET		gpr[3]
#elif defined(__s390__)
# define ARCH_REGS		s390_regs
# define SYSCALL_NUM		gprs[2]
# define SYSCALL_RET		gprs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM		regs[2]
# define SYSCALL_SYSCALL_NUM	regs[4]
# define SYSCALL_RET		regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1628 
/*
 * EXPECT_SYSCALL_RETURN(val, action): run @action and check its result.
 *
 * A negative @val means "fails with errno -val": @action must return -1
 * and errno must equal -(val).  Otherwise @action must return @val.
 *
 * When the syscall return can't be changed, stub out the tests for it:
 * only a -1 return is expected in that case.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		/* (val) is parenthesized so compound	\
		 * expressions compare as a whole. */	\
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1644 
1645 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
1646  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
1647  */
1648 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
1649 #define HAVE_GETREGS
1650 #endif
1651 
1652 /* Architecture-specific syscall fetching routine. */
1653 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1654 {
1655 	ARCH_REGS regs;
1656 #ifdef HAVE_GETREGS
1657 	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1658 		TH_LOG("PTRACE_GETREGS failed");
1659 		return -1;
1660 	}
1661 #else
1662 	struct iovec iov;
1663 
1664 	iov.iov_base = &regs;
1665 	iov.iov_len = sizeof(regs);
1666 	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1667 		TH_LOG("PTRACE_GETREGSET failed");
1668 		return -1;
1669 	}
1670 #endif
1671 
1672 #if defined(__mips__)
1673 	if (regs.SYSCALL_NUM == __NR_O32_Linux)
1674 		return regs.SYSCALL_SYSCALL_NUM;
1675 #endif
1676 	return regs.SYSCALL_NUM;
1677 }
1678 
1679 /* Architecture-specific syscall changing routine. */
1680 void change_syscall(struct __test_metadata *_metadata,
1681 		    pid_t tracee, int syscall, int result)
1682 {
1683 	int ret;
1684 	ARCH_REGS regs;
1685 #ifdef HAVE_GETREGS
1686 	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1687 #else
1688 	struct iovec iov;
1689 	iov.iov_base = &regs;
1690 	iov.iov_len = sizeof(regs);
1691 	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1692 #endif
1693 	EXPECT_EQ(0, ret) {}
1694 
1695 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1696 	defined(__s390__) || defined(__hppa__) || defined(__riscv)
1697 	{
1698 		regs.SYSCALL_NUM = syscall;
1699 	}
1700 #elif defined(__mips__)
1701 	{
1702 		if (regs.SYSCALL_NUM == __NR_O32_Linux)
1703 			regs.SYSCALL_SYSCALL_NUM = syscall;
1704 		else
1705 			regs.SYSCALL_NUM = syscall;
1706 	}
1707 
1708 #elif defined(__arm__)
1709 # ifndef PTRACE_SET_SYSCALL
1710 #  define PTRACE_SET_SYSCALL   23
1711 # endif
1712 	{
1713 		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1714 		EXPECT_EQ(0, ret);
1715 	}
1716 
1717 #elif defined(__aarch64__)
1718 # ifndef NT_ARM_SYSTEM_CALL
1719 #  define NT_ARM_SYSTEM_CALL 0x404
1720 # endif
1721 	{
1722 		iov.iov_base = &syscall;
1723 		iov.iov_len = sizeof(syscall);
1724 		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1725 			     &iov);
1726 		EXPECT_EQ(0, ret);
1727 	}
1728 
1729 #else
1730 	ASSERT_EQ(1, 0) {
1731 		TH_LOG("How is the syscall changed on this architecture?");
1732 	}
1733 #endif
1734 
1735 	/* If syscall is skipped, change return value. */
1736 	if (syscall == -1)
1737 #ifdef SYSCALL_NUM_RET_SHARE_REG
1738 		TH_LOG("Can't modify syscall return on this architecture");
1739 #else
1740 		regs.SYSCALL_RET = result;
1741 #endif
1742 
1743 #ifdef HAVE_GETREGS
1744 	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1745 #else
1746 	iov.iov_base = &regs;
1747 	iov.iov_len = sizeof(regs);
1748 	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1749 #endif
1750 	EXPECT_EQ(0, ret);
1751 }
1752 
1753 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1754 		    int status, void *args)
1755 {
1756 	int ret;
1757 	unsigned long msg;
1758 
1759 	/* Make sure we got the right message. */
1760 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1761 	EXPECT_EQ(0, ret);
1762 
1763 	/* Validate and take action on expected syscalls. */
1764 	switch (msg) {
1765 	case 0x1002:
1766 		/* change getpid to getppid. */
1767 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1768 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1769 		break;
1770 	case 0x1003:
1771 		/* skip gettid with valid return code. */
1772 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1773 		change_syscall(_metadata, tracee, -1, 45000);
1774 		break;
1775 	case 0x1004:
1776 		/* skip openat with error. */
1777 		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1778 		change_syscall(_metadata, tracee, -1, -ESRCH);
1779 		break;
1780 	case 0x1005:
1781 		/* do nothing (allow getppid) */
1782 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1783 		break;
1784 	default:
1785 		EXPECT_EQ(0, msg) {
1786 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1787 			kill(tracee, SIGKILL);
1788 		}
1789 	}
1790 
1791 }
1792 
1793 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1794 		   int status, void *args)
1795 {
1796 	int ret, nr;
1797 	unsigned long msg;
1798 	static bool entry;
1799 
1800 	/*
1801 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
1802 	 * is by counting.
1803 	 */
1804 	entry = !entry;
1805 
1806 	/* Make sure we got an appropriate message. */
1807 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1808 	EXPECT_EQ(0, ret);
1809 	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1810 			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1811 
1812 	if (!entry)
1813 		return;
1814 
1815 	nr = get_syscall(_metadata, tracee);
1816 
1817 	if (nr == __NR_getpid)
1818 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1819 	if (nr == __NR_gettid)
1820 		change_syscall(_metadata, tracee, -1, 45000);
1821 	if (nr == __NR_openat)
1822 		change_syscall(_metadata, tracee, -1, -ESRCH);
1823 }
1824 
1825 FIXTURE(TRACE_syscall) {
1826 	struct sock_fprog prog;
1827 	pid_t tracer, mytid, mypid, parent;
1828 };
1829 
1830 FIXTURE_SETUP(TRACE_syscall)
1831 {
1832 	struct sock_filter filter[] = {
1833 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1834 			offsetof(struct seccomp_data, nr)),
1835 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1836 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1837 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1838 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1839 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1840 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1841 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1842 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1843 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1844 	};
1845 
1846 	memset(&self->prog, 0, sizeof(self->prog));
1847 	self->prog.filter = malloc(sizeof(filter));
1848 	ASSERT_NE(NULL, self->prog.filter);
1849 	memcpy(self->prog.filter, filter, sizeof(filter));
1850 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1851 
1852 	/* Prepare some testable syscall results. */
1853 	self->mytid = syscall(__NR_gettid);
1854 	ASSERT_GT(self->mytid, 0);
1855 	ASSERT_NE(self->mytid, 1) {
1856 		TH_LOG("Running this test as init is not supported. :)");
1857 	}
1858 
1859 	self->mypid = getpid();
1860 	ASSERT_GT(self->mypid, 0);
1861 	ASSERT_EQ(self->mytid, self->mypid);
1862 
1863 	self->parent = getppid();
1864 	ASSERT_GT(self->parent, 0);
1865 	ASSERT_NE(self->parent, self->mypid);
1866 
1867 	/* Launch tracer. */
1868 	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1869 					   false);
1870 }
1871 
1872 FIXTURE_TEARDOWN(TRACE_syscall)
1873 {
1874 	teardown_trace_fixture(_metadata, self->tracer);
1875 	if (self->prog.filter)
1876 		free(self->prog.filter);
1877 }
1878 
1879 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1880 {
1881 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1882 	teardown_trace_fixture(_metadata, self->tracer);
1883 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1884 					   true);
1885 
1886 	/* Tracer will redirect getpid to getppid. */
1887 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1888 }
1889 
1890 TEST_F(TRACE_syscall, ptrace_syscall_errno)
1891 {
1892 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1893 	teardown_trace_fixture(_metadata, self->tracer);
1894 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1895 					   true);
1896 
1897 	/* Tracer should skip the open syscall, resulting in ESRCH. */
1898 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1899 }
1900 
1901 TEST_F(TRACE_syscall, ptrace_syscall_faked)
1902 {
1903 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1904 	teardown_trace_fixture(_metadata, self->tracer);
1905 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1906 					   true);
1907 
1908 	/* Tracer should skip the gettid syscall, resulting fake pid. */
1909 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1910 }
1911 
1912 TEST_F(TRACE_syscall, syscall_allowed)
1913 {
1914 	long ret;
1915 
1916 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1917 	ASSERT_EQ(0, ret);
1918 
1919 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1920 	ASSERT_EQ(0, ret);
1921 
1922 	/* getppid works as expected (no changes). */
1923 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1924 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1925 }
1926 
1927 TEST_F(TRACE_syscall, syscall_redirected)
1928 {
1929 	long ret;
1930 
1931 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1932 	ASSERT_EQ(0, ret);
1933 
1934 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1935 	ASSERT_EQ(0, ret);
1936 
1937 	/* getpid has been redirected to getppid as expected. */
1938 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1939 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1940 }
1941 
1942 TEST_F(TRACE_syscall, syscall_errno)
1943 {
1944 	long ret;
1945 
1946 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1947 	ASSERT_EQ(0, ret);
1948 
1949 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1950 	ASSERT_EQ(0, ret);
1951 
1952 	/* openat has been skipped and an errno return. */
1953 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1954 }
1955 
1956 TEST_F(TRACE_syscall, syscall_faked)
1957 {
1958 	long ret;
1959 
1960 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1961 	ASSERT_EQ(0, ret);
1962 
1963 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1964 	ASSERT_EQ(0, ret);
1965 
1966 	/* gettid has been skipped and an altered return value stored. */
1967 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1968 }
1969 
1970 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1971 {
1972 	struct sock_filter filter[] = {
1973 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1974 			offsetof(struct seccomp_data, nr)),
1975 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1976 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1977 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1978 	};
1979 	struct sock_fprog prog = {
1980 		.len = (unsigned short)ARRAY_SIZE(filter),
1981 		.filter = filter,
1982 	};
1983 	long ret;
1984 
1985 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1986 	ASSERT_EQ(0, ret);
1987 
1988 	/* Install fixture filter. */
1989 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1990 	ASSERT_EQ(0, ret);
1991 
1992 	/* Install "errno on getppid" filter. */
1993 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1994 	ASSERT_EQ(0, ret);
1995 
1996 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
1997 	errno = 0;
1998 	EXPECT_EQ(-1, syscall(__NR_getpid));
1999 	EXPECT_EQ(EPERM, errno);
2000 }
2001 
2002 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
2003 {
2004 	struct sock_filter filter[] = {
2005 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2006 			offsetof(struct seccomp_data, nr)),
2007 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2008 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2009 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2010 	};
2011 	struct sock_fprog prog = {
2012 		.len = (unsigned short)ARRAY_SIZE(filter),
2013 		.filter = filter,
2014 	};
2015 	long ret;
2016 
2017 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2018 	ASSERT_EQ(0, ret);
2019 
2020 	/* Install fixture filter. */
2021 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2022 	ASSERT_EQ(0, ret);
2023 
2024 	/* Install "death on getppid" filter. */
2025 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2026 	ASSERT_EQ(0, ret);
2027 
2028 	/* Tracer will redirect getpid to getppid, and we should die. */
2029 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2030 }
2031 
2032 TEST_F(TRACE_syscall, skip_after_ptrace)
2033 {
2034 	struct sock_filter filter[] = {
2035 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2036 			offsetof(struct seccomp_data, nr)),
2037 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2038 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2039 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2040 	};
2041 	struct sock_fprog prog = {
2042 		.len = (unsigned short)ARRAY_SIZE(filter),
2043 		.filter = filter,
2044 	};
2045 	long ret;
2046 
2047 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2048 	teardown_trace_fixture(_metadata, self->tracer);
2049 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2050 					   true);
2051 
2052 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2053 	ASSERT_EQ(0, ret);
2054 
2055 	/* Install "errno on getppid" filter. */
2056 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2057 	ASSERT_EQ(0, ret);
2058 
2059 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2060 	EXPECT_EQ(-1, syscall(__NR_getpid));
2061 	EXPECT_EQ(EPERM, errno);
2062 }
2063 
2064 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2065 {
2066 	struct sock_filter filter[] = {
2067 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2068 			offsetof(struct seccomp_data, nr)),
2069 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2070 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2071 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2072 	};
2073 	struct sock_fprog prog = {
2074 		.len = (unsigned short)ARRAY_SIZE(filter),
2075 		.filter = filter,
2076 	};
2077 	long ret;
2078 
2079 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2080 	teardown_trace_fixture(_metadata, self->tracer);
2081 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2082 					   true);
2083 
2084 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2085 	ASSERT_EQ(0, ret);
2086 
2087 	/* Install "death on getppid" filter. */
2088 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2089 	ASSERT_EQ(0, ret);
2090 
2091 	/* Tracer will redirect getpid to getppid, and we should die. */
2092 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2093 }
2094 
/*
 * Exercise argument validation of the seccomp() entry point: an invalid
 * operation, strict mode combined with flags or a uargs pointer, and filter
 * mode with invalid flags must each fail with EINVAL; a NULL filter must fail
 * with EFAULT; a well-formed allow-all filter must then install.
 *
 * Most checks look at errno rather than ret, so each one relies on errno
 * reflecting the seccomp() call directly above it.
 * NOTE(review): the final EXPECT_EQ(0, errno) only holds if seccomp() clears
 * errno before issuing the syscall - confirm against the wrapper defined
 * earlier in this file.
 */
2095 TEST(seccomp_syscall)
2096 {
2097 	struct sock_filter filter[] = {
2098 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2099 	};
2100 	struct sock_fprog prog = {
2101 		.len = (unsigned short)ARRAY_SIZE(filter),
2102 		.filter = filter,
2103 	};
2104 	long ret;
2105 
2106 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2107 	ASSERT_EQ(0, ret) {
2108 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2109 	}
2110 
2111 	/* Reject insane operation. */
2112 	ret = seccomp(-1, 0, &prog);
2113 	ASSERT_NE(ENOSYS, errno) {
2114 		TH_LOG("Kernel does not support seccomp syscall!");
2115 	}
2116 	EXPECT_EQ(EINVAL, errno) {
2117 		TH_LOG("Did not reject crazy op value!");
2118 	}
2119 
2120 	/* Reject strict with flags or pointer. */
2121 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2122 	EXPECT_EQ(EINVAL, errno) {
2123 		TH_LOG("Did not reject mode strict with flags!");
2124 	}
2125 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2126 	EXPECT_EQ(EINVAL, errno) {
2127 		TH_LOG("Did not reject mode strict with uargs!");
2128 	}
2129 
2130 	/* Reject insane args for filter. */
2131 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2132 	EXPECT_EQ(EINVAL, errno) {
2133 		TH_LOG("Did not reject crazy filter flags!");
2134 	}
2135 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2136 	EXPECT_EQ(EFAULT, errno) {
2137 		TH_LOG("Did not reject NULL filter!");
2138 	}
2139 
	/* Finally, a valid request: no flags and a real allow-all program. */
2140 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2141 	EXPECT_EQ(0, errno) {
2142 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2143 			strerror(errno));
2144 	}
2145 }
2146 
/*
 * After an allow-all filter has been installed, neither entry point may be
 * used to switch to strict mode: both prctl(PR_SET_SECCOMP,
 * SECCOMP_MODE_STRICT) and seccomp(SECCOMP_SET_MODE_STRICT) are expected to
 * leave errno at EINVAL.
 */
2147 TEST(seccomp_syscall_mode_lock)
2148 {
2149 	struct sock_filter filter[] = {
2150 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2151 	};
2152 	struct sock_fprog prog = {
2153 		.len = (unsigned short)ARRAY_SIZE(filter),
2154 		.filter = filter,
2155 	};
2156 	long ret;
2157 
2158 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2159 	ASSERT_EQ(0, ret) {
2160 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2161 	}
2162 
	/* Enter filter mode first; the strict-mode attempts below must fail. */
2163 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2164 	ASSERT_NE(ENOSYS, errno) {
2165 		TH_LOG("Kernel does not support seccomp syscall!");
2166 	}
2167 	EXPECT_EQ(0, ret) {
2168 		TH_LOG("Could not install filter!");
2169 	}
2170 
2171 	/* Make sure neither entry point will switch to strict. */
2172 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2173 	EXPECT_EQ(EINVAL, errno) {
2174 		TH_LOG("Switched to mode strict!");
2175 	}
2176 
2177 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2178 	EXPECT_EQ(EINVAL, errno) {
2179 		TH_LOG("Switched to mode strict!");
2180 	}
2181 }
2182 
2183 /*
2184  * Test detection of known and unknown filter flags. Userspace needs to be able
2185  * to check if a filter flag is supported by the current kernel and a good way
2186  * of doing that is by attempting to enter filter mode, with the flag bit in
2187  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2188  * that the flag is valid and EINVAL indicates that the flag is invalid.
2189  */
2190 TEST(detect_seccomp_filter_flags)
2191 {
	/*
	 * Flags this test treats as known-good. The last entry is also used
	 * further down to derive the "next, not yet known" flag bit.
	 */
2192 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2193 				 SECCOMP_FILTER_FLAG_LOG,
2194 				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2195 				 SECCOMP_FILTER_FLAG_NEW_LISTENER,
2196 				 SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
	/* Flags that are never combined with each other in the checks below. */
2197 	unsigned int exclusive[] = {
2198 				SECCOMP_FILTER_FLAG_TSYNC,
2199 				SECCOMP_FILTER_FLAG_NEW_LISTENER };
2200 	unsigned int flag, all_flags, exclusive_mask;
2201 	int i;
2202 	long ret;
2203 
2204 	/* Test detection of individual known-good filter flags */
2205 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2206 		int bits = 0;
2207 
2208 		flag = flags[i];
2209 		/* Make sure the flag is a single bit! */
2210 		while (flag) {
2211 			if (flag & 0x1)
2212 				bits ++;
2213 			flag >>= 1;
2214 		}
2215 		ASSERT_EQ(1, bits);
		/* The bit count above consumed flag; reload it. */
2216 		flag = flags[i];
2217 
		/* NULL args: a valid flag is expected to yield EFAULT. */
2218 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2219 		ASSERT_NE(ENOSYS, errno) {
2220 			TH_LOG("Kernel does not support seccomp syscall!");
2221 		}
2222 		EXPECT_EQ(-1, ret);
2223 		EXPECT_EQ(EFAULT, errno) {
2224 			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2225 			       flag);
2226 		}
2227 
2228 		all_flags |= flag;
2229 	}
2230 
2231 	/*
2232 	 * Test detection of all known-good filter flags combined. But
2233 	 * for the exclusive flags we need to mask them out and try them
2234 	 * individually for the "all flags" testing.
2235 	 */
2236 	exclusive_mask = 0;
2237 	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2238 		exclusive_mask |= exclusive[i];
2239 	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2240 		flag = all_flags & ~exclusive_mask;
2241 		flag |= exclusive[i];
2242 
2243 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2244 		EXPECT_EQ(-1, ret);
2245 		EXPECT_EQ(EFAULT, errno) {
2246 			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2247 			       flag);
2248 		}
2249 	}
2250 
2251 	/* Test detection of unknown filter flags, without exclusives. */
	/* -1 converts to all bits set; then drop the exclusive ones. */
2252 	flag = -1;
2253 	flag &= ~exclusive_mask;
2254 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2255 	EXPECT_EQ(-1, ret);
2256 	EXPECT_EQ(EINVAL, errno) {
2257 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2258 		       flag);
2259 	}
2260 
2261 	/*
2262 	 * Test detection of an unknown filter flag that may simply need to be
2263 	 * added to this test
2264 	 */
	/* Candidate: the bit directly above the last entry of flags[]. */
2265 	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2266 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2267 	EXPECT_EQ(-1, ret);
2268 	EXPECT_EQ(EINVAL, errno) {
2269 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2270 		       flag);
2271 	}
2272 }
2273 
/*
 * Install an allow-all filter with SECCOMP_FILTER_FLAG_TSYNC as the very
 * first filter of the task and expect the call to succeed.
 */
2274 TEST(TSYNC_first)
2275 {
2276 	struct sock_filter filter[] = {
2277 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2278 	};
2279 	struct sock_fprog prog = {
2280 		.len = (unsigned short)ARRAY_SIZE(filter),
2281 		.filter = filter,
2282 	};
2283 	long ret;
2284 
2285 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2286 	ASSERT_EQ(0, ret) {
2287 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2288 	}
2289 
2290 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2291 		      &prog);
2292 	ASSERT_NE(ENOSYS, errno) {
2293 		TH_LOG("Kernel does not support seccomp syscall!");
2294 	}
2295 	EXPECT_EQ(0, ret) {
2296 		TH_LOG("Could not install initial filter with TSYNC!");
2297 	}
2298 }
2299 
/* Number of sibling threads used by the TSYNC fixture. */
2300 #define TSYNC_SIBLINGS 2
/* Per-thread state shared between a TSYNC test and one sibling thread. */
2301 struct tsync_sibling {
2302 	pthread_t tid;		/* pthread handle; 0 = not started or already joined */
2303 	pid_t system_tid;	/* gettid() value recorded by the sibling itself */
2304 	sem_t *started;		/* posted by the sibling after its optional prctl() */
2305 	pthread_cond_t *cond;	/* condition the sibling blocks on */
2306 	pthread_mutex_t *mutex;	/* mutex paired with cond */
2307 	int diverge;		/* non-zero: sibling installs prog itself via prctl() */
2308 	int num_waits;		/* cond wake-ups to consume before continuing */
2309 	struct sock_fprog *prog;	/* filter installed when diverge is set */
2310 	struct __test_metadata *metadata;	/* harness metadata of the owning test */
2311 };
2312 
2313 /*
2314  * To avoid joining joined threads (which is not allowed by Bionic),
2315  * make sure we both successfully join and clear the tid to skip a
2316  * later join attempt during fixture teardown. Any remaining threads
2317  * will be directly killed during teardown.
 *
 * @tid:    modifiable pthread_t lvalue; set to 0 after a successful join.
 *          It is expanded more than once, so it must have no side effects.
 * @status: passed straight through to pthread_join().
 *
 * A failed join is only logged; @tid is left untouched in that case. The
 * format string carries no trailing newline, matching every other TH_LOG()
 * call in this file.
2318  */
2319 #define PTHREAD_JOIN(tid, status)					\
2320 	do {								\
2321 		int _rc = pthread_join((tid), (status));		\
2322 		if (_rc) {						\
2323 			TH_LOG("pthread_join of tid %u failed: %d",	\
2324 				(unsigned int)(tid), _rc);		\
2325 		} else {						\
2326 			(tid) = 0;					\
2327 		}							\
2328 	} while (0)
2329 
/*
 * Shared state for the TSYNC tests: two filter programs, the sibling
 * descriptors, and the synchronization objects the siblings point at.
 */
2330 FIXTURE(TSYNC) {
	/* root_prog: allow-all; apply_prog: kills on read (see FIXTURE_SETUP). */
2331 	struct sock_fprog root_prog, apply_prog;
2332 	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Posted once by every sibling that has reached its start-up point. */
2333 	sem_t started;
2334 	pthread_cond_t cond;
2335 	pthread_mutex_t mutex;
	/* Number of "started" posts consumed by the test; bounds the teardown loop. */
2336 	int sibling_count;
2337 };
2338 
/*
 * Build heap copies of the two filter programs, initialise the mutex,
 * condition variable and semaphore, and point both sibling descriptors at
 * them. Both siblings start with diverge = 0, num_waits = 1 and
 * prog = &root_prog; individual tests adjust these before starting threads.
 */
2339 FIXTURE_SETUP(TSYNC)
2340 {
	/* Allows every syscall. */
2341 	struct sock_filter root_filter[] = {
2342 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2343 	};
	/* Returns SECCOMP_RET_KILL for read and allows everything else. */
2344 	struct sock_filter apply_filter[] = {
2345 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2346 			offsetof(struct seccomp_data, nr)),
2347 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2348 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2349 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2350 	};
2351 
2352 	memset(&self->root_prog, 0, sizeof(self->root_prog));
2353 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2354 	memset(&self->sibling, 0, sizeof(self->sibling));
	/* The local arrays go out of scope on return, so copy them to the heap. */
2355 	self->root_prog.filter = malloc(sizeof(root_filter));
2356 	ASSERT_NE(NULL, self->root_prog.filter);
2357 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2358 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2359 
2360 	self->apply_prog.filter = malloc(sizeof(apply_filter));
2361 	ASSERT_NE(NULL, self->apply_prog.filter);
2362 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2363 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2364 
2365 	self->sibling_count = 0;
	/* Return values of the three init calls are not checked. */
2366 	pthread_mutex_init(&self->mutex, NULL);
2367 	pthread_cond_init(&self->cond, NULL);
2368 	sem_init(&self->started, 0, 0);
2369 	self->sibling[0].tid = 0;
2370 	self->sibling[0].cond = &self->cond;
2371 	self->sibling[0].started = &self->started;
2372 	self->sibling[0].mutex = &self->mutex;
2373 	self->sibling[0].diverge = 0;
2374 	self->sibling[0].num_waits = 1;
2375 	self->sibling[0].prog = &self->root_prog;
2376 	self->sibling[0].metadata = _metadata;
2377 	self->sibling[1].tid = 0;
2378 	self->sibling[1].cond = &self->cond;
2379 	self->sibling[1].started = &self->started;
2380 	self->sibling[1].mutex = &self->mutex;
2381 	self->sibling[1].diverge = 0;
2382 	self->sibling[1].prog = &self->root_prog;
2383 	self->sibling[1].num_waits = 1;
2384 	self->sibling[1].metadata = _metadata;
2385 }
2386 
/*
 * Release the filter copies made in FIXTURE_SETUP, signal every sibling that
 * was counted as started but never joined (PTHREAD_JOIN zeroes the tid of a
 * joined thread), and destroy the synchronization objects.
 */
2387 FIXTURE_TEARDOWN(TSYNC)
2388 {
2389 	int sib = 0;
2390 
	/* free(NULL) is a no-op, so no guards are needed. */
2392 	free(self->root_prog.filter);
2394 	free(self->apply_prog.filter);
2395 
2396 	for ( ; sib < self->sibling_count; ++sib) {
2397 		struct tsync_sibling *s = &self->sibling[sib];
2398 
2399 		if (!s->tid)
2400 			continue;
2401 		/*
2402 		 * If a thread is still running, it may be stuck, so hit
2403 		 * it over the head really hard.
		 * NOTE(review): SIGKILL cannot be handled per thread, so this
		 * presumably takes the whole test process down - confirm that
		 * is intended.
2404 		 */
2405 		pthread_kill(s->tid, SIGKILL);
2406 	}
2407 	pthread_mutex_destroy(&self->mutex);
2408 	pthread_cond_destroy(&self->cond);
2409 	sem_destroy(&self->started);
2410 }
2411 
/*
 * Thread body of a TSYNC sibling. @data is the thread's struct tsync_sibling.
 *
 * The sibling records its kernel tid, installs me->prog through prctl() if
 * me->diverge is set, posts me->started, and then blocks on me->cond until
 * it has returned from pthread_cond_wait() me->num_waits times. It then
 * reports what it observed through its return value:
 *   SIBLING_EXIT_FAILURE  - the diverging prctl() returned non-zero
 *   SIBLING_EXIT_NEWPRIVS - prctl(PR_GET_NO_NEW_PRIVS) returned 0
 *   SIBLING_EXIT_UNKILLED - the zero-length read() below returned
 */
2412 void *tsync_sibling(void *data)
2413 {
2414 	long ret = 0;
2415 	struct tsync_sibling *me = data;
2416 
2417 	me->system_tid = syscall(__NR_gettid);
2418 
	/* The mutex is held from here until pthread_cond_wait() releases it. */
2419 	pthread_mutex_lock(me->mutex);
2420 	if (me->diverge) {
2421 		/* Just re-apply the root prog to fork the tree */
2422 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2423 				me->prog, 0, 0);
2424 	}
2425 	sem_post(me->started);
2426 	/* Return outside of started so parent notices failures. */
2427 	if (ret) {
2428 		pthread_mutex_unlock(me->mutex);
2429 		return (void *)SIBLING_EXIT_FAILURE;
2430 	}
	/* Every return from pthread_cond_wait() counts as one wake-up. */
2431 	do {
2432 		pthread_cond_wait(me->cond, me->mutex);
2433 		me->num_waits = me->num_waits - 1;
2434 	} while (me->num_waits);
2435 	pthread_mutex_unlock(me->mutex);
2436 
2437 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2438 	if (!ret)
2439 		return (void *)SIBLING_EXIT_NEWPRIVS;
	/* Probe syscall: the fixture's apply filter returns KILL for read. */
2440 	read(0, NULL, 0);
2441 	return (void *)SIBLING_EXIT_UNKILLED;
2442 }
2443 
/*
 * Start one sibling thread running tsync_sibling() with @sibling as its
 * argument; the thread handle is written to sibling->tid.
 * The return value of pthread_create() is not checked.
 */
2444 void tsync_start_sibling(struct tsync_sibling *sibling)
2445 {
2446 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2447 }
2448 
/*
 * Check that a sibling reports a failed prctl(): with a filter installed that
 * answers every prctl with EINVAL, the diverging sibling 0 must return
 * SIBLING_EXIT_FAILURE while sibling 1 runs through to
 * SIBLING_EXIT_UNKILLED.
 */
2449 TEST_F(TSYNC, siblings_fail_prctl)
2450 {
2451 	long ret;
2452 	void *status;
	/* prctl -> ERRNO(EINVAL); everything else is allowed. */
2453 	struct sock_filter filter[] = {
2454 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2455 			offsetof(struct seccomp_data, nr)),
2456 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2457 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2458 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2459 	};
2460 	struct sock_fprog prog = {
2461 		.len = (unsigned short)ARRAY_SIZE(filter),
2462 		.filter = filter,
2463 	};
2464 
2465 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2466 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2467 	}
2468 
2469 	/* Check prctl failure detection by requesting sib 0 diverge. */
2470 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2471 	ASSERT_NE(ENOSYS, errno) {
2472 		TH_LOG("Kernel does not support seccomp syscall!");
2473 	}
2474 	ASSERT_EQ(0, ret) {
2475 		TH_LOG("setting filter failed");
2476 	}
2477 
2478 	self->sibling[0].diverge = 1;
2479 	tsync_start_sibling(&self->sibling[0]);
2480 	tsync_start_sibling(&self->sibling[1]);
2481 
	/* Wait until both siblings have posted "started". */
2482 	while (self->sibling_count < TSYNC_SIBLINGS) {
2483 		sem_wait(&self->started);
2484 		self->sibling_count++;
2485 	}
2486 
2487 	/* Signal the threads to clean up*/
2488 	pthread_mutex_lock(&self->mutex);
2489 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2490 		TH_LOG("cond broadcast non-zero");
2491 	}
2492 	pthread_mutex_unlock(&self->mutex);
2493 
2494 	/* Ensure diverging sibling failed to call prctl. */
2495 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2496 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2497 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2498 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2499 }
2500 
/*
 * Install the root filter, start both siblings afterwards, then apply
 * apply_prog with SECCOMP_FILTER_FLAG_TSYNC. The sync must succeed and
 * neither sibling may return one of the SIBLING_EXIT_* codes: both joins are
 * expected to yield a status of 0.
 */
2501 TEST_F(TSYNC, two_siblings_with_ancestor)
2502 {
2503 	long ret;
2504 	void *status;
2505 
2506 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2507 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2508 	}
2509 
2510 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2511 	ASSERT_NE(ENOSYS, errno) {
2512 		TH_LOG("Kernel does not support seccomp syscall!");
2513 	}
2514 	ASSERT_EQ(0, ret) {
2515 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2516 	}
2517 	tsync_start_sibling(&self->sibling[0]);
2518 	tsync_start_sibling(&self->sibling[1]);
2519 
	/* Wait until both siblings have posted "started". */
2520 	while (self->sibling_count < TSYNC_SIBLINGS) {
2521 		sem_wait(&self->started);
2522 		self->sibling_count++;
2523 	}
2524 
2525 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2526 		      &self->apply_prog);
	/* This block runs on failure, so the message must say so. */
2527 	ASSERT_EQ(0, ret) {
2528 		TH_LOG("Could not install filter on all threads!");
2529 	}
2530 	/* Tell the siblings to test the policy */
2531 	pthread_mutex_lock(&self->mutex);
2532 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2533 		TH_LOG("cond broadcast non-zero");
2534 	}
2535 	pthread_mutex_unlock(&self->mutex);
2536 	/* Ensure they are both killed and don't exit cleanly. */
2537 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2538 	EXPECT_EQ(0x0, (long)status);
2539 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2540 	EXPECT_EQ(0x0, (long)status);
2541 }
2542 
/*
 * Start both siblings without setting no_new_privs or installing any filter,
 * wake them, and expect each to return SIBLING_EXIT_NEWPRIVS.
 */
2543 TEST_F(TSYNC, two_sibling_want_nnp)
2544 {
2545 	void *status;
2546 
2547 	/* start siblings before any prctl() operations */
2548 	tsync_start_sibling(&self->sibling[0]);
2549 	tsync_start_sibling(&self->sibling[1]);
2550 	while (self->sibling_count < TSYNC_SIBLINGS) {
2551 		sem_wait(&self->started);
2552 		self->sibling_count++;
2553 	}
2554 
2555 	/* Tell the siblings to test no policy */
2556 	pthread_mutex_lock(&self->mutex);
2557 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2558 		TH_LOG("cond broadcast non-zero");
2559 	}
2560 	pthread_mutex_unlock(&self->mutex);
2561 
2562 	/* Ensure they are both upset about lacking nnp. */
2563 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2564 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2565 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2566 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2567 }
2568 
/*
 * Start both siblings before no_new_privs is set and before any filter
 * exists, then apply apply_prog with SECCOMP_FILTER_FLAG_TSYNC. The sync
 * must succeed and neither sibling may return one of the SIBLING_EXIT_*
 * codes: both joins are expected to yield a status of 0.
 */
2569 TEST_F(TSYNC, two_siblings_with_no_filter)
2570 {
2571 	long ret;
2572 	void *status;
2573 
2574 	/* start siblings before any prctl() operations */
2575 	tsync_start_sibling(&self->sibling[0]);
2576 	tsync_start_sibling(&self->sibling[1]);
2577 	while (self->sibling_count < TSYNC_SIBLINGS) {
2578 		sem_wait(&self->started);
2579 		self->sibling_count++;
2580 	}
2581 
2582 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2583 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2584 	}
2585 
2586 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2587 		      &self->apply_prog);
2588 	ASSERT_NE(ENOSYS, errno) {
2589 		TH_LOG("Kernel does not support seccomp syscall!");
2590 	}
	/* This block runs on failure, so the message must say so. */
2591 	ASSERT_EQ(0, ret) {
2592 		TH_LOG("Could not install filter on all threads!");
2593 	}
2594 
2595 	/* Tell the siblings to test the policy */
2596 	pthread_mutex_lock(&self->mutex);
2597 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2598 		TH_LOG("cond broadcast non-zero");
2599 	}
2600 	pthread_mutex_unlock(&self->mutex);
2601 
2602 	/* Ensure they are both killed and don't exit cleanly. */
2603 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2604 	EXPECT_EQ(0x0, (long)status);
2605 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2606 	EXPECT_EQ(0x0, (long)status);
2607 }
2608 
/*
 * Sibling 0 installs its own copy of the root filter (diverge = 1). A later
 * TSYNC request from the main thread is then expected to return sibling 0's
 * kernel tid instead of 0, and both siblings must afterwards finish with
 * SIBLING_EXIT_UNKILLED.
 */
2609 TEST_F(TSYNC, two_siblings_with_one_divergence)
2610 {
2611 	long ret;
2612 	void *status;
2613 
2614 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2615 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2616 	}
2617 
2618 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2619 	ASSERT_NE(ENOSYS, errno) {
2620 		TH_LOG("Kernel does not support seccomp syscall!");
2621 	}
2622 	ASSERT_EQ(0, ret) {
2623 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2624 	}
2625 	self->sibling[0].diverge = 1;
2626 	tsync_start_sibling(&self->sibling[0]);
2627 	tsync_start_sibling(&self->sibling[1]);
2628 
	/* Wait until both siblings have posted "started". */
2629 	while (self->sibling_count < TSYNC_SIBLINGS) {
2630 		sem_wait(&self->started);
2631 		self->sibling_count++;
2632 	}
2633 
2634 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2635 		      &self->apply_prog);
	/* The expected result is the tid of the thread that blocked the sync. */
2636 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2637 		TH_LOG("Did not fail on diverged sibling.");
2638 	}
2639 
2640 	/* Wake the threads */
2641 	pthread_mutex_lock(&self->mutex);
2642 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2643 		TH_LOG("cond broadcast non-zero");
2644 	}
2645 	pthread_mutex_unlock(&self->mutex);
2646 
2647 	/* Ensure they are both unkilled. */
2648 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2649 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2650 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2651 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2652 }
2653 
/*
 * Same scenario as a diverged sibling blocking a TSYNC request, but with
 * SECCOMP_FILTER_FLAG_TSYNC_ESRCH added: the call is expected to return -1
 * with errno set to ESRCH. Both siblings must afterwards finish with
 * SIBLING_EXIT_UNKILLED.
 */
2654 TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
2655 {
2656 	long ret, flags;
2657 	void *status;
2658 
2659 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2660 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2661 	}
2662 
2663 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2664 	ASSERT_NE(ENOSYS, errno) {
2665 		TH_LOG("Kernel does not support seccomp syscall!");
2666 	}
2667 	ASSERT_EQ(0, ret) {
2668 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2669 	}
2670 	self->sibling[0].diverge = 1;
2671 	tsync_start_sibling(&self->sibling[0]);
2672 	tsync_start_sibling(&self->sibling[1]);
2673 
	/* Wait until both siblings have posted "started". */
2674 	while (self->sibling_count < TSYNC_SIBLINGS) {
2675 		sem_wait(&self->started);
2676 		self->sibling_count++;
2677 	}
2678 
2679 	flags = SECCOMP_FILTER_FLAG_TSYNC | \
2680 		SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
2681 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
	/* errno is checked first, directly after the call that set it. */
2682 	ASSERT_EQ(ESRCH, errno) {
2683 		TH_LOG("Did not return ESRCH for diverged sibling.");
2684 	}
2685 	ASSERT_EQ(-1, ret) {
2686 		TH_LOG("Did not fail on diverged sibling.");
2687 	}
2688 
2689 	/* Wake the threads */
2690 	pthread_mutex_lock(&self->mutex);
2691 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2692 		TH_LOG("cond broadcast non-zero");
2693 	}
2694 	pthread_mutex_unlock(&self->mutex);
2695 
2696 	/* Ensure they are both unkilled. */
2697 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2698 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2699 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2700 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2701 }
2702 
/*
 * Three-stage TSYNC scenario with siblings started before the main thread
 * has any filter:
 *   1. sibling 0 diverges; a TSYNC request must return its tid;
 *   2. the diverged sibling is woken, joined and waited for until kill(tid, 0)
 *      fails, after which a TSYNC request must succeed and the remaining
 *      sibling, once woken, must not return a SIBLING_EXIT_* code;
 *   3. with both siblings gone, a final TSYNC request must succeed.
 */
2703 TEST_F(TSYNC, two_siblings_not_under_filter)
2704 {
2705 	long ret, sib;
2706 	void *status;
	/* 100 ms between polls for task death. */
2707 	struct timespec delay = { .tv_nsec = 100000000 };
2708 
2709 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2710 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2711 	}
2712 
2713 	/*
2714 	 * Sibling 0 will have its own seccomp policy
2715 	 * and Sibling 1 will not be under seccomp at
2716 	 * all. Sibling 1 will enter seccomp and 0
2717 	 * will cause failure.
2718 	 */
2719 	self->sibling[0].diverge = 1;
2720 	tsync_start_sibling(&self->sibling[0]);
2721 	tsync_start_sibling(&self->sibling[1]);
2722 
2723 	while (self->sibling_count < TSYNC_SIBLINGS) {
2724 		sem_wait(&self->started);
2725 		self->sibling_count++;
2726 	}
2727 
2728 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2729 	ASSERT_NE(ENOSYS, errno) {
2730 		TH_LOG("Kernel does not support seccomp syscall!");
2731 	}
2732 	ASSERT_EQ(0, ret) {
2733 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2734 	}
2735 
2736 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2737 		      &self->apply_prog);
2738 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
2739 		TH_LOG("Did not fail on diverged sibling.");
2740 	}
	/* sib indexes the sibling to retire first: the one that blocked the sync. */
2741 	sib = 1;
2742 	if (ret == self->sibling[0].system_tid)
2743 		sib = 0;
2744 
2745 	pthread_mutex_lock(&self->mutex);
2746 
2747 	/* Increment the other siblings num_waits so we can clean up
2748 	 * the one we just saw.
2749 	 */
2750 	self->sibling[!sib].num_waits += 1;
2751 
2752 	/* Signal the thread to clean up*/
2753 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2754 		TH_LOG("cond broadcast non-zero");
2755 	}
2756 	pthread_mutex_unlock(&self->mutex);
2757 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2758 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2759 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2760 	while (!kill(self->sibling[sib].system_tid, 0))
2761 		nanosleep(&delay, NULL);
2762 	/* Switch to the remaining sibling */
2763 	sib = !sib;
2764 
2765 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2766 		      &self->apply_prog);
2767 	ASSERT_EQ(0, ret) {
2768 		TH_LOG("Expected the remaining sibling to sync");
2769 	};
2770 
2771 	pthread_mutex_lock(&self->mutex);
2772 
2773 	/* If remaining sibling didn't have a chance to wake up during
2774 	 * the first broadcast, manually reduce the num_waits now.
2775 	 */
2776 	if (self->sibling[sib].num_waits > 1)
2777 		self->sibling[sib].num_waits = 1;
2778 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2779 		TH_LOG("cond broadcast non-zero");
2780 	}
2781 	pthread_mutex_unlock(&self->mutex);
2782 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2783 	EXPECT_EQ(0, (long)status);
2784 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2785 	while (!kill(self->sibling[sib].system_tid, 0))
2786 		nanosleep(&delay, NULL);
2787 
2788 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2789 		      &self->apply_prog);
2790 	ASSERT_EQ(0, ret);  /* just us chickens */
2791 }
2792 
2793 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
/*
 * A forked child installs a filter that reports nanosleep/clock_nanosleep as
 * SECCOMP_RET_TRACE|0x100 and restart_syscall as SECCOMP_RET_TRACE|0x200,
 * then sleeps for one second. The parent acts as ptrace tracer: it observes
 * the 0x100 event, interrupts the sleep with SIGSTOP, resumes it with
 * SIGCONT, and then expects a second seccomp event carrying 0x200.
 *
 * Two one-byte pipe writes ('.' and '!') sequence the child. The child
 * reports its own EXPECT results through its exit status, which the parent
 * folds into _metadata->passed at the end.
 */
2794 TEST(syscall_restart)
2795 {
2796 	long ret;
2797 	unsigned long msg;
2798 	pid_t child_pid;
2799 	int pipefd[2];
2800 	int status;
2801 	siginfo_t info = { };
	/*
	 * Jump offsets are relative to the following instruction: sigreturn,
	 * read, exit, rt_sigreturn and write end at RET_ALLOW, the two sleep
	 * syscalls at TRACE|0x100, restart_syscall at TRACE|0x200, and
	 * anything else at RET_KILL.
	 */
2802 	struct sock_filter filter[] = {
2803 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2804 			 offsetof(struct seccomp_data, nr)),
2805 
2806 #ifdef __NR_sigreturn
2807 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
2808 #endif
2809 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
2810 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
2811 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
2812 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
2813 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
2814 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2815 
2816 		/* Allow __NR_write for easy logging. */
2817 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2818 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2819 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2820 		/* The nanosleep jump target. */
2821 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2822 		/* The restart_syscall jump target. */
2823 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2824 	};
2825 	struct sock_fprog prog = {
2826 		.len = (unsigned short)ARRAY_SIZE(filter),
2827 		.filter = filter,
2828 	};
2829 #if defined(__arm__)
2830 	struct utsname utsbuf;
2831 #endif
2832 
2833 	ASSERT_EQ(0, pipe(pipefd));
2834 
2835 	child_pid = fork();
2836 	ASSERT_LE(0, child_pid);
2837 	if (child_pid == 0) {
2838 		/* Child uses EXPECT not ASSERT to deliver status correctly. */
2839 		char buf = ' ';
2840 		struct timespec timeout = { };
2841 
2842 		/* Attach parent as tracer and stop. */
2843 		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2844 		EXPECT_EQ(0, raise(SIGSTOP));
2845 
2846 		EXPECT_EQ(0, close(pipefd[1]));
2847 
2848 		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2849 			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2850 		}
2851 
2852 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2853 		EXPECT_EQ(0, ret) {
2854 			TH_LOG("Failed to install filter!");
2855 		}
2856 
		/* First sync byte: the parent writes '.' once it has set its ptrace options. */
2857 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2858 			TH_LOG("Failed to read() sync from parent");
2859 		}
2860 		EXPECT_EQ('.', buf) {
2861 			TH_LOG("Failed to get sync data from read()");
2862 		}
2863 
2864 		/* Start nanosleep to be interrupted. */
2865 		timeout.tv_sec = 1;
2866 		errno = 0;
2867 		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2868 			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2869 		}
2870 
2871 		/* Read final sync from parent. */
2872 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2873 			TH_LOG("Failed final read() from parent");
2874 		}
2875 		EXPECT_EQ('!', buf) {
2876 			TH_LOG("Failed to get final data from read()");
2877 		}
2878 
2879 		/* Directly report the status of our test harness results. */
2880 		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2881 						     : EXIT_FAILURE);
2882 	}
2883 	EXPECT_EQ(0, close(pipefd[0]));
2884 
2885 	/* Attach to child, setup options, and release. */
2886 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2887 	ASSERT_EQ(true, WIFSTOPPED(status));
2888 	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2889 			    PTRACE_O_TRACESECCOMP));
2890 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2891 	ASSERT_EQ(1, write(pipefd[1], ".", 1));
2892 
2893 	/* Wait for nanosleep() to start. */
2894 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2895 	ASSERT_EQ(true, WIFSTOPPED(status));
2896 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2897 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2898 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2899 	ASSERT_EQ(0x100, msg);
2900 	ret = get_syscall(_metadata, child_pid);
2901 	EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
2902 
2903 	/* Might as well check siginfo for sanity while we're here. */
2904 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2905 	ASSERT_EQ(SIGTRAP, info.si_signo);
2906 	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2907 	EXPECT_EQ(0, info.si_errno);
2908 	EXPECT_EQ(getuid(), info.si_uid);
2909 	/* Verify signal delivery came from child (seccomp-triggered). */
2910 	EXPECT_EQ(child_pid, info.si_pid);
2911 
2912 	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2913 	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2914 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2915 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2916 	ASSERT_EQ(true, WIFSTOPPED(status));
2917 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2918 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2919 	/*
2920 	 * There is no siginfo on SIGSTOP any more, so we can't verify
2921 	 * signal delivery came from parent now (getpid() == info.si_pid).
2922 	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
2923 	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
2924 	 */
2925 	EXPECT_EQ(SIGSTOP, info.si_signo);
2926 
2927 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2928 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
2929 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2930 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2931 	ASSERT_EQ(true, WIFSTOPPED(status));
2932 	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2933 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2934 
2935 	/* Wait for restart_syscall() to start. */
2936 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2937 	ASSERT_EQ(true, WIFSTOPPED(status));
2938 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2939 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2940 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2941 
2942 	ASSERT_EQ(0x200, msg);
2943 	ret = get_syscall(_metadata, child_pid);
2944 #if defined(__arm__)
2945 	/*
2946 	 * FIXME:
2947 	 * - native ARM registers do NOT expose true syscall.
2948 	 * - compat ARM registers on ARM64 DO expose true syscall.
2949 	 */
2950 	ASSERT_EQ(0, uname(&utsbuf));
2951 	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2952 		EXPECT_EQ(__NR_nanosleep, ret);
2953 	} else
2954 #endif
2955 	{
2956 		EXPECT_EQ(__NR_restart_syscall, ret);
2957 	}
2958 
2959 	/* Write again to end test. */
2960 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2961 	ASSERT_EQ(1, write(pipefd[1], "!", 1));
2962 	EXPECT_EQ(0, close(pipefd[1]));
2963 
	/* A signalled child or a non-zero exit status fails the test. */
2964 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2965 	if (WIFSIGNALED(status) || WEXITSTATUS(status))
2966 		_metadata->passed = 0;
2967 }
2968 
/*
 * Check SECCOMP_FILTER_FLAG_LOG handling: the flag must be rejected with
 * EINVAL in strict mode, accepted in filter mode, and a RET_KILL filter
 * installed with it must still take effect on getpid.
 * The test is declared with TEST_SIGNAL(..., SIGSYS), i.e. it is presumably
 * expected to end by that signal at the final getpid call.
 */
2969 TEST_SIGNAL(filter_flag_log, SIGSYS)
2970 {
2971 	struct sock_filter allow_filter[] = {
2972 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2973 	};
	/* getpid -> RET_KILL; everything else is allowed. */
2974 	struct sock_filter kill_filter[] = {
2975 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2976 			offsetof(struct seccomp_data, nr)),
2977 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2978 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2979 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2980 	};
2981 	struct sock_fprog allow_prog = {
2982 		.len = (unsigned short)ARRAY_SIZE(allow_filter),
2983 		.filter = allow_filter,
2984 	};
2985 	struct sock_fprog kill_prog = {
2986 		.len = (unsigned short)ARRAY_SIZE(kill_filter),
2987 		.filter = kill_filter,
2988 	};
2989 	long ret;
2990 	pid_t parent = getppid();
2991 
2992 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2993 	ASSERT_EQ(0, ret);
2994 
2995 	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2996 	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2997 		      &allow_prog);
2998 	ASSERT_NE(ENOSYS, errno) {
2999 		TH_LOG("Kernel does not support seccomp syscall!");
3000 	}
3001 	EXPECT_NE(0, ret) {
3002 		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
3003 	}
3004 	EXPECT_EQ(EINVAL, errno) {
3005 		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
3006 	}
3007 
3008 	/* Verify that a simple, permissive filter can be added with no flags */
3009 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
3010 	EXPECT_EQ(0, ret);
3011 
3012 	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
3013 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3014 		      &allow_prog);
3015 	ASSERT_NE(EINVAL, errno) {
3016 		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
3017 	}
3018 	EXPECT_EQ(0, ret);
3019 
3020 	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
3021 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3022 		      &kill_prog);
3023 	EXPECT_EQ(0, ret);
3024 
	/* getppid is not matched by kill_filter and must still work. */
3025 	EXPECT_EQ(parent, syscall(__NR_getppid));
3026 	/* getpid() should never return. */
3027 	EXPECT_EQ(0, syscall(__NR_getpid));
3028 }
3029 
/*
 * Check SECCOMP_GET_ACTION_AVAIL: every action listed in actions[] must be
 * reported as available (return 0), and an unknown action value must fail
 * with -1 and errno EOPNOTSUPP.
 */
3030 TEST(get_action_avail)
3031 {
3032 	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
3033 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
3034 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
	/* A value this test expects the kernel not to recognise as an action. */
3035 	__u32 unknown_action = 0x10000000U;
3036 	int i;
3037 	long ret;
3038 
	/* First probe doubles as a check that the operation itself exists. */
3039 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
3040 	ASSERT_NE(ENOSYS, errno) {
3041 		TH_LOG("Kernel does not support seccomp syscall!");
3042 	}
3043 	ASSERT_NE(EINVAL, errno) {
3044 		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
3045 	}
3046 	EXPECT_EQ(ret, 0);
3047 
3048 	for (i = 0; i < ARRAY_SIZE(actions); i++) {
3049 		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
3050 		EXPECT_EQ(ret, 0) {
3051 			TH_LOG("Expected action (0x%X) not available!",
3052 			       actions[i]);
3053 		}
3054 	}
3055 
3056 	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
3057 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3058 	EXPECT_EQ(ret, -1);
3059 	EXPECT_EQ(errno, EOPNOTSUPP);
3060 }
3061 
/*
 * Check PTRACE_SECCOMP_GET_METADATA. A forked child installs two allow-all
 * filters (the first with SECCOMP_FILTER_FLAG_LOG, the second without),
 * signals readiness through a pipe and then sleeps forever. The parent
 * attaches with ptrace and expects filter_off 0 to report the LOG flag and
 * filter_off 1 to report no flags. The child is killed at the end.
 *
 * The test bails out through XFAIL when not run with euid 0, or when the
 * first metadata request fails with EINVAL.
 */
3062 TEST(get_metadata)
3063 {
3064 	pid_t pid;
3065 	int pipefd[2];
3066 	char buf;
3067 	struct seccomp_metadata md;
3068 	long ret;
3069 
3070 	/* Only real root can get metadata. */
3071 	if (geteuid()) {
3072 		XFAIL(return, "get_metadata requires real root");
		/* NOTE(review): presumably unreachable if XFAIL runs its first argument - confirm. */
3073 		return;
3074 	}
3075 
3076 	ASSERT_EQ(0, pipe(pipefd));
3077 
3078 	pid = fork();
3079 	ASSERT_GE(pid, 0);
3080 	if (pid == 0) {
3081 		struct sock_filter filter[] = {
3082 			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3083 		};
3084 		struct sock_fprog prog = {
3085 			.len = (unsigned short)ARRAY_SIZE(filter),
3086 			.filter = filter,
3087 		};
3088 
3089 		/* one with log, one without */
3090 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3091 				     SECCOMP_FILTER_FLAG_LOG, &prog));
3092 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3093 
		/* Tell the parent that both filters are in place. */
3094 		EXPECT_EQ(0, close(pipefd[0]));
3095 		ASSERT_EQ(1, write(pipefd[1], "1", 1));
3096 		ASSERT_EQ(0, close(pipefd[1]));
3097 
		/* Stay alive until the parent kills us. */
3098 		while (1)
3099 			sleep(100);
3100 	}
3101 
3102 	ASSERT_EQ(0, close(pipefd[1]));
3103 	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3104 
3105 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3106 	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3107 
3108 	/* Past here must not use ASSERT or child process is never killed. */
3109 
3110 	md.filter_off = 0;
3111 	errno = 0;
3112 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3113 	EXPECT_EQ(sizeof(md), ret) {
3114 		if (errno == EINVAL)
3115 			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3116 	}
3117 
3118 	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3119 	EXPECT_EQ(md.filter_off, 0);
3120 
3121 	md.filter_off = 1;
3122 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3123 	EXPECT_EQ(sizeof(md), ret);
3124 	EXPECT_EQ(md.flags, 0);
3125 	EXPECT_EQ(md.filter_off, 1);
3126 
3127 skip:
3128 	ASSERT_EQ(0, kill(pid, SIGKILL));
3129 }
3130 
3131 static int user_trap_syscall(int nr, unsigned int flags)
3132 {
3133 	struct sock_filter filter[] = {
3134 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
3135 			offsetof(struct seccomp_data, nr)),
3136 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
3137 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
3138 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
3139 	};
3140 
3141 	struct sock_fprog prog = {
3142 		.len = (unsigned short)ARRAY_SIZE(filter),
3143 		.filter = filter,
3144 	};
3145 
3146 	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3147 }
3148 
3149 #define USER_NOTIF_MAGIC INT_MAX
3150 TEST(user_notification_basic)
3151 {
3152 	pid_t pid;
3153 	long ret;
3154 	int status, listener;
3155 	struct seccomp_notif req = {};
3156 	struct seccomp_notif_resp resp = {};
3157 	struct pollfd pollfd;
3158 
3159 	struct sock_filter filter[] = {
3160 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3161 	};
3162 	struct sock_fprog prog = {
3163 		.len = (unsigned short)ARRAY_SIZE(filter),
3164 		.filter = filter,
3165 	};
3166 
3167 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3168 	ASSERT_EQ(0, ret) {
3169 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3170 	}
3171 
3172 	pid = fork();
3173 	ASSERT_GE(pid, 0);
3174 
3175 	/* Check that we get -ENOSYS with no listener attached */
3176 	if (pid == 0) {
3177 		if (user_trap_syscall(__NR_getppid, 0) < 0)
3178 			exit(1);
3179 		ret = syscall(__NR_getppid);
3180 		exit(ret >= 0 || errno != ENOSYS);
3181 	}
3182 
3183 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3184 	EXPECT_EQ(true, WIFEXITED(status));
3185 	EXPECT_EQ(0, WEXITSTATUS(status));
3186 
3187 	/* Add some no-op filters for grins. */
3188 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3189 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3190 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3191 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3192 
3193 	/* Check that the basic notification machinery works */
3194 	listener = user_trap_syscall(__NR_getppid,
3195 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3196 	ASSERT_GE(listener, 0);
3197 
3198 	/* Installing a second listener in the chain should EBUSY */
3199 	EXPECT_EQ(user_trap_syscall(__NR_getppid,
3200 				    SECCOMP_FILTER_FLAG_NEW_LISTENER),
3201 		  -1);
3202 	EXPECT_EQ(errno, EBUSY);
3203 
3204 	pid = fork();
3205 	ASSERT_GE(pid, 0);
3206 
3207 	if (pid == 0) {
3208 		ret = syscall(__NR_getppid);
3209 		exit(ret != USER_NOTIF_MAGIC);
3210 	}
3211 
3212 	pollfd.fd = listener;
3213 	pollfd.events = POLLIN | POLLOUT;
3214 
3215 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3216 	EXPECT_EQ(pollfd.revents, POLLIN);
3217 
3218 	/* Test that we can't pass garbage to the kernel. */
3219 	memset(&req, 0, sizeof(req));
3220 	req.pid = -1;
3221 	errno = 0;
3222 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3223 	EXPECT_EQ(-1, ret);
3224 	EXPECT_EQ(EINVAL, errno);
3225 
3226 	if (ret) {
3227 		req.pid = 0;
3228 		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3229 	}
3230 
3231 	pollfd.fd = listener;
3232 	pollfd.events = POLLIN | POLLOUT;
3233 
3234 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3235 	EXPECT_EQ(pollfd.revents, POLLOUT);
3236 
3237 	EXPECT_EQ(req.data.nr,  __NR_getppid);
3238 
3239 	resp.id = req.id;
3240 	resp.error = 0;
3241 	resp.val = USER_NOTIF_MAGIC;
3242 
3243 	/* check that we make sure flags == 0 */
3244 	resp.flags = 1;
3245 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3246 	EXPECT_EQ(errno, EINVAL);
3247 
3248 	resp.flags = 0;
3249 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3250 
3251 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3252 	EXPECT_EQ(true, WIFEXITED(status));
3253 	EXPECT_EQ(0, WEXITSTATUS(status));
3254 }
3255 
3256 TEST(user_notification_with_tsync)
3257 {
3258 	int ret;
3259 	unsigned int flags;
3260 
3261 	/* these were exclusive */
3262 	flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
3263 		SECCOMP_FILTER_FLAG_TSYNC;
3264 	ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
3265 	ASSERT_EQ(EINVAL, errno);
3266 
3267 	/* but now they're not */
3268 	flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
3269 	ret = user_trap_syscall(__NR_getppid, flags);
3270 	close(ret);
3271 	ASSERT_LE(0, ret);
3272 }
3273 
3274 TEST(user_notification_kill_in_middle)
3275 {
3276 	pid_t pid;
3277 	long ret;
3278 	int listener;
3279 	struct seccomp_notif req = {};
3280 	struct seccomp_notif_resp resp = {};
3281 
3282 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3283 	ASSERT_EQ(0, ret) {
3284 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3285 	}
3286 
3287 	listener = user_trap_syscall(__NR_getppid,
3288 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3289 	ASSERT_GE(listener, 0);
3290 
3291 	/*
3292 	 * Check that nothing bad happens when we kill the task in the middle
3293 	 * of a syscall.
3294 	 */
3295 	pid = fork();
3296 	ASSERT_GE(pid, 0);
3297 
3298 	if (pid == 0) {
3299 		ret = syscall(__NR_getppid);
3300 		exit(ret != USER_NOTIF_MAGIC);
3301 	}
3302 
3303 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3304 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3305 
3306 	EXPECT_EQ(kill(pid, SIGKILL), 0);
3307 	EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3308 
3309 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3310 
3311 	resp.id = req.id;
3312 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3313 	EXPECT_EQ(ret, -1);
3314 	EXPECT_EQ(errno, ENOENT);
3315 }
3316 
/* Descriptor signal_handler() reports on; -1 until a caller sets a real one. */
static int handled = -1;

/*
 * Signal handler that announces its invocation by writing the single byte
 * "c" to the descriptor stored in @handled.
 *
 * Only write() is used here: perror() is not async-signal-safe and must not
 * be called from a handler.  errno is saved and restored so the interrupted
 * code never sees a value clobbered by the handler.
 */
static void signal_handler(int signal)
{
	static const char msg[] = "write from signal: failed\n";
	int saved_errno = errno;
	ssize_t n;

	(void)signal;

	if (write(handled, "c", 1) != 1) {
		/* Best-effort diagnostic; nothing more can be done if it fails. */
		n = write(STDERR_FILENO, msg, sizeof(msg) - 1);
		(void)n;
	}

	errno = saved_errno;
}
3324 
3325 TEST(user_notification_signal)
3326 {
3327 	pid_t pid;
3328 	long ret;
3329 	int status, listener, sk_pair[2];
3330 	struct seccomp_notif req = {};
3331 	struct seccomp_notif_resp resp = {};
3332 	char c;
3333 
3334 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3335 	ASSERT_EQ(0, ret) {
3336 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3337 	}
3338 
3339 	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3340 
3341 	listener = user_trap_syscall(__NR_gettid,
3342 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3343 	ASSERT_GE(listener, 0);
3344 
3345 	pid = fork();
3346 	ASSERT_GE(pid, 0);
3347 
3348 	if (pid == 0) {
3349 		close(sk_pair[0]);
3350 		handled = sk_pair[1];
3351 		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3352 			perror("signal");
3353 			exit(1);
3354 		}
3355 		/*
3356 		 * ERESTARTSYS behavior is a bit hard to test, because we need
3357 		 * to rely on a signal that has not yet been handled. Let's at
3358 		 * least check that the error code gets propagated through, and
3359 		 * hope that it doesn't break when there is actually a signal :)
3360 		 */
3361 		ret = syscall(__NR_gettid);
3362 		exit(!(ret == -1 && errno == 512));
3363 	}
3364 
3365 	close(sk_pair[1]);
3366 
3367 	memset(&req, 0, sizeof(req));
3368 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3369 
3370 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
3371 
3372 	/*
3373 	 * Make sure the signal really is delivered, which means we're not
3374 	 * stuck in the user notification code any more and the notification
3375 	 * should be dead.
3376 	 */
3377 	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3378 
3379 	resp.id = req.id;
3380 	resp.error = -EPERM;
3381 	resp.val = 0;
3382 
3383 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3384 	EXPECT_EQ(errno, ENOENT);
3385 
3386 	memset(&req, 0, sizeof(req));
3387 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3388 
3389 	resp.id = req.id;
3390 	resp.error = -512; /* -ERESTARTSYS */
3391 	resp.val = 0;
3392 
3393 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3394 
3395 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3396 	EXPECT_EQ(true, WIFEXITED(status));
3397 	EXPECT_EQ(0, WEXITSTATUS(status));
3398 }
3399 
3400 TEST(user_notification_closed_listener)
3401 {
3402 	pid_t pid;
3403 	long ret;
3404 	int status, listener;
3405 
3406 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3407 	ASSERT_EQ(0, ret) {
3408 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3409 	}
3410 
3411 	listener = user_trap_syscall(__NR_getppid,
3412 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3413 	ASSERT_GE(listener, 0);
3414 
3415 	/*
3416 	 * Check that we get an ENOSYS when the listener is closed.
3417 	 */
3418 	pid = fork();
3419 	ASSERT_GE(pid, 0);
3420 	if (pid == 0) {
3421 		close(listener);
3422 		ret = syscall(__NR_getppid);
3423 		exit(ret != -1 && errno != ENOSYS);
3424 	}
3425 
3426 	close(listener);
3427 
3428 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3429 	EXPECT_EQ(true, WIFEXITED(status));
3430 	EXPECT_EQ(0, WEXITSTATUS(status));
3431 }
3432 
3433 /*
3434  * Check that a pid in a child namespace still shows up as valid in ours.
3435  */
3436 TEST(user_notification_child_pid_ns)
3437 {
3438 	pid_t pid;
3439 	int status, listener;
3440 	struct seccomp_notif req = {};
3441 	struct seccomp_notif_resp resp = {};
3442 
3443 	ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3444 
3445 	listener = user_trap_syscall(__NR_getppid,
3446 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3447 	ASSERT_GE(listener, 0);
3448 
3449 	pid = fork();
3450 	ASSERT_GE(pid, 0);
3451 
3452 	if (pid == 0)
3453 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3454 
3455 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3456 	EXPECT_EQ(req.pid, pid);
3457 
3458 	resp.id = req.id;
3459 	resp.error = 0;
3460 	resp.val = USER_NOTIF_MAGIC;
3461 
3462 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3463 
3464 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3465 	EXPECT_EQ(true, WIFEXITED(status));
3466 	EXPECT_EQ(0, WEXITSTATUS(status));
3467 	close(listener);
3468 }
3469 
3470 /*
3471  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3472  * invalid.
3473  */
3474 TEST(user_notification_sibling_pid_ns)
3475 {
3476 	pid_t pid, pid2;
3477 	int status, listener;
3478 	struct seccomp_notif req = {};
3479 	struct seccomp_notif_resp resp = {};
3480 
3481 	ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3482 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3483 	}
3484 
3485 	listener = user_trap_syscall(__NR_getppid,
3486 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3487 	ASSERT_GE(listener, 0);
3488 
3489 	pid = fork();
3490 	ASSERT_GE(pid, 0);
3491 
3492 	if (pid == 0) {
3493 		ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3494 
3495 		pid2 = fork();
3496 		ASSERT_GE(pid2, 0);
3497 
3498 		if (pid2 == 0)
3499 			exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3500 
3501 		EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3502 		EXPECT_EQ(true, WIFEXITED(status));
3503 		EXPECT_EQ(0, WEXITSTATUS(status));
3504 		exit(WEXITSTATUS(status));
3505 	}
3506 
3507 	/* Create the sibling ns, and sibling in it. */
3508 	ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3509 	ASSERT_EQ(errno, 0);
3510 
3511 	pid2 = fork();
3512 	ASSERT_GE(pid2, 0);
3513 
3514 	if (pid2 == 0) {
3515 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3516 		/*
3517 		 * The pid should be 0, i.e. the task is in some namespace that
3518 		 * we can't "see".
3519 		 */
3520 		EXPECT_EQ(req.pid, 0);
3521 
3522 		resp.id = req.id;
3523 		resp.error = 0;
3524 		resp.val = USER_NOTIF_MAGIC;
3525 
3526 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3527 		exit(0);
3528 	}
3529 
3530 	close(listener);
3531 
3532 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3533 	EXPECT_EQ(true, WIFEXITED(status));
3534 	EXPECT_EQ(0, WEXITSTATUS(status));
3535 
3536 	EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3537 	EXPECT_EQ(true, WIFEXITED(status));
3538 	EXPECT_EQ(0, WEXITSTATUS(status));
3539 }
3540 
3541 TEST(user_notification_fault_recv)
3542 {
3543 	pid_t pid;
3544 	int status, listener;
3545 	struct seccomp_notif req = {};
3546 	struct seccomp_notif_resp resp = {};
3547 
3548 	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3549 
3550 	listener = user_trap_syscall(__NR_getppid,
3551 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3552 	ASSERT_GE(listener, 0);
3553 
3554 	pid = fork();
3555 	ASSERT_GE(pid, 0);
3556 
3557 	if (pid == 0)
3558 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3559 
3560 	/* Do a bad recv() */
3561 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3562 	EXPECT_EQ(errno, EFAULT);
3563 
3564 	/* We should still be able to receive this notification, though. */
3565 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3566 	EXPECT_EQ(req.pid, pid);
3567 
3568 	resp.id = req.id;
3569 	resp.error = 0;
3570 	resp.val = USER_NOTIF_MAGIC;
3571 
3572 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3573 
3574 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3575 	EXPECT_EQ(true, WIFEXITED(status));
3576 	EXPECT_EQ(0, WEXITSTATUS(status));
3577 }
3578 
3579 TEST(seccomp_get_notif_sizes)
3580 {
3581 	struct seccomp_notif_sizes sizes;
3582 
3583 	ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
3584 	EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
3585 	EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
3586 }
3587 
3588 static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
3589 {
3590 #ifdef __NR_kcmp
3591 	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
3592 #else
3593 	errno = ENOSYS;
3594 	return -1;
3595 #endif
3596 }
3597 
3598 TEST(user_notification_continue)
3599 {
3600 	pid_t pid;
3601 	long ret;
3602 	int status, listener;
3603 	struct seccomp_notif req = {};
3604 	struct seccomp_notif_resp resp = {};
3605 	struct pollfd pollfd;
3606 
3607 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3608 	ASSERT_EQ(0, ret) {
3609 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3610 	}
3611 
3612 	listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3613 	ASSERT_GE(listener, 0);
3614 
3615 	pid = fork();
3616 	ASSERT_GE(pid, 0);
3617 
3618 	if (pid == 0) {
3619 		int dup_fd, pipe_fds[2];
3620 		pid_t self;
3621 
3622 		ret = pipe(pipe_fds);
3623 		if (ret < 0)
3624 			exit(1);
3625 
3626 		dup_fd = dup(pipe_fds[0]);
3627 		if (dup_fd < 0)
3628 			exit(1);
3629 
3630 		self = getpid();
3631 
3632 		ret = filecmp(self, self, pipe_fds[0], dup_fd);
3633 		if (ret)
3634 			exit(2);
3635 
3636 		exit(0);
3637 	}
3638 
3639 	pollfd.fd = listener;
3640 	pollfd.events = POLLIN | POLLOUT;
3641 
3642 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3643 	EXPECT_EQ(pollfd.revents, POLLIN);
3644 
3645 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3646 
3647 	pollfd.fd = listener;
3648 	pollfd.events = POLLIN | POLLOUT;
3649 
3650 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3651 	EXPECT_EQ(pollfd.revents, POLLOUT);
3652 
3653 	EXPECT_EQ(req.data.nr, __NR_dup);
3654 
3655 	resp.id = req.id;
3656 	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3657 
3658 	/*
3659 	 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
3660 	 * args be set to 0.
3661 	 */
3662 	resp.error = 0;
3663 	resp.val = USER_NOTIF_MAGIC;
3664 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3665 	EXPECT_EQ(errno, EINVAL);
3666 
3667 	resp.error = USER_NOTIF_MAGIC;
3668 	resp.val = 0;
3669 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3670 	EXPECT_EQ(errno, EINVAL);
3671 
3672 	resp.error = 0;
3673 	resp.val = 0;
3674 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3675 		if (errno == EINVAL)
3676 			XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3677 	}
3678 
3679 skip:
3680 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3681 	EXPECT_EQ(true, WIFEXITED(status));
3682 	EXPECT_EQ(0, WEXITSTATUS(status)) {
3683 		if (WEXITSTATUS(status) == 2) {
3684 			XFAIL(return, "Kernel does not support kcmp() syscall");
3685 			return;
3686 		}
3687 	}
3688 }
3689 
3690 /*
3691  * TODO:
3692  * - add microbenchmarks
3693  * - expand NNP testing
3694  * - better arch-specific TRACE and TRAP handlers.
3695  * - endianness checking when appropriate
3696  * - 64-bit arg prodding
3697  * - arch value testing (x86 modes especially)
3698  * - verify that FILTER_FLAG_LOG filters generate log messages
3699  * - verify that RET_LOG generates log messages
3700  * - ...
3701  */
3702 
3703 TEST_HARNESS_MAIN
3704