1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10 
11 /*
12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
13  * we need to use the kernel's siginfo.h file and trick glibc
14  * into accepting it.
15  */
16 #if !__GLIBC_PREREQ(2, 26)
17 # include <asm/siginfo.h>
18 # define __have_siginfo_t 1
19 # define __have_sigval_t 1
20 # define __have_sigevent_t 1
21 #endif
22 
23 #include <errno.h>
24 #include <linux/filter.h>
25 #include <sys/prctl.h>
26 #include <sys/ptrace.h>
27 #include <sys/user.h>
28 #include <linux/prctl.h>
29 #include <linux/ptrace.h>
30 #include <linux/seccomp.h>
31 #include <pthread.h>
32 #include <semaphore.h>
33 #include <signal.h>
34 #include <stddef.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <time.h>
38 #include <limits.h>
39 #include <linux/elf.h>
40 #include <sys/uio.h>
41 #include <sys/utsname.h>
42 #include <sys/fcntl.h>
43 #include <sys/mman.h>
44 #include <sys/times.h>
45 #include <sys/socket.h>
46 #include <sys/ioctl.h>
47 #include <linux/kcmp.h>
48 
49 #include <unistd.h>
50 #include <sys/syscall.h>
51 #include <poll.h>
52 
53 #include "../kselftest_harness.h"
54 
/*
 * Fallback values for prctl() options and seccomp modes. Each constant is
 * defined here only when none of the headers included above provided it.
 */
#if !defined(PR_SET_PTRACER)
# define PR_SET_PTRACER 0x59616d61
#endif

/* The "get" option is supplied together with the "set" option. */
#if !defined(PR_SET_NO_NEW_PRIVS)
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif

#if !defined(PR_SECCOMP_EXT)
# define PR_SECCOMP_EXT 43
#endif

#if !defined(SECCOMP_EXT_ACT)
# define SECCOMP_EXT_ACT 1
#endif

#if !defined(SECCOMP_EXT_ACT_TSYNC)
# define SECCOMP_EXT_ACT_TSYNC 1
#endif

/* Mode values passed as the second argument of prctl(PR_SET_SECCOMP). */
#if !defined(SECCOMP_MODE_STRICT)
# define SECCOMP_MODE_STRICT 1
#endif

#if !defined(SECCOMP_MODE_FILTER)
# define SECCOMP_MODE_FILTER 2
#endif
83 
84 #ifndef SECCOMP_RET_ALLOW
85 struct seccomp_data {
86 	int nr;
87 	__u32 arch;
88 	__u64 instruction_pointer;
89 	__u64 args[6];
90 };
91 #endif
92 
/*
 * Fallback filter return actions, grouped by the guard macro whose absence
 * triggers each group.
 */
#if !defined(SECCOMP_RET_KILL_PROCESS)
# define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
# define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
#endif

#if !defined(SECCOMP_RET_KILL)
# define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
# define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
#endif

#if !defined(SECCOMP_RET_LOG)
# define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
#endif
107 
/*
 * Per-architecture fallback for the seccomp syscall number, used only when
 * __NR_seccomp is not already defined. Unknown architectures get a build
 * warning and the placeholder value 0xffff.
 */
#if !defined(__NR_seccomp)
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__) || defined(__riscv)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif
130 
/* Fallback operation codes for the first argument of seccomp(). */
#if !defined(SECCOMP_SET_MODE_STRICT)
# define SECCOMP_SET_MODE_STRICT 0
#endif

#if !defined(SECCOMP_SET_MODE_FILTER)
# define SECCOMP_SET_MODE_FILTER 1
#endif

#if !defined(SECCOMP_GET_ACTION_AVAIL)
# define SECCOMP_GET_ACTION_AVAIL 2
#endif

#if !defined(SECCOMP_GET_NOTIF_SIZES)
# define SECCOMP_GET_NOTIF_SIZES 3
#endif

/* Fallback filter flag bits; each flag occupies its own bit position. */
#if !defined(SECCOMP_FILTER_FLAG_TSYNC)
# define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#if !defined(SECCOMP_FILTER_FLAG_LOG)
# define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif

#if !defined(SECCOMP_FILTER_FLAG_SPEC_ALLOW)
# define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
158 
159 #ifndef PTRACE_SECCOMP_GET_METADATA
160 #define PTRACE_SECCOMP_GET_METADATA	0x420d
161 
162 struct seccomp_metadata {
163 	__u64 filter_off;       /* Input: which filter */
164 	__u64 flags;             /* Output: filter's flags */
165 };
166 #endif
167 
168 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
169 #define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
170 
171 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
172 
173 #define SECCOMP_IOC_MAGIC		'!'
174 #define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
175 #define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
176 #define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
177 #define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)
178 
179 /* Flags for seccomp notification fd ioctl. */
180 #define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
181 #define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
182 						struct seccomp_notif_resp)
183 #define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOR(2, __u64)
184 
185 struct seccomp_notif {
186 	__u64 id;
187 	__u32 pid;
188 	__u32 flags;
189 	struct seccomp_data data;
190 };
191 
192 struct seccomp_notif_resp {
193 	__u64 id;
194 	__s64 val;
195 	__s32 error;
196 	__u32 flags;
197 };
198 
199 struct seccomp_notif_sizes {
200 	__u16 seccomp_notif;
201 	__u16 seccomp_notif_resp;
202 	__u16 seccomp_data;
203 };
204 #endif
205 
/* Fallback ptrace event-message values; entry and exit are defined as a pair. */
#if !defined(PTRACE_EVENTMSG_SYSCALL_ENTRY)
# define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
# define PTRACE_EVENTMSG_SYSCALL_EXIT	2
#endif

/* Fallback flag bit for the flags field of a notification response. */
#if !defined(SECCOMP_USER_NOTIF_FLAG_CONTINUE)
# define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif
214 
#ifndef seccomp
/*
 * Thin wrapper that issues the seccomp syscall through syscall().
 *
 * errno is cleared before the call, so a non-zero errno afterwards comes
 * from this invocation. The syscall() result is returned narrowed to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);

	return rc;
}
#endif
222 
/*
 * syscall_arg(n): byte offset inside struct seccomp_data at which a filter
 * loads a 32-bit word of args[n]. On big-endian builds the offset is moved
 * forward by sizeof(__u32) within the 64-bit slot.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
# error "wut? Unknown __BYTE_ORDER?!"
#endif

/* Marker values returned by sibling threads through their exit pointer. */
#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
234 
235 TEST(mode_strict_support)
236 {
237 	long ret;
238 
239 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
240 	ASSERT_EQ(0, ret) {
241 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
242 	}
243 	syscall(__NR_exit, 0);
244 }
245 
246 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
247 {
248 	long ret;
249 
250 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
251 	ASSERT_EQ(0, ret) {
252 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
253 	}
254 	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
255 		NULL, NULL, NULL);
256 	EXPECT_FALSE(true) {
257 		TH_LOG("Unreachable!");
258 	}
259 }
260 
261 /* Note! This doesn't test no new privs behavior */
262 TEST(no_new_privs_support)
263 {
264 	long ret;
265 
266 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
267 	EXPECT_EQ(0, ret) {
268 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
269 	}
270 }
271 
272 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
273 TEST(mode_filter_support)
274 {
275 	long ret;
276 
277 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
278 	ASSERT_EQ(0, ret) {
279 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
280 	}
281 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
282 	EXPECT_EQ(-1, ret);
283 	EXPECT_EQ(EFAULT, errno) {
284 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
285 	}
286 }
287 
288 TEST(mode_filter_without_nnp)
289 {
290 	struct sock_filter filter[] = {
291 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
292 	};
293 	struct sock_fprog prog = {
294 		.len = (unsigned short)ARRAY_SIZE(filter),
295 		.filter = filter,
296 	};
297 	long ret;
298 
299 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
300 	ASSERT_LE(0, ret) {
301 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
302 	}
303 	errno = 0;
304 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
305 	/* Succeeds with CAP_SYS_ADMIN, fails without */
306 	/* TODO(wad) check caps not euid */
307 	if (geteuid()) {
308 		EXPECT_EQ(-1, ret);
309 		EXPECT_EQ(EACCES, errno);
310 	} else {
311 		EXPECT_EQ(0, ret);
312 	}
313 }
314 
315 #define MAX_INSNS_PER_PATH 32768
316 
317 TEST(filter_size_limits)
318 {
319 	int i;
320 	int count = BPF_MAXINSNS + 1;
321 	struct sock_filter allow[] = {
322 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
323 	};
324 	struct sock_filter *filter;
325 	struct sock_fprog prog = { };
326 	long ret;
327 
328 	filter = calloc(count, sizeof(*filter));
329 	ASSERT_NE(NULL, filter);
330 
331 	for (i = 0; i < count; i++)
332 		filter[i] = allow[0];
333 
334 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
335 	ASSERT_EQ(0, ret);
336 
337 	prog.filter = filter;
338 	prog.len = count;
339 
340 	/* Too many filter instructions in a single filter. */
341 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
342 	ASSERT_NE(0, ret) {
343 		TH_LOG("Installing %d insn filter was allowed", prog.len);
344 	}
345 
346 	/* One less is okay, though. */
347 	prog.len -= 1;
348 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
349 	ASSERT_EQ(0, ret) {
350 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
351 	}
352 }
353 
354 TEST(filter_chain_limits)
355 {
356 	int i;
357 	int count = BPF_MAXINSNS;
358 	struct sock_filter allow[] = {
359 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
360 	};
361 	struct sock_filter *filter;
362 	struct sock_fprog prog = { };
363 	long ret;
364 
365 	filter = calloc(count, sizeof(*filter));
366 	ASSERT_NE(NULL, filter);
367 
368 	for (i = 0; i < count; i++)
369 		filter[i] = allow[0];
370 
371 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
372 	ASSERT_EQ(0, ret);
373 
374 	prog.filter = filter;
375 	prog.len = 1;
376 
377 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
378 	ASSERT_EQ(0, ret);
379 
380 	prog.len = count;
381 
382 	/* Too many total filter instructions. */
383 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
384 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
385 		if (ret != 0)
386 			break;
387 	}
388 	ASSERT_NE(0, ret) {
389 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
390 		       i, count, i * (count + 4));
391 	}
392 }
393 
394 TEST(mode_filter_cannot_move_to_strict)
395 {
396 	struct sock_filter filter[] = {
397 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
398 	};
399 	struct sock_fprog prog = {
400 		.len = (unsigned short)ARRAY_SIZE(filter),
401 		.filter = filter,
402 	};
403 	long ret;
404 
405 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
406 	ASSERT_EQ(0, ret);
407 
408 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
409 	ASSERT_EQ(0, ret);
410 
411 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
412 	EXPECT_EQ(-1, ret);
413 	EXPECT_EQ(EINVAL, errno);
414 }
415 
416 
417 TEST(mode_filter_get_seccomp)
418 {
419 	struct sock_filter filter[] = {
420 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
421 	};
422 	struct sock_fprog prog = {
423 		.len = (unsigned short)ARRAY_SIZE(filter),
424 		.filter = filter,
425 	};
426 	long ret;
427 
428 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
429 	ASSERT_EQ(0, ret);
430 
431 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
432 	EXPECT_EQ(0, ret);
433 
434 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
435 	ASSERT_EQ(0, ret);
436 
437 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
438 	EXPECT_EQ(2, ret);
439 }
440 
441 
442 TEST(ALLOW_all)
443 {
444 	struct sock_filter filter[] = {
445 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
446 	};
447 	struct sock_fprog prog = {
448 		.len = (unsigned short)ARRAY_SIZE(filter),
449 		.filter = filter,
450 	};
451 	long ret;
452 
453 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
454 	ASSERT_EQ(0, ret);
455 
456 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
457 	ASSERT_EQ(0, ret);
458 }
459 
460 TEST(empty_prog)
461 {
462 	struct sock_filter filter[] = {
463 	};
464 	struct sock_fprog prog = {
465 		.len = (unsigned short)ARRAY_SIZE(filter),
466 		.filter = filter,
467 	};
468 	long ret;
469 
470 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
471 	ASSERT_EQ(0, ret);
472 
473 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
474 	EXPECT_EQ(-1, ret);
475 	EXPECT_EQ(EINVAL, errno);
476 }
477 
478 TEST(log_all)
479 {
480 	struct sock_filter filter[] = {
481 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
482 	};
483 	struct sock_fprog prog = {
484 		.len = (unsigned short)ARRAY_SIZE(filter),
485 		.filter = filter,
486 	};
487 	long ret;
488 	pid_t parent = getppid();
489 
490 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
491 	ASSERT_EQ(0, ret);
492 
493 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
494 	ASSERT_EQ(0, ret);
495 
496 	/* getppid() should succeed and be logged (no check for logging) */
497 	EXPECT_EQ(parent, syscall(__NR_getppid));
498 }
499 
500 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
501 {
502 	struct sock_filter filter[] = {
503 		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
504 	};
505 	struct sock_fprog prog = {
506 		.len = (unsigned short)ARRAY_SIZE(filter),
507 		.filter = filter,
508 	};
509 	long ret;
510 
511 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
512 	ASSERT_EQ(0, ret);
513 
514 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
515 	ASSERT_EQ(0, ret);
516 	EXPECT_EQ(0, syscall(__NR_getpid)) {
517 		TH_LOG("getpid() shouldn't ever return");
518 	}
519 }
520 
521 /* return code >= 0x80000000 is unused. */
522 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
523 {
524 	struct sock_filter filter[] = {
525 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
526 	};
527 	struct sock_fprog prog = {
528 		.len = (unsigned short)ARRAY_SIZE(filter),
529 		.filter = filter,
530 	};
531 	long ret;
532 
533 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
534 	ASSERT_EQ(0, ret);
535 
536 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
537 	ASSERT_EQ(0, ret);
538 	EXPECT_EQ(0, syscall(__NR_getpid)) {
539 		TH_LOG("getpid() shouldn't ever return");
540 	}
541 }
542 
543 TEST_SIGNAL(KILL_all, SIGSYS)
544 {
545 	struct sock_filter filter[] = {
546 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
547 	};
548 	struct sock_fprog prog = {
549 		.len = (unsigned short)ARRAY_SIZE(filter),
550 		.filter = filter,
551 	};
552 	long ret;
553 
554 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
555 	ASSERT_EQ(0, ret);
556 
557 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
558 	ASSERT_EQ(0, ret);
559 }
560 
561 TEST_SIGNAL(KILL_one, SIGSYS)
562 {
563 	struct sock_filter filter[] = {
564 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
565 			offsetof(struct seccomp_data, nr)),
566 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
567 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
568 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
569 	};
570 	struct sock_fprog prog = {
571 		.len = (unsigned short)ARRAY_SIZE(filter),
572 		.filter = filter,
573 	};
574 	long ret;
575 	pid_t parent = getppid();
576 
577 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
578 	ASSERT_EQ(0, ret);
579 
580 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
581 	ASSERT_EQ(0, ret);
582 
583 	EXPECT_EQ(parent, syscall(__NR_getppid));
584 	/* getpid() should never return. */
585 	EXPECT_EQ(0, syscall(__NR_getpid));
586 }
587 
588 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
589 {
590 	void *fatal_address;
591 	struct sock_filter filter[] = {
592 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
593 			offsetof(struct seccomp_data, nr)),
594 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
595 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
596 		/* Only both with lower 32-bit for now. */
597 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
598 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
599 			(unsigned long)&fatal_address, 0, 1),
600 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
601 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
602 	};
603 	struct sock_fprog prog = {
604 		.len = (unsigned short)ARRAY_SIZE(filter),
605 		.filter = filter,
606 	};
607 	long ret;
608 	pid_t parent = getppid();
609 	struct tms timebuf;
610 	clock_t clock = times(&timebuf);
611 
612 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
613 	ASSERT_EQ(0, ret);
614 
615 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
616 	ASSERT_EQ(0, ret);
617 
618 	EXPECT_EQ(parent, syscall(__NR_getppid));
619 	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
620 	/* times() should never return. */
621 	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
622 }
623 
624 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
625 {
626 #ifndef __NR_mmap2
627 	int sysno = __NR_mmap;
628 #else
629 	int sysno = __NR_mmap2;
630 #endif
631 	struct sock_filter filter[] = {
632 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
633 			offsetof(struct seccomp_data, nr)),
634 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
635 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
636 		/* Only both with lower 32-bit for now. */
637 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
638 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
639 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
640 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
641 	};
642 	struct sock_fprog prog = {
643 		.len = (unsigned short)ARRAY_SIZE(filter),
644 		.filter = filter,
645 	};
646 	long ret;
647 	pid_t parent = getppid();
648 	int fd;
649 	void *map1, *map2;
650 	int page_size = sysconf(_SC_PAGESIZE);
651 
652 	ASSERT_LT(0, page_size);
653 
654 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
655 	ASSERT_EQ(0, ret);
656 
657 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
658 	ASSERT_EQ(0, ret);
659 
660 	fd = open("/dev/zero", O_RDONLY);
661 	ASSERT_NE(-1, fd);
662 
663 	EXPECT_EQ(parent, syscall(__NR_getppid));
664 	map1 = (void *)syscall(sysno,
665 		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
666 	EXPECT_NE(MAP_FAILED, map1);
667 	/* mmap2() should never return. */
668 	map2 = (void *)syscall(sysno,
669 		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
670 	EXPECT_EQ(MAP_FAILED, map2);
671 
672 	/* The test failed, so clean up the resources. */
673 	munmap(map1, page_size);
674 	munmap(map2, page_size);
675 	close(fd);
676 }
677 
678 /* This is a thread task to die via seccomp filter violation. */
679 void *kill_thread(void *data)
680 {
681 	bool die = (bool)data;
682 
683 	if (die) {
684 		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
685 		return (void *)SIBLING_EXIT_FAILURE;
686 	}
687 
688 	return (void *)SIBLING_EXIT_UNKILLED;
689 }
690 
691 /* Prepare a thread that will kill itself or both of us. */
692 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
693 {
694 	pthread_t thread;
695 	void *status;
696 	/* Kill only when calling __NR_prctl. */
697 	struct sock_filter filter_thread[] = {
698 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
699 			offsetof(struct seccomp_data, nr)),
700 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
701 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
702 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
703 	};
704 	struct sock_fprog prog_thread = {
705 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
706 		.filter = filter_thread,
707 	};
708 	struct sock_filter filter_process[] = {
709 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
710 			offsetof(struct seccomp_data, nr)),
711 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
712 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
713 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
714 	};
715 	struct sock_fprog prog_process = {
716 		.len = (unsigned short)ARRAY_SIZE(filter_process),
717 		.filter = filter_process,
718 	};
719 
720 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
721 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
722 	}
723 
724 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
725 			     kill_process ? &prog_process : &prog_thread));
726 
727 	/*
728 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
729 	 * flag cannot be downgraded by a new filter.
730 	 */
731 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
732 
733 	/* Start a thread that will exit immediately. */
734 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
735 	ASSERT_EQ(0, pthread_join(thread, &status));
736 	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
737 
738 	/* Start a thread that will die immediately. */
739 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
740 	ASSERT_EQ(0, pthread_join(thread, &status));
741 	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
742 
743 	/*
744 	 * If we get here, only the spawned thread died. Let the parent know
745 	 * the whole process didn't die (i.e. this thread, the spawner,
746 	 * stayed running).
747 	 */
748 	exit(42);
749 }
750 
751 TEST(KILL_thread)
752 {
753 	int status;
754 	pid_t child_pid;
755 
756 	child_pid = fork();
757 	ASSERT_LE(0, child_pid);
758 	if (child_pid == 0) {
759 		kill_thread_or_group(_metadata, false);
760 		_exit(38);
761 	}
762 
763 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
764 
765 	/* If only the thread was killed, we'll see exit 42. */
766 	ASSERT_TRUE(WIFEXITED(status));
767 	ASSERT_EQ(42, WEXITSTATUS(status));
768 }
769 
770 TEST(KILL_process)
771 {
772 	int status;
773 	pid_t child_pid;
774 
775 	child_pid = fork();
776 	ASSERT_LE(0, child_pid);
777 	if (child_pid == 0) {
778 		kill_thread_or_group(_metadata, true);
779 		_exit(38);
780 	}
781 
782 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
783 
784 	/* If the entire process was killed, we'll see SIGSYS. */
785 	ASSERT_TRUE(WIFSIGNALED(status));
786 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
787 }
788 
789 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
790 TEST(arg_out_of_range)
791 {
792 	struct sock_filter filter[] = {
793 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
794 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
795 	};
796 	struct sock_fprog prog = {
797 		.len = (unsigned short)ARRAY_SIZE(filter),
798 		.filter = filter,
799 	};
800 	long ret;
801 
802 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
803 	ASSERT_EQ(0, ret);
804 
805 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
806 	EXPECT_EQ(-1, ret);
807 	EXPECT_EQ(EINVAL, errno);
808 }
809 
/*
 * ERRNO_FILTER(name, err) - declare a filter program named prog_<name>.
 *
 * The program returns SECCOMP_RET_ERRNO combined with @err for the read
 * syscall and SECCOMP_RET_ALLOW for every other syscall. Two locals are
 * declared: _read_filter_<name> (the instructions) and prog_<name>.
 *
 * The value parameter is called "err" rather than "errno" so that it does
 * not shadow the errno macro, and it is parenthesized so that any
 * expression can be passed.
 */
#define ERRNO_FILTER(name, err)						\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | (err)),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
822 
823 /* Make sure basic errno values are correctly passed through a filter. */
824 TEST(ERRNO_valid)
825 {
826 	ERRNO_FILTER(valid, E2BIG);
827 	long ret;
828 	pid_t parent = getppid();
829 
830 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
831 	ASSERT_EQ(0, ret);
832 
833 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
834 	ASSERT_EQ(0, ret);
835 
836 	EXPECT_EQ(parent, syscall(__NR_getppid));
837 	EXPECT_EQ(-1, read(0, NULL, 0));
838 	EXPECT_EQ(E2BIG, errno);
839 }
840 
841 /* Make sure an errno of zero is correctly handled by the arch code. */
842 TEST(ERRNO_zero)
843 {
844 	ERRNO_FILTER(zero, 0);
845 	long ret;
846 	pid_t parent = getppid();
847 
848 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
849 	ASSERT_EQ(0, ret);
850 
851 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
852 	ASSERT_EQ(0, ret);
853 
854 	EXPECT_EQ(parent, syscall(__NR_getppid));
855 	/* "errno" of 0 is ok. */
856 	EXPECT_EQ(0, read(0, NULL, 0));
857 }
858 
859 /*
860  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
861  * This tests that the errno value gets capped correctly, fixed by
862  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
863  */
864 TEST(ERRNO_capped)
865 {
866 	ERRNO_FILTER(capped, 4096);
867 	long ret;
868 	pid_t parent = getppid();
869 
870 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
871 	ASSERT_EQ(0, ret);
872 
873 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
874 	ASSERT_EQ(0, ret);
875 
876 	EXPECT_EQ(parent, syscall(__NR_getppid));
877 	EXPECT_EQ(-1, read(0, NULL, 0));
878 	EXPECT_EQ(4095, errno);
879 }
880 
881 /*
882  * Filters are processed in reverse order: last applied is executed first.
883  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
884  * SECCOMP_RET_DATA mask results will follow the most recently applied
885  * matching filter return (and not the lowest or highest value).
886  */
887 TEST(ERRNO_order)
888 {
889 	ERRNO_FILTER(first,  11);
890 	ERRNO_FILTER(second, 13);
891 	ERRNO_FILTER(third,  12);
892 	long ret;
893 	pid_t parent = getppid();
894 
895 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
896 	ASSERT_EQ(0, ret);
897 
898 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
899 	ASSERT_EQ(0, ret);
900 
901 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
902 	ASSERT_EQ(0, ret);
903 
904 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
905 	ASSERT_EQ(0, ret);
906 
907 	EXPECT_EQ(parent, syscall(__NR_getppid));
908 	EXPECT_EQ(-1, read(0, NULL, 0));
909 	EXPECT_EQ(12, errno);
910 }
911 
912 FIXTURE_DATA(TRAP) {
913 	struct sock_fprog prog;
914 };
915 
916 FIXTURE_SETUP(TRAP)
917 {
918 	struct sock_filter filter[] = {
919 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
920 			offsetof(struct seccomp_data, nr)),
921 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
922 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
923 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
924 	};
925 
926 	memset(&self->prog, 0, sizeof(self->prog));
927 	self->prog.filter = malloc(sizeof(filter));
928 	ASSERT_NE(NULL, self->prog.filter);
929 	memcpy(self->prog.filter, filter, sizeof(filter));
930 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
931 }
932 
933 FIXTURE_TEARDOWN(TRAP)
934 {
935 	if (self->prog.filter)
936 		free(self->prog.filter);
937 }
938 
939 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
940 {
941 	long ret;
942 
943 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
944 	ASSERT_EQ(0, ret);
945 
946 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
947 	ASSERT_EQ(0, ret);
948 	syscall(__NR_getpid);
949 }
950 
951 /* Ensure that SIGSYS overrides SIG_IGN */
952 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
953 {
954 	long ret;
955 
956 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
957 	ASSERT_EQ(0, ret);
958 
959 	signal(SIGSYS, SIG_IGN);
960 
961 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
962 	ASSERT_EQ(0, ret);
963 	syscall(__NR_getpid);
964 }
965 
/* Snapshot of the last siginfo and signal number seen by TRAP_action(). */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;

/*
 * SA_SIGINFO-style handler: stores a copy of *info in TRAP_info and the
 * signal number in TRAP_nr. The context argument is not used.
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	TRAP_info = *info;
	TRAP_nr = nr;
}
973 
974 TEST_F(TRAP, handler)
975 {
976 	int ret, test;
977 	struct sigaction act;
978 	sigset_t mask;
979 
980 	memset(&act, 0, sizeof(act));
981 	sigemptyset(&mask);
982 	sigaddset(&mask, SIGSYS);
983 
984 	act.sa_sigaction = &TRAP_action;
985 	act.sa_flags = SA_SIGINFO;
986 	ret = sigaction(SIGSYS, &act, NULL);
987 	ASSERT_EQ(0, ret) {
988 		TH_LOG("sigaction failed");
989 	}
990 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
991 	ASSERT_EQ(0, ret) {
992 		TH_LOG("sigprocmask failed");
993 	}
994 
995 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
996 	ASSERT_EQ(0, ret);
997 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
998 	ASSERT_EQ(0, ret);
999 	TRAP_nr = 0;
1000 	memset(&TRAP_info, 0, sizeof(TRAP_info));
1001 	/* Expect the registers to be rolled back. (nr = error) may vary
1002 	 * based on arch. */
1003 	ret = syscall(__NR_getpid);
1004 	/* Silence gcc warning about volatile. */
1005 	test = TRAP_nr;
1006 	EXPECT_EQ(SIGSYS, test);
1007 	struct local_sigsys {
1008 		void *_call_addr;	/* calling user insn */
1009 		int _syscall;		/* triggering system call number */
1010 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
1011 	} *sigsys = (struct local_sigsys *)
1012 #ifdef si_syscall
1013 		&(TRAP_info.si_call_addr);
1014 #else
1015 		&TRAP_info.si_pid;
1016 #endif
1017 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1018 	/* Make sure arch is non-zero. */
1019 	EXPECT_NE(0, sigsys->_arch);
1020 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1021 }
1022 
1023 FIXTURE_DATA(precedence) {
1024 	struct sock_fprog allow;
1025 	struct sock_fprog log;
1026 	struct sock_fprog trace;
1027 	struct sock_fprog error;
1028 	struct sock_fprog trap;
1029 	struct sock_fprog kill;
1030 };
1031 
1032 FIXTURE_SETUP(precedence)
1033 {
1034 	struct sock_filter allow_insns[] = {
1035 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1036 	};
1037 	struct sock_filter log_insns[] = {
1038 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1039 			offsetof(struct seccomp_data, nr)),
1040 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1041 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1042 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1043 	};
1044 	struct sock_filter trace_insns[] = {
1045 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1046 			offsetof(struct seccomp_data, nr)),
1047 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1048 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1049 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1050 	};
1051 	struct sock_filter error_insns[] = {
1052 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1053 			offsetof(struct seccomp_data, nr)),
1054 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1055 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1056 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1057 	};
1058 	struct sock_filter trap_insns[] = {
1059 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1060 			offsetof(struct seccomp_data, nr)),
1061 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1062 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1063 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1064 	};
1065 	struct sock_filter kill_insns[] = {
1066 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1067 			offsetof(struct seccomp_data, nr)),
1068 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1069 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1070 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1071 	};
1072 
1073 	memset(self, 0, sizeof(*self));
1074 #define FILTER_ALLOC(_x) \
1075 	self->_x.filter = malloc(sizeof(_x##_insns)); \
1076 	ASSERT_NE(NULL, self->_x.filter); \
1077 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1078 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1079 	FILTER_ALLOC(allow);
1080 	FILTER_ALLOC(log);
1081 	FILTER_ALLOC(trace);
1082 	FILTER_ALLOC(error);
1083 	FILTER_ALLOC(trap);
1084 	FILTER_ALLOC(kill);
1085 }
1086 
1087 FIXTURE_TEARDOWN(precedence)
1088 {
1089 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1090 	FILTER_FREE(allow);
1091 	FILTER_FREE(log);
1092 	FILTER_FREE(trace);
1093 	FILTER_FREE(error);
1094 	FILTER_FREE(trap);
1095 	FILTER_FREE(kill);
1096 }
1097 
1098 TEST_F(precedence, allow_ok)
1099 {
1100 	pid_t parent, res = 0;
1101 	long ret;
1102 
1103 	parent = getppid();
1104 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1105 	ASSERT_EQ(0, ret);
1106 
1107 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1108 	ASSERT_EQ(0, ret);
1109 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1110 	ASSERT_EQ(0, ret);
1111 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1112 	ASSERT_EQ(0, ret);
1113 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1114 	ASSERT_EQ(0, ret);
1115 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1116 	ASSERT_EQ(0, ret);
1117 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1118 	ASSERT_EQ(0, ret);
1119 	/* Should work just fine. */
1120 	res = syscall(__NR_getppid);
1121 	EXPECT_EQ(parent, res);
1122 }
1123 
1124 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1125 {
1126 	pid_t parent, res = 0;
1127 	long ret;
1128 
1129 	parent = getppid();
1130 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1131 	ASSERT_EQ(0, ret);
1132 
1133 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1134 	ASSERT_EQ(0, ret);
1135 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1136 	ASSERT_EQ(0, ret);
1137 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1138 	ASSERT_EQ(0, ret);
1139 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1140 	ASSERT_EQ(0, ret);
1141 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1142 	ASSERT_EQ(0, ret);
1143 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1144 	ASSERT_EQ(0, ret);
1145 	/* Should work just fine. */
1146 	res = syscall(__NR_getppid);
1147 	EXPECT_EQ(parent, res);
1148 	/* getpid() should never return. */
1149 	res = syscall(__NR_getpid);
1150 	EXPECT_EQ(0, res);
1151 }
1152 
1153 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1154 {
1155 	pid_t parent;
1156 	long ret;
1157 
1158 	parent = getppid();
1159 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1160 	ASSERT_EQ(0, ret);
1161 
1162 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1163 	ASSERT_EQ(0, ret);
1164 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1165 	ASSERT_EQ(0, ret);
1166 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1167 	ASSERT_EQ(0, ret);
1168 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1169 	ASSERT_EQ(0, ret);
1170 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1171 	ASSERT_EQ(0, ret);
1172 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1173 	ASSERT_EQ(0, ret);
1174 	/* Should work just fine. */
1175 	EXPECT_EQ(parent, syscall(__NR_getppid));
1176 	/* getpid() should never return. */
1177 	EXPECT_EQ(0, syscall(__NR_getpid));
1178 }
1179 
1180 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1181 {
1182 	pid_t parent;
1183 	long ret;
1184 
1185 	parent = getppid();
1186 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1187 	ASSERT_EQ(0, ret);
1188 
1189 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1190 	ASSERT_EQ(0, ret);
1191 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1192 	ASSERT_EQ(0, ret);
1193 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1194 	ASSERT_EQ(0, ret);
1195 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1196 	ASSERT_EQ(0, ret);
1197 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1198 	ASSERT_EQ(0, ret);
1199 	/* Should work just fine. */
1200 	EXPECT_EQ(parent, syscall(__NR_getppid));
1201 	/* getpid() should never return. */
1202 	EXPECT_EQ(0, syscall(__NR_getpid));
1203 }
1204 
1205 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1206 {
1207 	pid_t parent;
1208 	long ret;
1209 
1210 	parent = getppid();
1211 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1212 	ASSERT_EQ(0, ret);
1213 
1214 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1215 	ASSERT_EQ(0, ret);
1216 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1217 	ASSERT_EQ(0, ret);
1218 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1219 	ASSERT_EQ(0, ret);
1220 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1221 	ASSERT_EQ(0, ret);
1222 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1223 	ASSERT_EQ(0, ret);
1224 	/* Should work just fine. */
1225 	EXPECT_EQ(parent, syscall(__NR_getppid));
1226 	/* getpid() should never return. */
1227 	EXPECT_EQ(0, syscall(__NR_getpid));
1228 }
1229 
1230 TEST_F(precedence, errno_is_third)
1231 {
1232 	pid_t parent;
1233 	long ret;
1234 
1235 	parent = getppid();
1236 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1237 	ASSERT_EQ(0, ret);
1238 
1239 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1240 	ASSERT_EQ(0, ret);
1241 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1242 	ASSERT_EQ(0, ret);
1243 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1244 	ASSERT_EQ(0, ret);
1245 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1246 	ASSERT_EQ(0, ret);
1247 	/* Should work just fine. */
1248 	EXPECT_EQ(parent, syscall(__NR_getppid));
1249 	EXPECT_EQ(0, syscall(__NR_getpid));
1250 }
1251 
1252 TEST_F(precedence, errno_is_third_in_any_order)
1253 {
1254 	pid_t parent;
1255 	long ret;
1256 
1257 	parent = getppid();
1258 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1259 	ASSERT_EQ(0, ret);
1260 
1261 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1262 	ASSERT_EQ(0, ret);
1263 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1264 	ASSERT_EQ(0, ret);
1265 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1266 	ASSERT_EQ(0, ret);
1267 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1268 	ASSERT_EQ(0, ret);
1269 	/* Should work just fine. */
1270 	EXPECT_EQ(parent, syscall(__NR_getppid));
1271 	EXPECT_EQ(0, syscall(__NR_getpid));
1272 }
1273 
1274 TEST_F(precedence, trace_is_fourth)
1275 {
1276 	pid_t parent;
1277 	long ret;
1278 
1279 	parent = getppid();
1280 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1281 	ASSERT_EQ(0, ret);
1282 
1283 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1284 	ASSERT_EQ(0, ret);
1285 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1286 	ASSERT_EQ(0, ret);
1287 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1288 	ASSERT_EQ(0, ret);
1289 	/* Should work just fine. */
1290 	EXPECT_EQ(parent, syscall(__NR_getppid));
1291 	/* No ptracer */
1292 	EXPECT_EQ(-1, syscall(__NR_getpid));
1293 }
1294 
1295 TEST_F(precedence, trace_is_fourth_in_any_order)
1296 {
1297 	pid_t parent;
1298 	long ret;
1299 
1300 	parent = getppid();
1301 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1302 	ASSERT_EQ(0, ret);
1303 
1304 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1305 	ASSERT_EQ(0, ret);
1306 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1307 	ASSERT_EQ(0, ret);
1308 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1309 	ASSERT_EQ(0, ret);
1310 	/* Should work just fine. */
1311 	EXPECT_EQ(parent, syscall(__NR_getppid));
1312 	/* No ptracer */
1313 	EXPECT_EQ(-1, syscall(__NR_getpid));
1314 }
1315 
1316 TEST_F(precedence, log_is_fifth)
1317 {
1318 	pid_t mypid, parent;
1319 	long ret;
1320 
1321 	mypid = getpid();
1322 	parent = getppid();
1323 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1324 	ASSERT_EQ(0, ret);
1325 
1326 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1327 	ASSERT_EQ(0, ret);
1328 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1329 	ASSERT_EQ(0, ret);
1330 	/* Should work just fine. */
1331 	EXPECT_EQ(parent, syscall(__NR_getppid));
1332 	/* Should also work just fine */
1333 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1334 }
1335 
1336 TEST_F(precedence, log_is_fifth_in_any_order)
1337 {
1338 	pid_t mypid, parent;
1339 	long ret;
1340 
1341 	mypid = getpid();
1342 	parent = getppid();
1343 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1344 	ASSERT_EQ(0, ret);
1345 
1346 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1347 	ASSERT_EQ(0, ret);
1348 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1349 	ASSERT_EQ(0, ret);
1350 	/* Should work just fine. */
1351 	EXPECT_EQ(parent, syscall(__NR_getppid));
1352 	/* Should also work just fine */
1353 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1354 }
1355 
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the wait status, shifted right by 16 bits, equals
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that expressions
 * built from lower-precedence operators (e.g. "a | b") are shifted as a
 * whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag of the tracer loop. It is cleared from the SIGUSR1 handler below,
 * so it is declared volatile sig_atomic_t: the only object type that may be
 * portably written from an asynchronous signal handler.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: request that the tracer loop stops. @sig is unused. */
void tracer_stop(int sig)
{
	tracer_running = false;
}
1375 
/*
 * Callback type run by start_tracer() for each handled stop of the tracee.
 * It receives the harness metadata, the tracee's pid, the wait status of the
 * stop and the opaque argument pointer that was given to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);
1378 
1379 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1380 	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1381 {
1382 	int ret = -1;
1383 	struct sigaction action = {
1384 		.sa_handler = tracer_stop,
1385 	};
1386 
1387 	/* Allow external shutdown. */
1388 	tracer_running = true;
1389 	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1390 
1391 	errno = 0;
1392 	while (ret == -1 && errno != EINVAL)
1393 		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1394 	ASSERT_EQ(0, ret) {
1395 		kill(tracee, SIGKILL);
1396 	}
1397 	/* Wait for attach stop */
1398 	wait(NULL);
1399 
1400 	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1401 						      PTRACE_O_TRACESYSGOOD :
1402 						      PTRACE_O_TRACESECCOMP);
1403 	ASSERT_EQ(0, ret) {
1404 		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1405 		kill(tracee, SIGKILL);
1406 	}
1407 	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1408 		     tracee, NULL, 0);
1409 	ASSERT_EQ(0, ret);
1410 
1411 	/* Unblock the tracee */
1412 	ASSERT_EQ(1, write(fd, "A", 1));
1413 	ASSERT_EQ(0, close(fd));
1414 
1415 	/* Run until we're shut down. Must assert to stop execution. */
1416 	while (tracer_running) {
1417 		int status;
1418 
1419 		if (wait(&status) != tracee)
1420 			continue;
1421 		if (WIFSIGNALED(status) || WIFEXITED(status))
1422 			/* Child is dead. Time to go. */
1423 			return;
1424 
1425 		/* Check if this is a seccomp event. */
1426 		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1427 
1428 		tracer_func(_metadata, tracee, status, args);
1429 
1430 		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1431 			     tracee, NULL, 0);
1432 		ASSERT_EQ(0, ret);
1433 	}
1434 	/* Directly report the status of our test harness results. */
1435 	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1436 }
1437 
/* Common tracer setup/teardown functions. */

/*
 * Signal handler that intentionally does nothing; setup_trace_fixture()
 * installs it for SIGALRM. @num is unused.
 */
void cont_handler(int num)
{ }
/*
 * Fork a child that becomes the ptrace tracer of the calling process.
 *
 * The child runs start_tracer() against the caller's pid with @func, @args
 * and @ptrace_syscall. The parent reads one byte from a pipe, i.e. it blocks
 * until the child has written its sync byte (or the pipe is closed), and then
 * returns the child's pid.
 */
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	char sync;
	int pipefd[2];
	pid_t tracer_pid;
	pid_t tracee = getpid();

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	/* Executed in both parent and child: SIGALRM gets the empty handler. */
	signal(SIGALRM, cont_handler);
	if (tracer_pid == 0) {
		/* Tracer side: only the write end of the pipe is needed. */
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], tracee, func, args,
			     ptrace_syscall);
		/* Reached only if start_tracer() returns (tracee is dead). */
		syscall(__NR_exit, 0);
	}
	/* Tracee side: only the read end of the pipe is needed. */
	close(pipefd[1]);
	/*
	 * NOTE(review): the result is ignored - presumably PR_SET_PTRACER may
	 * be unsupported on some kernels; confirm this is intentional.
	 */
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
	/* NOTE(review): the result of read() is not checked either. */
	read(pipefd[0], &sync, 1);
	close(pipefd[0]);

	return tracer_pid;
}
1469 void teardown_trace_fixture(struct __test_metadata *_metadata,
1470 			    pid_t tracer)
1471 {
1472 	if (tracer) {
1473 		int status;
1474 		/*
1475 		 * Extract the exit code from the other process and
1476 		 * adopt it for ourselves in case its asserts failed.
1477 		 */
1478 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1479 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1480 		if (WEXITSTATUS(status))
1481 			_metadata->passed = 0;
1482 	}
1483 }
1484 
1485 /* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes 0x1001 to. */
	unsigned long poke_addr;
};
1489 
1490 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1491 		 void *args)
1492 {
1493 	int ret;
1494 	unsigned long msg;
1495 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1496 
1497 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1498 	EXPECT_EQ(0, ret);
1499 	/* If this fails, don't try to recover. */
1500 	ASSERT_EQ(0x1001, msg) {
1501 		kill(tracee, SIGKILL);
1502 	}
1503 	/*
1504 	 * Poke in the message.
1505 	 * Registers are not touched to try to keep this relatively arch
1506 	 * agnostic.
1507 	 */
1508 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1509 	EXPECT_EQ(0, ret);
1510 }
1511 
/* Per-test state of the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
	/* Filter program built by the fixture setup, installed by the tests. */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* Word whose address is handed to the tracer as poke target. */
	long poked;
	/* Arguments passed to tracer_poke(). */
	struct tracer_args_poke_t tracer_args;
};
1518 
1519 FIXTURE_SETUP(TRACE_poke)
1520 {
1521 	struct sock_filter filter[] = {
1522 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1523 			offsetof(struct seccomp_data, nr)),
1524 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1525 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1526 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1527 	};
1528 
1529 	self->poked = 0;
1530 	memset(&self->prog, 0, sizeof(self->prog));
1531 	self->prog.filter = malloc(sizeof(filter));
1532 	ASSERT_NE(NULL, self->prog.filter);
1533 	memcpy(self->prog.filter, filter, sizeof(filter));
1534 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1535 
1536 	/* Set up tracer args. */
1537 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1538 
1539 	/* Launch tracer. */
1540 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1541 					   &self->tracer_args, false);
1542 }
1543 
1544 FIXTURE_TEARDOWN(TRACE_poke)
1545 {
1546 	teardown_trace_fixture(_metadata, self->tracer);
1547 	if (self->prog.filter)
1548 		free(self->prog.filter);
1549 }
1550 
1551 TEST_F(TRACE_poke, read_has_side_effects)
1552 {
1553 	ssize_t ret;
1554 
1555 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1556 	ASSERT_EQ(0, ret);
1557 
1558 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1559 	ASSERT_EQ(0, ret);
1560 
1561 	EXPECT_EQ(0, self->poked);
1562 	ret = read(-1, NULL, 0);
1563 	EXPECT_EQ(-1, ret);
1564 	EXPECT_EQ(0x1001, self->poked);
1565 }
1566 
1567 TEST_F(TRACE_poke, getpid_runs_normally)
1568 {
1569 	long ret;
1570 
1571 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1572 	ASSERT_EQ(0, ret);
1573 
1574 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1575 	ASSERT_EQ(0, ret);
1576 
1577 	EXPECT_EQ(0, self->poked);
1578 	EXPECT_NE(0, syscall(__NR_getpid));
1579 	EXPECT_EQ(0, self->poked);
1580 }
1581 
/*
 * Per-architecture register access used by get_syscall()/change_syscall():
 *   ARCH_REGS   - type of the register structure read from the tracee
 *   SYSCALL_NUM - member of ARCH_REGS holding the syscall number
 *   SYSCALL_RET - member of ARCH_REGS holding the syscall return value
 * Unknown architectures fail the build.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	a7
# define SYSCALL_RET	a0
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
#elif defined(__mips__)
/*
 * SYSCALL_SYSCALL_NUM is consulted instead of SYSCALL_NUM when the latter
 * holds __NR_O32_Linux. Number and return value share regs[2], which is
 * flagged through SYSCALL_NUM_RET_SHARE_REG.
 */
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1623 
/*
 * EXPECT_SYSCALL_RETURN(val, action): check the result of a syscall whose
 * return value was rewritten by the tracer.
 *
 * When the syscall return can't be changed (SYSCALL_NUM_RET_SHARE_REG), the
 * test is stubbed out and @action is simply expected to yield -1. Otherwise
 * a negative @val means "@action yields -1 with errno == -@val", and any
 * other @val must be yielded by @action as-is.
 *
 * @val is parenthesized in the sign test so that compound expressions are
 * compared as a whole.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1639 
/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 *
 * HAVE_GETREGS is defined for x86_64, i386 and mips; the register helpers
 * below fall back to PTRACE_GETREGSET/PTRACE_SETREGSET when it is not.
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1646 
/*
 * Architecture-specific syscall fetching routine.
 *
 * Reads the registers of @tracee (PTRACE_GETREGS when HAVE_GETREGS is
 * defined, PTRACE_GETREGSET with NT_PRSTATUS otherwise) and returns the
 * syscall number found in them. Returns -1, after recording an expectation
 * failure, when the registers could not be read.
 */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
	ARCH_REGS regs;
#ifdef HAVE_GETREGS
	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
		TH_LOG("PTRACE_GETREGS failed");
		return -1;
	}
#else
	struct iovec iov;

	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
		TH_LOG("PTRACE_GETREGSET failed");
		return -1;
	}
#endif

#if defined(__mips__)
	/*
	 * When the number register holds __NR_O32_Linux, the value of
	 * SYSCALL_SYSCALL_NUM is reported instead (presumably the o32
	 * indirect syscall case - confirm).
	 */
	if (regs.SYSCALL_NUM == __NR_O32_Linux)
		return regs.SYSCALL_SYSCALL_NUM;
#endif
	return regs.SYSCALL_NUM;
}
1673 
/*
 * Architecture-specific syscall changing routine.
 *
 * Rewrites the syscall number of the stopped @tracee to @syscall. When
 * @syscall is -1, @result is additionally stored in the return-value
 * register, unless SYSCALL_NUM_RET_SHARE_REG is defined, in which case only
 * a message is logged. The register set is then written back to the tracee.
 * Failing ptrace calls are recorded as expectation failures.
 */
void change_syscall(struct __test_metadata *_metadata,
		    pid_t tracee, int syscall, int result)
{
	int ret;
	ARCH_REGS regs;
	/* Fetch the current registers so that only selected ones change. */
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
	struct iovec iov;
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret) {}

	/* These architectures take the new number via the register set. */
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
	defined(__s390__) || defined(__hppa__) || defined(__riscv)
	{
		regs.SYSCALL_NUM = syscall;
	}
#elif defined(__mips__)
	{
		/* Same register selection as in get_syscall(). */
		if (regs.SYSCALL_NUM == __NR_O32_Linux)
			regs.SYSCALL_SYSCALL_NUM = syscall;
		else
			regs.SYSCALL_NUM = syscall;
	}

#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL   23
# endif
	{
		/* arm: the number is set with a dedicated ptrace request. */
		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
		EXPECT_EQ(0, ret);
	}

#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL 0x404
# endif
	{
		/* aarch64: the number is set through its own regset. */
		iov.iov_base = &syscall;
		iov.iov_len = sizeof(syscall);
		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
			     &iov);
		EXPECT_EQ(0, ret);
	}

#else
	ASSERT_EQ(1, 0) {
		TH_LOG("How is the syscall changed on this architecture?");
	}
#endif

	/* If syscall is skipped, change return value. */
	/* The unbraced if controls whichever statement the #ifdef selects. */
	if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
		TH_LOG("Can't modify syscall return on this architecture");
#else
		regs.SYSCALL_RET = result;
#endif

	/* Write the (possibly modified) registers back to the tracee. */
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret);
}
1747 
/*
 * Tracer callback that dispatches on the event message of the stop:
 *   0x1002 - expect getpid and turn it into getppid
 *   0x1003 - expect gettid, skip it and return 45000
 *   0x1004 - expect openat, skip it and return -ESRCH
 *   0x1005 - expect getppid and leave it untouched
 * Any other non-zero message is logged and the tracee is killed.
 * @status and @args are unused.
 */
void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
		    int status, void *args)
{
	int ret;
	unsigned long msg;

	/* Make sure we got the right message. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);

	/* Validate and take action on expected syscalls. */
	switch (msg) {
	case 0x1002:
		/* change getpid to getppid. */
		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
		change_syscall(_metadata, tracee, __NR_getppid, 0);
		break;
	case 0x1003:
		/* skip gettid with valid return code. */
		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
		change_syscall(_metadata, tracee, -1, 45000);
		break;
	case 0x1004:
		/* skip openat with error. */
		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
		change_syscall(_metadata, tracee, -1, -ESRCH);
		break;
	case 0x1005:
		/* do nothing (allow getppid) */
		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
		break;
	default:
		/* A message of 0 passes silently; anything else is fatal. */
		EXPECT_EQ(0, msg) {
			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
			kill(tracee, SIGKILL);
		}
	}

}
1787 
1788 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1789 		   int status, void *args)
1790 {
1791 	int ret, nr;
1792 	unsigned long msg;
1793 	static bool entry;
1794 
1795 	/*
1796 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
1797 	 * is by counting.
1798 	 */
1799 	entry = !entry;
1800 
1801 	/* Make sure we got an appropriate message. */
1802 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1803 	EXPECT_EQ(0, ret);
1804 	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1805 			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1806 
1807 	if (!entry)
1808 		return;
1809 
1810 	nr = get_syscall(_metadata, tracee);
1811 
1812 	if (nr == __NR_getpid)
1813 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1814 	if (nr == __NR_gettid)
1815 		change_syscall(_metadata, tracee, -1, 45000);
1816 	if (nr == __NR_openat)
1817 		change_syscall(_metadata, tracee, -1, -ESRCH);
1818 }
1819 
/* Per-test state of the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Filter program built by the fixture setup. */
	struct sock_fprog prog;
	/*
	 * tracer: pid returned by setup_trace_fixture();
	 * mytid/mypid/parent: gettid, getpid and getppid results sampled in
	 * the fixture setup, before any tracer is started.
	 */
	pid_t tracer, mytid, mypid, parent;
};
1824 
/*
 * Build the fixture filter, record reference values for gettid, getpid and
 * getppid, and start a tracer running tracer_syscall().
 */
FIXTURE_SETUP(TRACE_syscall)
{
	/*
	 * Each listed syscall is traced with its own message, which
	 * tracer_syscall() dispatches on; everything else is allowed.
	 */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};

	/* Keep a heap copy of the program in the fixture. */
	memset(&self->prog, 0, sizeof(self->prog));
	self->prog.filter = malloc(sizeof(filter));
	ASSERT_NE(NULL, self->prog.filter);
	memcpy(self->prog.filter, filter, sizeof(filter));
	self->prog.len = (unsigned short)ARRAY_SIZE(filter);

	/* Prepare some testable syscall results. */
	self->mytid = syscall(__NR_gettid);
	ASSERT_GT(self->mytid, 0);
	ASSERT_NE(self->mytid, 1) {
		TH_LOG("Running this test as init is not supported. :)");
	}

	/* The tests rely on tid and pid being the same value. */
	self->mypid = getpid();
	ASSERT_GT(self->mypid, 0);
	ASSERT_EQ(self->mytid, self->mypid);

	/* ... and on the parent pid being distinguishable from our own. */
	self->parent = getppid();
	ASSERT_GT(self->parent, 0);
	ASSERT_NE(self->parent, self->mypid);

	/* Launch tracer. */
	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
					   false);
}
1866 
1867 FIXTURE_TEARDOWN(TRACE_syscall)
1868 {
1869 	teardown_trace_fixture(_metadata, self->tracer);
1870 	if (self->prog.filter)
1871 		free(self->prog.filter);
1872 }
1873 
/*
 * With the tracer_ptrace() tracer attached (no seccomp filter is installed
 * in this test), getpid() must not return our own pid.
 */
TEST_F(TRACE_syscall, ptrace_syscall_redirected)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer will redirect getpid to getppid. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
1884 
/*
 * With the tracer_ptrace() tracer attached (no seccomp filter is installed
 * in this test), openat must be skipped and fail with ESRCH.
 */
TEST_F(TRACE_syscall, ptrace_syscall_errno)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the open syscall, resulting in ESRCH. */
	/* openat is invoked without arguments; only its number matters here. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
1895 
/*
 * With the tracer_ptrace() tracer attached (no seccomp filter is installed
 * in this test), gettid must be skipped and report the faked value 45000.
 */
TEST_F(TRACE_syscall, ptrace_syscall_faked)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the gettid syscall, resulting fake pid. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
1906 
1907 TEST_F(TRACE_syscall, syscall_allowed)
1908 {
1909 	long ret;
1910 
1911 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1912 	ASSERT_EQ(0, ret);
1913 
1914 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1915 	ASSERT_EQ(0, ret);
1916 
1917 	/* getppid works as expected (no changes). */
1918 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1919 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1920 }
1921 
1922 TEST_F(TRACE_syscall, syscall_redirected)
1923 {
1924 	long ret;
1925 
1926 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1927 	ASSERT_EQ(0, ret);
1928 
1929 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1930 	ASSERT_EQ(0, ret);
1931 
1932 	/* getpid has been redirected to getppid as expected. */
1933 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1934 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1935 }
1936 
1937 TEST_F(TRACE_syscall, syscall_errno)
1938 {
1939 	long ret;
1940 
1941 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1942 	ASSERT_EQ(0, ret);
1943 
1944 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1945 	ASSERT_EQ(0, ret);
1946 
1947 	/* openat has been skipped and an errno return. */
1948 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1949 }
1950 
1951 TEST_F(TRACE_syscall, syscall_faked)
1952 {
1953 	long ret;
1954 
1955 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1956 	ASSERT_EQ(0, ret);
1957 
1958 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1959 	ASSERT_EQ(0, ret);
1960 
1961 	/* gettid has been skipped and an altered return value stored. */
1962 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1963 }
1964 
1965 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1966 {
1967 	struct sock_filter filter[] = {
1968 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1969 			offsetof(struct seccomp_data, nr)),
1970 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1971 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1972 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1973 	};
1974 	struct sock_fprog prog = {
1975 		.len = (unsigned short)ARRAY_SIZE(filter),
1976 		.filter = filter,
1977 	};
1978 	long ret;
1979 
1980 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1981 	ASSERT_EQ(0, ret);
1982 
1983 	/* Install fixture filter. */
1984 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1985 	ASSERT_EQ(0, ret);
1986 
1987 	/* Install "errno on getppid" filter. */
1988 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1989 	ASSERT_EQ(0, ret);
1990 
1991 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
1992 	errno = 0;
1993 	EXPECT_EQ(-1, syscall(__NR_getpid));
1994 	EXPECT_EQ(EPERM, errno);
1995 }
1996 
1997 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1998 {
1999 	struct sock_filter filter[] = {
2000 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2001 			offsetof(struct seccomp_data, nr)),
2002 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2003 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2004 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2005 	};
2006 	struct sock_fprog prog = {
2007 		.len = (unsigned short)ARRAY_SIZE(filter),
2008 		.filter = filter,
2009 	};
2010 	long ret;
2011 
2012 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2013 	ASSERT_EQ(0, ret);
2014 
2015 	/* Install fixture filter. */
2016 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2017 	ASSERT_EQ(0, ret);
2018 
2019 	/* Install "death on getppid" filter. */
2020 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2021 	ASSERT_EQ(0, ret);
2022 
2023 	/* Tracer will redirect getpid to getppid, and we should die. */
2024 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2025 }
2026 
2027 TEST_F(TRACE_syscall, skip_after_ptrace)
2028 {
2029 	struct sock_filter filter[] = {
2030 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2031 			offsetof(struct seccomp_data, nr)),
2032 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2033 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2034 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2035 	};
2036 	struct sock_fprog prog = {
2037 		.len = (unsigned short)ARRAY_SIZE(filter),
2038 		.filter = filter,
2039 	};
2040 	long ret;
2041 
2042 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2043 	teardown_trace_fixture(_metadata, self->tracer);
2044 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2045 					   true);
2046 
2047 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2048 	ASSERT_EQ(0, ret);
2049 
2050 	/* Install "errno on getppid" filter. */
2051 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2052 	ASSERT_EQ(0, ret);
2053 
2054 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2055 	EXPECT_EQ(-1, syscall(__NR_getpid));
2056 	EXPECT_EQ(EPERM, errno);
2057 }
2058 
2059 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2060 {
2061 	struct sock_filter filter[] = {
2062 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2063 			offsetof(struct seccomp_data, nr)),
2064 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2065 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2066 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2067 	};
2068 	struct sock_fprog prog = {
2069 		.len = (unsigned short)ARRAY_SIZE(filter),
2070 		.filter = filter,
2071 	};
2072 	long ret;
2073 
2074 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2075 	teardown_trace_fixture(_metadata, self->tracer);
2076 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2077 					   true);
2078 
2079 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2080 	ASSERT_EQ(0, ret);
2081 
2082 	/* Install "death on getppid" filter. */
2083 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2084 	ASSERT_EQ(0, ret);
2085 
2086 	/* Tracer will redirect getpid to getppid, and we should die. */
2087 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2088 }
2089 
/*
 * Argument validation of seccomp(): invalid operations, flags and argument
 * pointers must be rejected with the expected errno, and a valid
 * SECCOMP_SET_MODE_FILTER call must go through.
 *
 * NOTE(review): every check looks at errno only; "ret" is assigned but never
 * read. The final EXPECT_EQ(0, errno) presumably relies on seccomp()
 * resetting errno before the call - confirm against its definition.
 */
TEST(seccomp_syscall)
{
	/* Trivial allow-everything program used as the filter argument. */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Reject insane operation. */
	ret = seccomp(-1, 0, &prog);
	/* ENOSYS aborts the test: nothing below can be checked without it. */
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject crazy op value!");
	}

	/* Reject strict with flags or pointer. */
	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject mode strict with flags!");
	}
	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject mode strict with uargs!");
	}

	/* Reject insane args for filter. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Did not reject crazy filter flags!");
	}
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
	EXPECT_EQ(EFAULT, errno) {
		TH_LOG("Did not reject NULL filter!");
	}

	/* Finally a well-formed request, which is expected to succeed. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
	EXPECT_EQ(0, errno) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
			strerror(errno));
	}
}
2141 
2142 TEST(seccomp_syscall_mode_lock)
2143 {
2144 	struct sock_filter filter[] = {
2145 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2146 	};
2147 	struct sock_fprog prog = {
2148 		.len = (unsigned short)ARRAY_SIZE(filter),
2149 		.filter = filter,
2150 	};
2151 	long ret;
2152 
2153 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2154 	ASSERT_EQ(0, ret) {
2155 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2156 	}
2157 
2158 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2159 	ASSERT_NE(ENOSYS, errno) {
2160 		TH_LOG("Kernel does not support seccomp syscall!");
2161 	}
2162 	EXPECT_EQ(0, ret) {
2163 		TH_LOG("Could not install filter!");
2164 	}
2165 
2166 	/* Make sure neither entry point will switch to strict. */
2167 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2168 	EXPECT_EQ(EINVAL, errno) {
2169 		TH_LOG("Switched to mode strict!");
2170 	}
2171 
2172 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2173 	EXPECT_EQ(EINVAL, errno) {
2174 		TH_LOG("Switched to mode strict!");
2175 	}
2176 }
2177 
2178 /*
2179  * Test detection of known and unknown filter flags. Userspace needs to be able
2180  * to check if a filter flag is supported by the current kernel and a good way
2181  * of doing that is by attempting to enter filter mode, with the flag bit in
2182  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2183  * that the flag is valid and EINVAL indicates that the flag is invalid.
2184  */
2185 TEST(detect_seccomp_filter_flags)
2186 {
2187 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2188 				 SECCOMP_FILTER_FLAG_LOG,
2189 				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2190 				 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2191 	unsigned int exclusive[] = {
2192 				SECCOMP_FILTER_FLAG_TSYNC,
2193 				SECCOMP_FILTER_FLAG_NEW_LISTENER };
2194 	unsigned int flag, all_flags, exclusive_mask;
2195 	int i;
2196 	long ret;
2197 
2198 	/* Test detection of individual known-good filter flags */
2199 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2200 		int bits = 0;
2201 
2202 		flag = flags[i];
2203 		/* Make sure the flag is a single bit! */
2204 		while (flag) {
2205 			if (flag & 0x1)
2206 				bits ++;
2207 			flag >>= 1;
2208 		}
2209 		ASSERT_EQ(1, bits);
2210 		flag = flags[i];
2211 
2212 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2213 		ASSERT_NE(ENOSYS, errno) {
2214 			TH_LOG("Kernel does not support seccomp syscall!");
2215 		}
2216 		EXPECT_EQ(-1, ret);
2217 		EXPECT_EQ(EFAULT, errno) {
2218 			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2219 			       flag);
2220 		}
2221 
2222 		all_flags |= flag;
2223 	}
2224 
2225 	/*
2226 	 * Test detection of all known-good filter flags combined. But
2227 	 * for the exclusive flags we need to mask them out and try them
2228 	 * individually for the "all flags" testing.
2229 	 */
2230 	exclusive_mask = 0;
2231 	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2232 		exclusive_mask |= exclusive[i];
2233 	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2234 		flag = all_flags & ~exclusive_mask;
2235 		flag |= exclusive[i];
2236 
2237 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2238 		EXPECT_EQ(-1, ret);
2239 		EXPECT_EQ(EFAULT, errno) {
2240 			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2241 			       flag);
2242 		}
2243 	}
2244 
2245 	/* Test detection of an unknown filter flags, without exclusives. */
2246 	flag = -1;
2247 	flag &= ~exclusive_mask;
2248 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2249 	EXPECT_EQ(-1, ret);
2250 	EXPECT_EQ(EINVAL, errno) {
2251 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2252 		       flag);
2253 	}
2254 
2255 	/*
2256 	 * Test detection of an unknown filter flag that may simply need to be
2257 	 * added to this test
2258 	 */
2259 	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2260 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2261 	EXPECT_EQ(-1, ret);
2262 	EXPECT_EQ(EINVAL, errno) {
2263 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2264 		       flag);
2265 	}
2266 }
2267 
2268 TEST(TSYNC_first)
2269 {
2270 	struct sock_filter filter[] = {
2271 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2272 	};
2273 	struct sock_fprog prog = {
2274 		.len = (unsigned short)ARRAY_SIZE(filter),
2275 		.filter = filter,
2276 	};
2277 	long ret;
2278 
2279 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2280 	ASSERT_EQ(0, ret) {
2281 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2282 	}
2283 
2284 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2285 		      &prog);
2286 	ASSERT_NE(ENOSYS, errno) {
2287 		TH_LOG("Kernel does not support seccomp syscall!");
2288 	}
2289 	EXPECT_EQ(0, ret) {
2290 		TH_LOG("Could not install initial filter with TSYNC!");
2291 	}
2292 }
2293 
/* Number of sibling threads tracked by the TSYNC fixture. */
#define TSYNC_SIBLINGS 2
/*
 * State shared between a TSYNC test and one sibling thread running
 * tsync_sibling().
 */
struct tsync_sibling {
	/* pthread handle; PTHREAD_JOIN() zeroes it after a successful join. */
	pthread_t tid;
	/* Kernel thread id, recorded by the sibling itself via gettid. */
	pid_t system_tid;
	/* Semaphore the sibling posts once it has started up. */
	sem_t *started;
	/* Condition variable (and its mutex) the sibling blocks on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the sibling installs @prog via prctl() before waiting. */
	int diverge;
	/* Number of condition wakeups the sibling consumes before moving on. */
	int num_waits;
	/* Filter program installed by the sibling when @diverge is set. */
	struct sock_fprog *prog;
	/* Harness metadata of the owning test, stored by the fixture setup. */
	struct __test_metadata *metadata;
};
2306 
/*
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 *
 * @tid must be a modifiable pthread_t lvalue; it is expanded more than once
 * and is set to 0 after a successful join. @status is passed straight to
 * pthread_join(). Both parameters are parenthesized so that compound
 * argument expressions expand as intended.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join((tid), (status));		\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
2323 
/* Per-test state of the TSYNC fixture. */
FIXTURE_DATA(TSYNC) {
	/*
	 * root_prog: allow-everything filter; apply_prog: filter that kills on
	 * read(). Both are heap copies made by the fixture setup.
	 */
	struct sock_fprog root_prog, apply_prog;
	/* Bookkeeping for each sibling thread. */
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Posted by every sibling once it has started. */
	sem_t started;
	/* Condition variable and mutex the siblings block on. */
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	/* Number of siblings the test has seen start; bounds teardown's loop. */
	int sibling_count;
};
2332 
2333 FIXTURE_SETUP(TSYNC)
2334 {
2335 	struct sock_filter root_filter[] = {
2336 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2337 	};
2338 	struct sock_filter apply_filter[] = {
2339 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2340 			offsetof(struct seccomp_data, nr)),
2341 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2342 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2343 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2344 	};
2345 
2346 	memset(&self->root_prog, 0, sizeof(self->root_prog));
2347 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2348 	memset(&self->sibling, 0, sizeof(self->sibling));
2349 	self->root_prog.filter = malloc(sizeof(root_filter));
2350 	ASSERT_NE(NULL, self->root_prog.filter);
2351 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2352 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2353 
2354 	self->apply_prog.filter = malloc(sizeof(apply_filter));
2355 	ASSERT_NE(NULL, self->apply_prog.filter);
2356 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2357 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2358 
2359 	self->sibling_count = 0;
2360 	pthread_mutex_init(&self->mutex, NULL);
2361 	pthread_cond_init(&self->cond, NULL);
2362 	sem_init(&self->started, 0, 0);
2363 	self->sibling[0].tid = 0;
2364 	self->sibling[0].cond = &self->cond;
2365 	self->sibling[0].started = &self->started;
2366 	self->sibling[0].mutex = &self->mutex;
2367 	self->sibling[0].diverge = 0;
2368 	self->sibling[0].num_waits = 1;
2369 	self->sibling[0].prog = &self->root_prog;
2370 	self->sibling[0].metadata = _metadata;
2371 	self->sibling[1].tid = 0;
2372 	self->sibling[1].cond = &self->cond;
2373 	self->sibling[1].started = &self->started;
2374 	self->sibling[1].mutex = &self->mutex;
2375 	self->sibling[1].diverge = 0;
2376 	self->sibling[1].prog = &self->root_prog;
2377 	self->sibling[1].num_waits = 1;
2378 	self->sibling[1].metadata = _metadata;
2379 }
2380 
2381 FIXTURE_TEARDOWN(TSYNC)
2382 {
2383 	int sib = 0;
2384 
2385 	if (self->root_prog.filter)
2386 		free(self->root_prog.filter);
2387 	if (self->apply_prog.filter)
2388 		free(self->apply_prog.filter);
2389 
2390 	for ( ; sib < self->sibling_count; ++sib) {
2391 		struct tsync_sibling *s = &self->sibling[sib];
2392 
2393 		if (!s->tid)
2394 			continue;
2395 		/*
2396 		 * If a thread is still running, it may be stuck, so hit
2397 		 * it over the head really hard.
2398 		 */
2399 		pthread_kill(s->tid, 9);
2400 	}
2401 	pthread_mutex_destroy(&self->mutex);
2402 	pthread_cond_destroy(&self->cond);
2403 	sem_destroy(&self->started);
2404 }
2405 
2406 void *tsync_sibling(void *data)
2407 {
2408 	long ret = 0;
2409 	struct tsync_sibling *me = data;
2410 
2411 	me->system_tid = syscall(__NR_gettid);
2412 
2413 	pthread_mutex_lock(me->mutex);
2414 	if (me->diverge) {
2415 		/* Just re-apply the root prog to fork the tree */
2416 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2417 				me->prog, 0, 0);
2418 	}
2419 	sem_post(me->started);
2420 	/* Return outside of started so parent notices failures. */
2421 	if (ret) {
2422 		pthread_mutex_unlock(me->mutex);
2423 		return (void *)SIBLING_EXIT_FAILURE;
2424 	}
2425 	do {
2426 		pthread_cond_wait(me->cond, me->mutex);
2427 		me->num_waits = me->num_waits - 1;
2428 	} while (me->num_waits);
2429 	pthread_mutex_unlock(me->mutex);
2430 
2431 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2432 	if (!ret)
2433 		return (void *)SIBLING_EXIT_NEWPRIVS;
2434 	read(0, NULL, 0);
2435 	return (void *)SIBLING_EXIT_UNKILLED;
2436 }
2437 
2438 void tsync_start_sibling(struct tsync_sibling *sibling)
2439 {
2440 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2441 }
2442 
2443 TEST_F(TSYNC, siblings_fail_prctl)
2444 {
2445 	long ret;
2446 	void *status;
2447 	struct sock_filter filter[] = {
2448 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2449 			offsetof(struct seccomp_data, nr)),
2450 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2451 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2452 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2453 	};
2454 	struct sock_fprog prog = {
2455 		.len = (unsigned short)ARRAY_SIZE(filter),
2456 		.filter = filter,
2457 	};
2458 
2459 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2460 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2461 	}
2462 
2463 	/* Check prctl failure detection by requesting sib 0 diverge. */
2464 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2465 	ASSERT_NE(ENOSYS, errno) {
2466 		TH_LOG("Kernel does not support seccomp syscall!");
2467 	}
2468 	ASSERT_EQ(0, ret) {
2469 		TH_LOG("setting filter failed");
2470 	}
2471 
2472 	self->sibling[0].diverge = 1;
2473 	tsync_start_sibling(&self->sibling[0]);
2474 	tsync_start_sibling(&self->sibling[1]);
2475 
2476 	while (self->sibling_count < TSYNC_SIBLINGS) {
2477 		sem_wait(&self->started);
2478 		self->sibling_count++;
2479 	}
2480 
2481 	/* Signal the threads to clean up*/
2482 	pthread_mutex_lock(&self->mutex);
2483 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2484 		TH_LOG("cond broadcast non-zero");
2485 	}
2486 	pthread_mutex_unlock(&self->mutex);
2487 
2488 	/* Ensure diverging sibling failed to call prctl. */
2489 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2490 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2491 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2492 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2493 }
2494 
2495 TEST_F(TSYNC, two_siblings_with_ancestor)
2496 {
2497 	long ret;
2498 	void *status;
2499 
2500 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2501 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2502 	}
2503 
2504 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2505 	ASSERT_NE(ENOSYS, errno) {
2506 		TH_LOG("Kernel does not support seccomp syscall!");
2507 	}
2508 	ASSERT_EQ(0, ret) {
2509 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2510 	}
2511 	tsync_start_sibling(&self->sibling[0]);
2512 	tsync_start_sibling(&self->sibling[1]);
2513 
2514 	while (self->sibling_count < TSYNC_SIBLINGS) {
2515 		sem_wait(&self->started);
2516 		self->sibling_count++;
2517 	}
2518 
2519 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2520 		      &self->apply_prog);
2521 	ASSERT_EQ(0, ret) {
2522 		TH_LOG("Could install filter on all threads!");
2523 	}
2524 	/* Tell the siblings to test the policy */
2525 	pthread_mutex_lock(&self->mutex);
2526 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2527 		TH_LOG("cond broadcast non-zero");
2528 	}
2529 	pthread_mutex_unlock(&self->mutex);
2530 	/* Ensure they are both killed and don't exit cleanly. */
2531 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2532 	EXPECT_EQ(0x0, (long)status);
2533 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2534 	EXPECT_EQ(0x0, (long)status);
2535 }
2536 
2537 TEST_F(TSYNC, two_sibling_want_nnp)
2538 {
2539 	void *status;
2540 
2541 	/* start siblings before any prctl() operations */
2542 	tsync_start_sibling(&self->sibling[0]);
2543 	tsync_start_sibling(&self->sibling[1]);
2544 	while (self->sibling_count < TSYNC_SIBLINGS) {
2545 		sem_wait(&self->started);
2546 		self->sibling_count++;
2547 	}
2548 
2549 	/* Tell the siblings to test no policy */
2550 	pthread_mutex_lock(&self->mutex);
2551 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2552 		TH_LOG("cond broadcast non-zero");
2553 	}
2554 	pthread_mutex_unlock(&self->mutex);
2555 
2556 	/* Ensure they are both upset about lacking nnp. */
2557 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2558 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2559 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2560 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2561 }
2562 
2563 TEST_F(TSYNC, two_siblings_with_no_filter)
2564 {
2565 	long ret;
2566 	void *status;
2567 
2568 	/* start siblings before any prctl() operations */
2569 	tsync_start_sibling(&self->sibling[0]);
2570 	tsync_start_sibling(&self->sibling[1]);
2571 	while (self->sibling_count < TSYNC_SIBLINGS) {
2572 		sem_wait(&self->started);
2573 		self->sibling_count++;
2574 	}
2575 
2576 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2577 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2578 	}
2579 
2580 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2581 		      &self->apply_prog);
2582 	ASSERT_NE(ENOSYS, errno) {
2583 		TH_LOG("Kernel does not support seccomp syscall!");
2584 	}
2585 	ASSERT_EQ(0, ret) {
2586 		TH_LOG("Could install filter on all threads!");
2587 	}
2588 
2589 	/* Tell the siblings to test the policy */
2590 	pthread_mutex_lock(&self->mutex);
2591 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2592 		TH_LOG("cond broadcast non-zero");
2593 	}
2594 	pthread_mutex_unlock(&self->mutex);
2595 
2596 	/* Ensure they are both killed and don't exit cleanly. */
2597 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2598 	EXPECT_EQ(0x0, (long)status);
2599 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2600 	EXPECT_EQ(0x0, (long)status);
2601 }
2602 
2603 TEST_F(TSYNC, two_siblings_with_one_divergence)
2604 {
2605 	long ret;
2606 	void *status;
2607 
2608 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2609 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2610 	}
2611 
2612 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2613 	ASSERT_NE(ENOSYS, errno) {
2614 		TH_LOG("Kernel does not support seccomp syscall!");
2615 	}
2616 	ASSERT_EQ(0, ret) {
2617 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2618 	}
2619 	self->sibling[0].diverge = 1;
2620 	tsync_start_sibling(&self->sibling[0]);
2621 	tsync_start_sibling(&self->sibling[1]);
2622 
2623 	while (self->sibling_count < TSYNC_SIBLINGS) {
2624 		sem_wait(&self->started);
2625 		self->sibling_count++;
2626 	}
2627 
2628 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2629 		      &self->apply_prog);
2630 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2631 		TH_LOG("Did not fail on diverged sibling.");
2632 	}
2633 
2634 	/* Wake the threads */
2635 	pthread_mutex_lock(&self->mutex);
2636 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2637 		TH_LOG("cond broadcast non-zero");
2638 	}
2639 	pthread_mutex_unlock(&self->mutex);
2640 
2641 	/* Ensure they are both unkilled. */
2642 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2643 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2644 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2645 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2646 }
2647 
/*
 * Siblings are started before the test thread has any filter: sibling 0
 * installs its own filter (diverge), sibling 1 stays unfiltered. A TSYNC
 * install must first fail naming sibling 0; once that sibling is gone the
 * install must succeed and kill the remaining sibling, and finally succeed
 * once more with no siblings left.
 */
TEST_F(TSYNC, two_siblings_not_under_filter)
{
	long ret, sib;
	void *status;
	/* 100ms pause between liveness polls of a joined sibling. */
	struct timespec delay = { .tv_nsec = 100000000 };

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/*
	 * Sibling 0 will have its own seccomp policy
	 * and Sibling 1 will not be under seccomp at
	 * all. Sibling 1 will enter seccomp and 0
	 * will cause failure.
	 */
	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	/* Wait until both siblings have posted the "started" semaphore. */
	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	/* Only now does the test thread get a filter of its own. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}

	/* The sync attempt is expected to return the tid of sibling 0. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(ret, self->sibling[0].system_tid) {
		TH_LOG("Did not fail on diverged sibling.");
	}
	/* Pick the sibling that blocked the sync as the first to retire. */
	sib = 1;
	if (ret == self->sibling[0].system_tid)
		sib = 0;

	pthread_mutex_lock(&self->mutex);

	/* Increment the other siblings num_waits so we can clean up
	 * the one we just saw.
	 */
	self->sibling[!sib].num_waits += 1;

	/* Signal the thread to clean up*/
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	PTHREAD_JOIN(self->sibling[sib].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	/* Poll for actual task death. pthread_join doesn't guarantee it. */
	while (!kill(self->sibling[sib].system_tid, 0))
		nanosleep(&delay, NULL);
	/* Switch to the remaining sibling */
	sib = !sib;

	/* With the diverged sibling gone, the sync must go through. */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(0, ret) {
		TH_LOG("Expected the remaining sibling to sync");
	};

	pthread_mutex_lock(&self->mutex);

	/* If remaining sibling didn't have a chance to wake up during
	 * the first broadcast, manually reduce the num_waits now.
	 */
	if (self->sibling[sib].num_waits > 1)
		self->sibling[sib].num_waits = 1;
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	PTHREAD_JOIN(self->sibling[sib].tid, &status);
	/* A status of 0 means none of the SIBLING_EXIT_* codes was returned. */
	EXPECT_EQ(0, (long)status);
	/* Poll for actual task death. pthread_join doesn't guarantee it. */
	while (!kill(self->sibling[sib].system_tid, 0))
		nanosleep(&delay, NULL);

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(0, ret);  /* just us chickens */
}
2737 
/*
 * Make sure restarted syscalls are seen directly as "restart_syscall".
 *
 * A forked child makes itself a ptrace tracee, installs a filter that reports
 * nanosleep and restart_syscall to the tracer with distinct event messages
 * (0x100 and 0x200), and then sleeps. The parent interrupts the sleep with
 * SIGSTOP/SIGCONT and checks which syscall number the filter reports when
 * the sleep is resumed. The two processes synchronize over a pipe.
 */
TEST(syscall_restart)
{
	long ret;
	unsigned long msg;
	pid_t child_pid;
	int pipefd[2];
	int status;
	siginfo_t info = { };
	/*
	 * Jump offsets are relative to the next instruction: the first group
	 * lands on RET_ALLOW, nanosleep and restart_syscall land on their
	 * own RET_TRACE lines, and anything other than write falls through
	 * to RET_KILL.
	 */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),

#ifdef __NR_sigreturn
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
#endif
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

		/* Allow __NR_write for easy logging. */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		/* The nanosleep jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
		/* The restart_syscall jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
#if defined(__arm__)
	/* Only needed for the ARM-specific expectation further down. */
	struct utsname utsbuf;
#endif

	ASSERT_EQ(0, pipe(pipefd));

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		/* Child uses EXPECT not ASSERT to deliver status correctly. */
		char buf = ' ';
		struct timespec timeout = { };

		/* Attach parent as tracer and stop. */
		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
		EXPECT_EQ(0, raise(SIGSTOP));

		/* The child only reads from the pipe. */
		EXPECT_EQ(0, close(pipefd[1]));

		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
		}

		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		EXPECT_EQ(0, ret) {
			TH_LOG("Failed to install filter!");
		}

		/* First sync: wait for the parent's '.' before sleeping. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed to read() sync from parent");
		}
		EXPECT_EQ('.', buf) {
			TH_LOG("Failed to get sync data from read()");
		}

		/* Start nanosleep to be interrupted. */
		timeout.tv_sec = 1;
		errno = 0;
		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
		}

		/* Read final sync from parent. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed final read() from parent");
		}
		EXPECT_EQ('!', buf) {
			TH_LOG("Failed to get final data from read()");
		}

		/* Directly report the status of our test harness results. */
		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
						     : EXIT_FAILURE);
	}
	/* The parent only writes to the pipe. */
	EXPECT_EQ(0, close(pipefd[0]));

	/* Attach to child, setup options, and release. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
			    PTRACE_O_TRACESECCOMP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], ".", 1));

	/* Wait for nanosleep() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
	/* 0x100 is the data value of the filter's nanosleep RET_TRACE. */
	ASSERT_EQ(0x100, msg);
	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));

	/* Might as well check siginfo for sanity while we're here. */
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	ASSERT_EQ(SIGTRAP, info.si_signo);
	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
	EXPECT_EQ(0, info.si_errno);
	EXPECT_EQ(getuid(), info.si_uid);
	/* Verify signal delivery came from child (seccomp-triggered). */
	EXPECT_EQ(child_pid, info.si_pid);

	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	/*
	 * There is no siginfo on SIGSTOP any more, so we can't verify
	 * signal delivery came from parent now (getpid() == info.si_pid).
	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
	 */
	EXPECT_EQ(SIGSTOP, info.si_signo);

	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
	ASSERT_EQ(0, kill(child_pid, SIGCONT));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

	/* Wait for restart_syscall() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

	/* 0x200 is the data value of the filter's restart_syscall RET_TRACE. */
	ASSERT_EQ(0x200, msg);
	ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
	/*
	 * FIXME:
	 * - native ARM registers do NOT expose true syscall.
	 * - compat ARM registers on ARM64 DO expose true syscall.
	 */
	ASSERT_EQ(0, uname(&utsbuf));
	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
		EXPECT_EQ(__NR_nanosleep, ret);
	} else
#endif
	{
		EXPECT_EQ(__NR_restart_syscall, ret);
	}

	/* Write again to end test. */
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], "!", 1));
	EXPECT_EQ(0, close(pipefd[1]));

	/* Fold the child's exit status into this test's result. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	if (WIFSIGNALED(status) || WEXITSTATUS(status))
		_metadata->passed = 0;
}
2911 
/*
 * Check SECCOMP_FILTER_FLAG_LOG: it must be rejected in strict mode, be
 * accepted in filter mode, and not stop a RET_KILL filter from taking
 * effect. The test is declared with TEST_SIGNAL(..., SIGSYS); presumably the
 * harness expects it to end with that signal on the final getpid() --
 * confirm against the harness.
 */
TEST_SIGNAL(filter_flag_log, SIGSYS)
{
	/* Allows every syscall. */
	struct sock_filter allow_filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	/* Kills on getpid, allows everything else. */
	struct sock_filter kill_filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog allow_prog = {
		.len = (unsigned short)ARRAY_SIZE(allow_filter),
		.filter = allow_filter,
	};
	struct sock_fprog kill_prog = {
		.len = (unsigned short)ARRAY_SIZE(kill_filter),
		.filter = kill_filter,
	};
	long ret;
	/* Captured up front, compared against a raw getppid syscall below. */
	pid_t parent = getppid();

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
		      &allow_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	EXPECT_NE(0, ret) {
		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
	}
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
	}

	/* Verify that a simple, permissive filter can be added with no flags */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
	EXPECT_EQ(0, ret);

	/*
	 * See if the same filter can be added with the FILTER_FLAG_LOG flag.
	 * NOTE(review): the errno check below presumes errno does not carry
	 * over from the EINVAL probe above -- confirm against the seccomp()
	 * wrapper.
	 */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
		      &allow_prog);
	ASSERT_NE(EINVAL, errno) {
		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
	}
	EXPECT_EQ(0, ret);

	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
		      &kill_prog);
	EXPECT_EQ(0, ret);

	/* getppid is not matched by the kill filter and must still work. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
2972 
2973 TEST(get_action_avail)
2974 {
2975 	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2976 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2977 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2978 	__u32 unknown_action = 0x10000000U;
2979 	int i;
2980 	long ret;
2981 
2982 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2983 	ASSERT_NE(ENOSYS, errno) {
2984 		TH_LOG("Kernel does not support seccomp syscall!");
2985 	}
2986 	ASSERT_NE(EINVAL, errno) {
2987 		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2988 	}
2989 	EXPECT_EQ(ret, 0);
2990 
2991 	for (i = 0; i < ARRAY_SIZE(actions); i++) {
2992 		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2993 		EXPECT_EQ(ret, 0) {
2994 			TH_LOG("Expected action (0x%X) not available!",
2995 			       actions[i]);
2996 		}
2997 	}
2998 
2999 	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
3000 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3001 	EXPECT_EQ(ret, -1);
3002 	EXPECT_EQ(errno, EOPNOTSUPP);
3003 }
3004 
/*
 * Check PTRACE_SECCOMP_GET_METADATA: a child installs two filters, the first
 * with SECCOMP_FILTER_FLAG_LOG and the second without, and the parent
 * attaches with ptrace and reads back the flags stored at filter offsets 0
 * and 1.
 */
TEST(get_metadata)
{
	pid_t pid;
	int pipefd[2];
	char buf;
	struct seccomp_metadata md;
	long ret;

	/* Only real root can get metadata. */
	if (geteuid()) {
		XFAIL(return, "get_metadata requires real root");
		return;
	}

	ASSERT_EQ(0, pipe(pipefd));

	pid = fork();
	ASSERT_GE(pid, 0);
	if (pid == 0) {
		struct sock_filter filter[] = {
			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		};
		struct sock_fprog prog = {
			.len = (unsigned short)ARRAY_SIZE(filter),
			.filter = filter,
		};

		/* one with log, one without */
		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
				     SECCOMP_FILTER_FLAG_LOG, &prog));
		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));

		/* Tell the parent that both filters are in place. */
		EXPECT_EQ(0, close(pipefd[0]));
		ASSERT_EQ(1, write(pipefd[1], "1", 1));
		ASSERT_EQ(0, close(pipefd[1]));

		/* Stay alive until the parent kills us. */
		while (1)
			sleep(100);
	}

	/* Wait for the child's sync byte before attaching. */
	ASSERT_EQ(0, close(pipefd[1]));
	ASSERT_EQ(1, read(pipefd[0], &buf, 1));

	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
	ASSERT_EQ(pid, waitpid(pid, NULL, 0));

	/* Past here must not use ASSERT or child process is never killed. */

	md.filter_off = 0;
	/* Cleared so the EINVAL test below only sees this ptrace() call. */
	errno = 0;
	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
	EXPECT_EQ(sizeof(md), ret) {
		if (errno == EINVAL)
			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
	}

	/* The test expects offset 0 to carry the LOG flag... */
	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
	EXPECT_EQ(md.filter_off, 0);

	/* ...and offset 1 to carry no flags. */
	md.filter_off = 1;
	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
	EXPECT_EQ(sizeof(md), ret);
	EXPECT_EQ(md.flags, 0);
	EXPECT_EQ(md.filter_off, 1);

skip:
	/*
	 * NOTE(review): the child is killed but never waited for here, and
	 * pipefd[0] is left open -- confirm this is intentional.
	 */
	ASSERT_EQ(0, kill(pid, SIGKILL));
}
3073 
/*
 * Install a filter that answers syscall number @nr with
 * SECCOMP_RET_USER_NOTIF and allows every other syscall. @flags is passed
 * through to seccomp(); the function returns whatever seccomp() returns.
 */
static int user_trap_syscall(int nr, unsigned int flags)
{
	struct sock_filter notify_filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog notify_prog = {
		.len = (unsigned short)ARRAY_SIZE(notify_filter),
		.filter = notify_filter,
	};

	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &notify_prog);
}
3091 
/*
 * Sentinel the supervising side stores in resp.val as the faked syscall
 * result; trapped children exit 0 only if their syscall returned it.
 */
#define USER_NOTIF_MAGIC INT_MAX
/*
 * Walk through the basic user-notification flow:
 *  - a filter returning SECCOMP_RET_USER_NOTIF with no listener is expected
 *    to make the trapped syscall fail with ENOSYS;
 *  - a second SECCOMP_FILTER_FLAG_NEW_LISTENER filter is expected to be
 *    refused with EBUSY;
 *  - poll() on the listener is expected to report POLLIN before the
 *    notification is received and POLLOUT afterwards;
 *  - a response with non-zero flags is expected to be rejected with EINVAL,
 *    and a valid response makes the child's getppid() return
 *    USER_NOTIF_MAGIC.
 */
TEST(user_notification_basic)
{
	pid_t pid;
	long ret;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	struct pollfd pollfd;

	/* Allow-everything program, stacked below purely as filler. */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	pid = fork();
	ASSERT_GE(pid, 0);

	/* Check that we get -ENOSYS with no listener attached */
	if (pid == 0) {
		/* flags == 0: the notify filter is installed without a listener */
		if (user_trap_syscall(__NR_getppid, 0) < 0)
			exit(1);
		ret = syscall(__NR_getppid);
		/* exit status 0 only when the call failed with ENOSYS */
		exit(ret >= 0 || errno != ENOSYS);
	}

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));

	/* Add some no-op filters for grins. */
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);

	/* Check that the basic notification machinery works */
	listener = user_trap_syscall(__NR_getppid,
				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	/* Installing a second listener in the chain should EBUSY */
	EXPECT_EQ(user_trap_syscall(__NR_getppid,
				    SECCOMP_FILTER_FLAG_NEW_LISTENER),
		  -1);
	EXPECT_EQ(errno, EBUSY);

	pid = fork();
	ASSERT_GE(pid, 0);

	/* Child: trapped getppid() must come back with the faked value. */
	if (pid == 0) {
		ret = syscall(__NR_getppid);
		exit(ret != USER_NOTIF_MAGIC);
	}

	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	/* Before the notification is received only POLLIN is expected. */
	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLIN);

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	pollfd.fd = listener;
	pollfd.events = POLLIN | POLLOUT;

	/* After receiving it, only POLLOUT is expected. */
	EXPECT_GT(poll(&pollfd, 1, -1), 0);
	EXPECT_EQ(pollfd.revents, POLLOUT);

	EXPECT_EQ(req.data.nr,  __NR_getppid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	/* check that we make sure flags == 0 */
	resp.flags = 1;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, EINVAL);

	/* The same response with flags cleared must be accepted. */
	resp.flags = 0;
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
3187 
/*
 * Kill a child while its trapped getppid() is still waiting for a reply and
 * check how the notification id behaves: SECCOMP_IOCTL_NOTIF_ID_VALID is
 * expected to succeed before the kill and fail afterwards, and a late
 * SECCOMP_IOCTL_NOTIF_SEND is expected to fail with ENOENT.
 */
TEST(user_notification_kill_in_middle)
{
	pid_t pid;
	long ret;
	int listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	listener = user_trap_syscall(__NR_getppid,
				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	/*
	 * Check that nothing bad happens when we kill the task in the middle
	 * of a syscall.
	 */
	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		ret = syscall(__NR_getppid);
		exit(ret != USER_NOTIF_MAGIC);
	}

	/* Pick up the notification; its id must be reported as valid. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);

	/* No reply is sent: the child is killed and reaped instead. */
	EXPECT_EQ(kill(pid, SIGKILL), 0);
	EXPECT_EQ(waitpid(pid, NULL, 0), pid);

	/* With the child gone the same id must no longer validate. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);

	/* Answering the dead notification must fail with ENOENT. */
	resp.id = req.id;
	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
	EXPECT_EQ(ret, -1);
	EXPECT_EQ(errno, ENOENT);
}
3230 
/* Descriptor signal_handler() writes to; -1 until a test assigns a real fd. */
static int handled = -1;

/*
 * Signal handler used by the notification tests: writes the single byte "c"
 * to the descriptor stored in @handled so another process can observe that
 * the signal was delivered.
 *
 * Only write(2) is used, including for the failure diagnostic, because
 * perror() is not async-signal-safe. errno is saved on entry and restored on
 * exit so the interrupted code never sees a value left behind by the handler.
 *
 * @signal: delivered signal number (unused).
 */
static void signal_handler(int signal)
{
	static const char err_msg[] = "write from signal: write failed\n";
	int saved_errno = errno;

	(void)signal;

	if (write(handled, "c", 1) != 1) {
		if (write(STDERR_FILENO, err_msg, sizeof(err_msg) - 1) < 0) {
			/* stderr is unusable as well; nothing left to report to */
		}
	}

	errno = saved_errno;
}
3238 
/*
 * Deliver SIGUSR1 to a child whose gettid() is waiting on a notification.
 * The test expects the first notification to be dead once the handler has
 * run (a reply to it must fail with ENOENT), a second notification to be
 * receivable, and the error value sent in reply to that one to reach the
 * child as errno.
 */
TEST(user_notification_signal)
{
	pid_t pid;
	long ret;
	int status, listener, sk_pair[2];
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};
	char c;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	/* Channel over which the child's handler reports signal delivery. */
	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);

	listener = user_trap_syscall(__NR_gettid,
				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		close(sk_pair[0]);
		/* signal_handler() writes its marker byte to this end. */
		handled = sk_pair[1];
		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
			perror("signal");
			exit(1);
		}
		/*
		 * ERESTARTSYS behavior is a bit hard to test, because we need
		 * to rely on a signal that has not yet been handled. Let's at
		 * least check that the error code gets propagated through, and
		 * hope that it doesn't break when there is actually a signal :)
		 */
		ret = syscall(__NR_gettid);
		/* 512 is the value the parent sends back as -ERESTARTSYS. */
		exit(!(ret == -1 && errno == 512));
	}

	close(sk_pair[1]);

	/* Receive the notification but do not answer it yet. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	EXPECT_EQ(kill(pid, SIGUSR1), 0);

	/*
	 * Make sure the signal really is delivered, which means we're not
	 * stuck in the user notification code any more and the notification
	 * should be dead.
	 */
	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);

	resp.id = req.id;
	resp.error = -EPERM;
	resp.val = 0;

	/* Replying to the first (now dead) notification must fail. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
	EXPECT_EQ(errno, ENOENT);

	/* A second notification is expected to be available to receive. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);

	resp.id = req.id;
	resp.error = -512; /* -ERESTARTSYS */
	resp.val = 0;

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	/* Child exits 0 only if gettid() failed with errno == 512. */
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
3311 
3312 TEST(user_notification_closed_listener)
3313 {
3314 	pid_t pid;
3315 	long ret;
3316 	int status, listener;
3317 
3318 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3319 	ASSERT_EQ(0, ret) {
3320 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3321 	}
3322 
3323 	listener = user_trap_syscall(__NR_getppid,
3324 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3325 	ASSERT_GE(listener, 0);
3326 
3327 	/*
3328 	 * Check that we get an ENOSYS when the listener is closed.
3329 	 */
3330 	pid = fork();
3331 	ASSERT_GE(pid, 0);
3332 	if (pid == 0) {
3333 		close(listener);
3334 		ret = syscall(__NR_getppid);
3335 		exit(ret != -1 && errno != ENOSYS);
3336 	}
3337 
3338 	close(listener);
3339 
3340 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3341 	EXPECT_EQ(true, WIFEXITED(status));
3342 	EXPECT_EQ(0, WEXITSTATUS(status));
3343 }
3344 
/*
 * Check that a pid in a child namespace still shows up as valid in ours.
 *
 * The test unshares user and pid namespaces, forks a child that traps on
 * getppid(), and expects the pid reported in the notification to equal the
 * pid fork() returned to the parent.
 */
TEST(user_notification_child_pid_ns)
{
	pid_t pid;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);

	listener = user_trap_syscall(__NR_getppid,
				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	/* Child: exit 0 only if getppid() returned the faked value. */
	if (pid == 0)
		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);

	/* The notification must name the child by the pid we know it as. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	EXPECT_EQ(req.pid, pid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
	close(listener);
}
3381 
3382 /*
3383  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3384  * invalid.
3385  */
3386 TEST(user_notification_sibling_pid_ns)
3387 {
3388 	pid_t pid, pid2;
3389 	int status, listener;
3390 	struct seccomp_notif req = {};
3391 	struct seccomp_notif_resp resp = {};
3392 
3393 	ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3394 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3395 	}
3396 
3397 	listener = user_trap_syscall(__NR_getppid,
3398 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3399 	ASSERT_GE(listener, 0);
3400 
3401 	pid = fork();
3402 	ASSERT_GE(pid, 0);
3403 
3404 	if (pid == 0) {
3405 		ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3406 
3407 		pid2 = fork();
3408 		ASSERT_GE(pid2, 0);
3409 
3410 		if (pid2 == 0)
3411 			exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3412 
3413 		EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3414 		EXPECT_EQ(true, WIFEXITED(status));
3415 		EXPECT_EQ(0, WEXITSTATUS(status));
3416 		exit(WEXITSTATUS(status));
3417 	}
3418 
3419 	/* Create the sibling ns, and sibling in it. */
3420 	ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3421 	ASSERT_EQ(errno, 0);
3422 
3423 	pid2 = fork();
3424 	ASSERT_GE(pid2, 0);
3425 
3426 	if (pid2 == 0) {
3427 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3428 		/*
3429 		 * The pid should be 0, i.e. the task is in some namespace that
3430 		 * we can't "see".
3431 		 */
3432 		EXPECT_EQ(req.pid, 0);
3433 
3434 		resp.id = req.id;
3435 		resp.error = 0;
3436 		resp.val = USER_NOTIF_MAGIC;
3437 
3438 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3439 		exit(0);
3440 	}
3441 
3442 	close(listener);
3443 
3444 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3445 	EXPECT_EQ(true, WIFEXITED(status));
3446 	EXPECT_EQ(0, WEXITSTATUS(status));
3447 
3448 	EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3449 	EXPECT_EQ(true, WIFEXITED(status));
3450 	EXPECT_EQ(0, WEXITSTATUS(status));
3451 }
3452 
/*
 * Check that a SECCOMP_IOCTL_NOTIF_RECV with a bad (NULL) buffer fails with
 * EFAULT without losing the pending notification: a second receive with a
 * valid buffer must still deliver it.
 */
TEST(user_notification_fault_recv)
{
	pid_t pid;
	int status, listener;
	struct seccomp_notif req = {};
	struct seccomp_notif_resp resp = {};

	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);

	listener = user_trap_syscall(__NR_getppid,
				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
	ASSERT_GE(listener, 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	/* Child: exit 0 only if getppid() returned the faked value. */
	if (pid == 0)
		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);

	/* Do a bad recv() */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
	EXPECT_EQ(errno, EFAULT);

	/* We should still be able to receive this notification, though. */
	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
	EXPECT_EQ(req.pid, pid);

	resp.id = req.id;
	resp.error = 0;
	resp.val = USER_NOTIF_MAGIC;

	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
3490 
/*
 * Ask the kernel for its notification structure sizes via
 * SECCOMP_GET_NOTIF_SIZES and check that they match the sizes of the
 * struct seccomp_notif and struct seccomp_notif_resp this file was built
 * against.
 */
TEST(seccomp_get_notif_sizes)
{
	struct seccomp_notif_sizes sizes;

	ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
	EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
	EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
3499 
/*
 * Thin wrapper around the kcmp() system call in KCMP_FILE mode, comparing
 * descriptor @fd1 of process @pid1 with descriptor @fd2 of process @pid2.
 *
 * Returns the raw syscall() result. When the build headers do not define
 * __NR_kcmp, no call is made: errno is set to ENOSYS and -1 is returned.
 */
static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifndef __NR_kcmp
	/* No syscall number available at build time. */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#endif
}
3509 
3510 TEST(user_notification_continue)
3511 {
3512 	pid_t pid;
3513 	long ret;
3514 	int status, listener;
3515 	struct seccomp_notif req = {};
3516 	struct seccomp_notif_resp resp = {};
3517 	struct pollfd pollfd;
3518 
3519 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3520 	ASSERT_EQ(0, ret) {
3521 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3522 	}
3523 
3524 	listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3525 	ASSERT_GE(listener, 0);
3526 
3527 	pid = fork();
3528 	ASSERT_GE(pid, 0);
3529 
3530 	if (pid == 0) {
3531 		int dup_fd, pipe_fds[2];
3532 		pid_t self;
3533 
3534 		ret = pipe(pipe_fds);
3535 		if (ret < 0)
3536 			exit(1);
3537 
3538 		dup_fd = dup(pipe_fds[0]);
3539 		if (dup_fd < 0)
3540 			exit(1);
3541 
3542 		self = getpid();
3543 
3544 		ret = filecmp(self, self, pipe_fds[0], dup_fd);
3545 		if (ret)
3546 			exit(2);
3547 
3548 		exit(0);
3549 	}
3550 
3551 	pollfd.fd = listener;
3552 	pollfd.events = POLLIN | POLLOUT;
3553 
3554 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3555 	EXPECT_EQ(pollfd.revents, POLLIN);
3556 
3557 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3558 
3559 	pollfd.fd = listener;
3560 	pollfd.events = POLLIN | POLLOUT;
3561 
3562 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3563 	EXPECT_EQ(pollfd.revents, POLLOUT);
3564 
3565 	EXPECT_EQ(req.data.nr, __NR_dup);
3566 
3567 	resp.id = req.id;
3568 	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3569 
3570 	/*
3571 	 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
3572 	 * args be set to 0.
3573 	 */
3574 	resp.error = 0;
3575 	resp.val = USER_NOTIF_MAGIC;
3576 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3577 	EXPECT_EQ(errno, EINVAL);
3578 
3579 	resp.error = USER_NOTIF_MAGIC;
3580 	resp.val = 0;
3581 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3582 	EXPECT_EQ(errno, EINVAL);
3583 
3584 	resp.error = 0;
3585 	resp.val = 0;
3586 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3587 		if (errno == EINVAL)
3588 			XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3589 	}
3590 
3591 skip:
3592 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3593 	EXPECT_EQ(true, WIFEXITED(status));
3594 	EXPECT_EQ(0, WEXITSTATUS(status)) {
3595 		if (WEXITSTATUS(status) == 2) {
3596 			XFAIL(return, "Kernel does not support kcmp() syscall");
3597 			return;
3598 		}
3599 	}
3600 }
3601 
3602 /*
3603  * TODO:
3604  * - add microbenchmarks
3605  * - expand NNP testing
3606  * - better arch-specific TRACE and TRAP handlers.
3607  * - endianness checking when appropriate
3608  * - 64-bit arg prodding
3609  * - arch value testing (x86 modes especially)
3610  * - verify that FILTER_FLAG_LOG filters generate log messages
3611  * - verify that RET_LOG generates log messages
3612  * - ...
3613  */
3614 
/*
 * TEST_HARNESS_MAIN comes from ../kselftest_harness.h; presumably it expands
 * to the program entry point that runs the TEST() cases defined above --
 * confirm against that header.
 */
TEST_HARNESS_MAIN
3616