xref: /openbmc/linux/tools/testing/selftests/seccomp/seccomp_bpf.c (revision 9c6d26df1fae6ad4718d51c48e6517913304ed27)
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #include <sys/types.h>
9 
10 /*
11  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
12  * we need to use the kernel's siginfo.h file and trick glibc
13  * into accepting it.
14  */
15 #if !__GLIBC_PREREQ(2, 26)
16 # include <asm/siginfo.h>
17 # define __have_siginfo_t 1
18 # define __have_sigval_t 1
19 # define __have_sigevent_t 1
20 #endif
21 
22 #include <errno.h>
23 #include <linux/filter.h>
24 #include <sys/prctl.h>
25 #include <sys/ptrace.h>
26 #include <sys/user.h>
27 #include <linux/prctl.h>
28 #include <linux/ptrace.h>
29 #include <linux/seccomp.h>
30 #include <pthread.h>
31 #include <semaphore.h>
32 #include <signal.h>
33 #include <stddef.h>
34 #include <stdbool.h>
35 #include <string.h>
36 #include <time.h>
37 #include <linux/elf.h>
38 #include <sys/uio.h>
39 #include <sys/utsname.h>
40 #include <sys/fcntl.h>
41 #include <sys/mman.h>
42 #include <sys/times.h>
43 
44 #define _GNU_SOURCE
45 #include <unistd.h>
46 #include <sys/syscall.h>
47 
48 #include "../kselftest_harness.h"
49 
/*
 * Fallback values, used only when the headers included above do not
 * already define these prctl() options and seccomp modes.
 */
#if !defined(PR_SET_PTRACER)
# define PR_SET_PTRACER 0x59616d61
#endif

#if !defined(PR_SET_NO_NEW_PRIVS)
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif

#if !defined(PR_SECCOMP_EXT)
# define PR_SECCOMP_EXT 43
#endif

#if !defined(SECCOMP_EXT_ACT)
# define SECCOMP_EXT_ACT 1
#endif

#if !defined(SECCOMP_EXT_ACT_TSYNC)
# define SECCOMP_EXT_ACT_TSYNC 1
#endif

#if !defined(SECCOMP_MODE_STRICT)
# define SECCOMP_MODE_STRICT 1
#endif

#if !defined(SECCOMP_MODE_FILTER)
# define SECCOMP_MODE_FILTER 2
#endif
78 
/*
 * Local declaration of the data a filter inspects; compiled in only when
 * SECCOMP_RET_ALLOW has not been defined by an included header.
 */
#if !defined(SECCOMP_RET_ALLOW)
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

/* Filter return actions; each group is defined only if it is missing. */
#if !defined(SECCOMP_RET_KILL_PROCESS)
# define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
# define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
#endif
#if !defined(SECCOMP_RET_KILL)
# define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
# define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
#endif
#if !defined(SECCOMP_RET_LOG)
# define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
#endif
102 
/*
 * Per-architecture number of the seccomp syscall, supplied only when the
 * included headers do not define __NR_seccomp. Unknown architectures get
 * a build warning and the placeholder value 0xffff.
 */
#if !defined(__NR_seccomp)
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif
123 
/* seccomp() operations, defined here only if the headers lack them. */
#if !defined(SECCOMP_SET_MODE_STRICT)
# define SECCOMP_SET_MODE_STRICT 0
#endif

#if !defined(SECCOMP_SET_MODE_FILTER)
# define SECCOMP_SET_MODE_FILTER 1
#endif

#if !defined(SECCOMP_GET_ACTION_AVAIL)
# define SECCOMP_GET_ACTION_AVAIL 2
#endif

/* Filter installation flags, again only as fallbacks. */
#if !defined(SECCOMP_FILTER_FLAG_TSYNC)
# define SECCOMP_FILTER_FLAG_TSYNC 1
#endif

#if !defined(SECCOMP_FILTER_FLAG_LOG)
# define SECCOMP_FILTER_FLAG_LOG 2
#endif

/* ptrace request and its argument structure, when not already provided. */
#if !defined(PTRACE_SECCOMP_GET_METADATA)
# define PTRACE_SECCOMP_GET_METADATA	0x420d

struct seccomp_metadata {
	__u64 filter_off;       /* Input: which filter */
	__u64 flags;             /* Output: filter's flags */
};
#endif
152 
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * errno is cleared before the call; the syscall() result is returned
 * converted to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif
160 
/*
 * Byte offset, inside struct seccomp_data, of the 32-bit word that the
 * tests load for syscall argument _n: the start of args[_n] on
 * little-endian, sizeof(__u32) further in on big-endian.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
# error "wut? Unknown __BYTE_ORDER?!"
#endif

/* Values returned by the helper thread used in the KILL_* tests. */
#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
172 
173 TEST(mode_strict_support)
174 {
175 	long ret;
176 
177 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
178 	ASSERT_EQ(0, ret) {
179 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
180 	}
181 	syscall(__NR_exit, 0);
182 }
183 
184 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
185 {
186 	long ret;
187 
188 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
189 	ASSERT_EQ(0, ret) {
190 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
191 	}
192 	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
193 		NULL, NULL, NULL);
194 	EXPECT_FALSE(true) {
195 		TH_LOG("Unreachable!");
196 	}
197 }
198 
199 /* Note! This doesn't test no new privs behavior */
200 TEST(no_new_privs_support)
201 {
202 	long ret;
203 
204 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
205 	EXPECT_EQ(0, ret) {
206 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
207 	}
208 }
209 
210 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
211 TEST(mode_filter_support)
212 {
213 	long ret;
214 
215 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
216 	ASSERT_EQ(0, ret) {
217 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
218 	}
219 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
220 	EXPECT_EQ(-1, ret);
221 	EXPECT_EQ(EFAULT, errno) {
222 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
223 	}
224 }
225 
226 TEST(mode_filter_without_nnp)
227 {
228 	struct sock_filter filter[] = {
229 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
230 	};
231 	struct sock_fprog prog = {
232 		.len = (unsigned short)ARRAY_SIZE(filter),
233 		.filter = filter,
234 	};
235 	long ret;
236 
237 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
238 	ASSERT_LE(0, ret) {
239 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
240 	}
241 	errno = 0;
242 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
243 	/* Succeeds with CAP_SYS_ADMIN, fails without */
244 	/* TODO(wad) check caps not euid */
245 	if (geteuid()) {
246 		EXPECT_EQ(-1, ret);
247 		EXPECT_EQ(EACCES, errno);
248 	} else {
249 		EXPECT_EQ(0, ret);
250 	}
251 }
252 
253 #define MAX_INSNS_PER_PATH 32768
254 
255 TEST(filter_size_limits)
256 {
257 	int i;
258 	int count = BPF_MAXINSNS + 1;
259 	struct sock_filter allow[] = {
260 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
261 	};
262 	struct sock_filter *filter;
263 	struct sock_fprog prog = { };
264 	long ret;
265 
266 	filter = calloc(count, sizeof(*filter));
267 	ASSERT_NE(NULL, filter);
268 
269 	for (i = 0; i < count; i++)
270 		filter[i] = allow[0];
271 
272 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
273 	ASSERT_EQ(0, ret);
274 
275 	prog.filter = filter;
276 	prog.len = count;
277 
278 	/* Too many filter instructions in a single filter. */
279 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
280 	ASSERT_NE(0, ret) {
281 		TH_LOG("Installing %d insn filter was allowed", prog.len);
282 	}
283 
284 	/* One less is okay, though. */
285 	prog.len -= 1;
286 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
287 	ASSERT_EQ(0, ret) {
288 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
289 	}
290 }
291 
292 TEST(filter_chain_limits)
293 {
294 	int i;
295 	int count = BPF_MAXINSNS;
296 	struct sock_filter allow[] = {
297 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
298 	};
299 	struct sock_filter *filter;
300 	struct sock_fprog prog = { };
301 	long ret;
302 
303 	filter = calloc(count, sizeof(*filter));
304 	ASSERT_NE(NULL, filter);
305 
306 	for (i = 0; i < count; i++)
307 		filter[i] = allow[0];
308 
309 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
310 	ASSERT_EQ(0, ret);
311 
312 	prog.filter = filter;
313 	prog.len = 1;
314 
315 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
316 	ASSERT_EQ(0, ret);
317 
318 	prog.len = count;
319 
320 	/* Too many total filter instructions. */
321 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
322 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
323 		if (ret != 0)
324 			break;
325 	}
326 	ASSERT_NE(0, ret) {
327 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
328 		       i, count, i * (count + 4));
329 	}
330 }
331 
332 TEST(mode_filter_cannot_move_to_strict)
333 {
334 	struct sock_filter filter[] = {
335 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
336 	};
337 	struct sock_fprog prog = {
338 		.len = (unsigned short)ARRAY_SIZE(filter),
339 		.filter = filter,
340 	};
341 	long ret;
342 
343 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
344 	ASSERT_EQ(0, ret);
345 
346 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
347 	ASSERT_EQ(0, ret);
348 
349 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
350 	EXPECT_EQ(-1, ret);
351 	EXPECT_EQ(EINVAL, errno);
352 }
353 
354 
/*
 * PR_GET_SECCOMP must report 0 before any filter is installed and
 * SECCOMP_MODE_FILTER afterwards.
 */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Compare against the named mode rather than a bare 2. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(SECCOMP_MODE_FILTER, ret);
}
378 
379 
380 TEST(ALLOW_all)
381 {
382 	struct sock_filter filter[] = {
383 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
384 	};
385 	struct sock_fprog prog = {
386 		.len = (unsigned short)ARRAY_SIZE(filter),
387 		.filter = filter,
388 	};
389 	long ret;
390 
391 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
392 	ASSERT_EQ(0, ret);
393 
394 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
395 	ASSERT_EQ(0, ret);
396 }
397 
398 TEST(empty_prog)
399 {
400 	struct sock_filter filter[] = {
401 	};
402 	struct sock_fprog prog = {
403 		.len = (unsigned short)ARRAY_SIZE(filter),
404 		.filter = filter,
405 	};
406 	long ret;
407 
408 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
409 	ASSERT_EQ(0, ret);
410 
411 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
412 	EXPECT_EQ(-1, ret);
413 	EXPECT_EQ(EINVAL, errno);
414 }
415 
416 TEST(log_all)
417 {
418 	struct sock_filter filter[] = {
419 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
420 	};
421 	struct sock_fprog prog = {
422 		.len = (unsigned short)ARRAY_SIZE(filter),
423 		.filter = filter,
424 	};
425 	long ret;
426 	pid_t parent = getppid();
427 
428 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
429 	ASSERT_EQ(0, ret);
430 
431 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
432 	ASSERT_EQ(0, ret);
433 
434 	/* getppid() should succeed and be logged (no check for logging) */
435 	EXPECT_EQ(parent, syscall(__NR_getppid));
436 }
437 
438 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
439 {
440 	struct sock_filter filter[] = {
441 		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
442 	};
443 	struct sock_fprog prog = {
444 		.len = (unsigned short)ARRAY_SIZE(filter),
445 		.filter = filter,
446 	};
447 	long ret;
448 
449 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
450 	ASSERT_EQ(0, ret);
451 
452 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
453 	ASSERT_EQ(0, ret);
454 	EXPECT_EQ(0, syscall(__NR_getpid)) {
455 		TH_LOG("getpid() shouldn't ever return");
456 	}
457 }
458 
459 /* return code >= 0x80000000 is unused. */
460 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
461 {
462 	struct sock_filter filter[] = {
463 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
464 	};
465 	struct sock_fprog prog = {
466 		.len = (unsigned short)ARRAY_SIZE(filter),
467 		.filter = filter,
468 	};
469 	long ret;
470 
471 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
472 	ASSERT_EQ(0, ret);
473 
474 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
475 	ASSERT_EQ(0, ret);
476 	EXPECT_EQ(0, syscall(__NR_getpid)) {
477 		TH_LOG("getpid() shouldn't ever return");
478 	}
479 }
480 
481 TEST_SIGNAL(KILL_all, SIGSYS)
482 {
483 	struct sock_filter filter[] = {
484 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
485 	};
486 	struct sock_fprog prog = {
487 		.len = (unsigned short)ARRAY_SIZE(filter),
488 		.filter = filter,
489 	};
490 	long ret;
491 
492 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
493 	ASSERT_EQ(0, ret);
494 
495 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
496 	ASSERT_EQ(0, ret);
497 }
498 
499 TEST_SIGNAL(KILL_one, SIGSYS)
500 {
501 	struct sock_filter filter[] = {
502 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
503 			offsetof(struct seccomp_data, nr)),
504 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
505 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
506 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
507 	};
508 	struct sock_fprog prog = {
509 		.len = (unsigned short)ARRAY_SIZE(filter),
510 		.filter = filter,
511 	};
512 	long ret;
513 	pid_t parent = getppid();
514 
515 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
516 	ASSERT_EQ(0, ret);
517 
518 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
519 	ASSERT_EQ(0, ret);
520 
521 	EXPECT_EQ(parent, syscall(__NR_getppid));
522 	/* getpid() should never return. */
523 	EXPECT_EQ(0, syscall(__NR_getpid));
524 }
525 
526 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
527 {
528 	void *fatal_address;
529 	struct sock_filter filter[] = {
530 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
531 			offsetof(struct seccomp_data, nr)),
532 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
533 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
534 		/* Only both with lower 32-bit for now. */
535 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
536 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
537 			(unsigned long)&fatal_address, 0, 1),
538 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
539 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
540 	};
541 	struct sock_fprog prog = {
542 		.len = (unsigned short)ARRAY_SIZE(filter),
543 		.filter = filter,
544 	};
545 	long ret;
546 	pid_t parent = getppid();
547 	struct tms timebuf;
548 	clock_t clock = times(&timebuf);
549 
550 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
551 	ASSERT_EQ(0, ret);
552 
553 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
554 	ASSERT_EQ(0, ret);
555 
556 	EXPECT_EQ(parent, syscall(__NR_getppid));
557 	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
558 	/* times() should never return. */
559 	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
560 }
561 
562 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
563 {
564 #ifndef __NR_mmap2
565 	int sysno = __NR_mmap;
566 #else
567 	int sysno = __NR_mmap2;
568 #endif
569 	struct sock_filter filter[] = {
570 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
571 			offsetof(struct seccomp_data, nr)),
572 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
573 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
574 		/* Only both with lower 32-bit for now. */
575 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
576 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
577 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
578 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
579 	};
580 	struct sock_fprog prog = {
581 		.len = (unsigned short)ARRAY_SIZE(filter),
582 		.filter = filter,
583 	};
584 	long ret;
585 	pid_t parent = getppid();
586 	int fd;
587 	void *map1, *map2;
588 	int page_size = sysconf(_SC_PAGESIZE);
589 
590 	ASSERT_LT(0, page_size);
591 
592 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
593 	ASSERT_EQ(0, ret);
594 
595 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
596 	ASSERT_EQ(0, ret);
597 
598 	fd = open("/dev/zero", O_RDONLY);
599 	ASSERT_NE(-1, fd);
600 
601 	EXPECT_EQ(parent, syscall(__NR_getppid));
602 	map1 = (void *)syscall(sysno,
603 		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
604 	EXPECT_NE(MAP_FAILED, map1);
605 	/* mmap2() should never return. */
606 	map2 = (void *)syscall(sysno,
607 		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
608 	EXPECT_EQ(MAP_FAILED, map2);
609 
610 	/* The test failed, so clean up the resources. */
611 	munmap(map1, page_size);
612 	munmap(map2, page_size);
613 	close(fd);
614 }
615 
616 /* This is a thread task to die via seccomp filter violation. */
617 void *kill_thread(void *data)
618 {
619 	bool die = (bool)data;
620 
621 	if (die) {
622 		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
623 		return (void *)SIBLING_EXIT_FAILURE;
624 	}
625 
626 	return (void *)SIBLING_EXIT_UNKILLED;
627 }
628 
629 /* Prepare a thread that will kill itself or both of us. */
630 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
631 {
632 	pthread_t thread;
633 	void *status;
634 	/* Kill only when calling __NR_prctl. */
635 	struct sock_filter filter_thread[] = {
636 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
637 			offsetof(struct seccomp_data, nr)),
638 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
639 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
640 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
641 	};
642 	struct sock_fprog prog_thread = {
643 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
644 		.filter = filter_thread,
645 	};
646 	struct sock_filter filter_process[] = {
647 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
648 			offsetof(struct seccomp_data, nr)),
649 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
650 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
651 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
652 	};
653 	struct sock_fprog prog_process = {
654 		.len = (unsigned short)ARRAY_SIZE(filter_process),
655 		.filter = filter_process,
656 	};
657 
658 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
659 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
660 	}
661 
662 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
663 			     kill_process ? &prog_process : &prog_thread));
664 
665 	/*
666 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
667 	 * flag cannot be downgraded by a new filter.
668 	 */
669 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
670 
671 	/* Start a thread that will exit immediately. */
672 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
673 	ASSERT_EQ(0, pthread_join(thread, &status));
674 	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
675 
676 	/* Start a thread that will die immediately. */
677 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
678 	ASSERT_EQ(0, pthread_join(thread, &status));
679 	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
680 
681 	/*
682 	 * If we get here, only the spawned thread died. Let the parent know
683 	 * the whole process didn't die (i.e. this thread, the spawner,
684 	 * stayed running).
685 	 */
686 	exit(42);
687 }
688 
689 TEST(KILL_thread)
690 {
691 	int status;
692 	pid_t child_pid;
693 
694 	child_pid = fork();
695 	ASSERT_LE(0, child_pid);
696 	if (child_pid == 0) {
697 		kill_thread_or_group(_metadata, false);
698 		_exit(38);
699 	}
700 
701 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
702 
703 	/* If only the thread was killed, we'll see exit 42. */
704 	ASSERT_TRUE(WIFEXITED(status));
705 	ASSERT_EQ(42, WEXITSTATUS(status));
706 }
707 
708 TEST(KILL_process)
709 {
710 	int status;
711 	pid_t child_pid;
712 
713 	child_pid = fork();
714 	ASSERT_LE(0, child_pid);
715 	if (child_pid == 0) {
716 		kill_thread_or_group(_metadata, true);
717 		_exit(38);
718 	}
719 
720 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
721 
722 	/* If the entire process was killed, we'll see SIGSYS. */
723 	ASSERT_TRUE(WIFSIGNALED(status));
724 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
725 }
726 
/*
 * A filter that loads from syscall_arg(6), one slot beyond args[5],
 * must be rejected with EINVAL.
 */
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
	long ret;
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
747 
/*
 * Declare a filter _read_filter_<name> and a program prog_<name> that
 * make read() return SECCOMP_RET_ERRNO with the given data value, and
 * allow every other syscall.
 *
 * The value parameter is called _errno, not errno, so it cannot collide
 * with the errno macro from <errno.h>; it is parenthesized so that any
 * expression can be passed.
 */
#define ERRNO_FILTER(name, _errno)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | (_errno)),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
760 
761 /* Make sure basic errno values are correctly passed through a filter. */
762 TEST(ERRNO_valid)
763 {
764 	ERRNO_FILTER(valid, E2BIG);
765 	long ret;
766 	pid_t parent = getppid();
767 
768 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
769 	ASSERT_EQ(0, ret);
770 
771 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
772 	ASSERT_EQ(0, ret);
773 
774 	EXPECT_EQ(parent, syscall(__NR_getppid));
775 	EXPECT_EQ(-1, read(0, NULL, 0));
776 	EXPECT_EQ(E2BIG, errno);
777 }
778 
779 /* Make sure an errno of zero is correctly handled by the arch code. */
780 TEST(ERRNO_zero)
781 {
782 	ERRNO_FILTER(zero, 0);
783 	long ret;
784 	pid_t parent = getppid();
785 
786 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
787 	ASSERT_EQ(0, ret);
788 
789 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
790 	ASSERT_EQ(0, ret);
791 
792 	EXPECT_EQ(parent, syscall(__NR_getppid));
793 	/* "errno" of 0 is ok. */
794 	EXPECT_EQ(0, read(0, NULL, 0));
795 }
796 
797 /*
798  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
799  * This tests that the errno value gets capped correctly, fixed by
800  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
801  */
802 TEST(ERRNO_capped)
803 {
804 	ERRNO_FILTER(capped, 4096);
805 	long ret;
806 	pid_t parent = getppid();
807 
808 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
809 	ASSERT_EQ(0, ret);
810 
811 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
812 	ASSERT_EQ(0, ret);
813 
814 	EXPECT_EQ(parent, syscall(__NR_getppid));
815 	EXPECT_EQ(-1, read(0, NULL, 0));
816 	EXPECT_EQ(4095, errno);
817 }
818 
819 /*
820  * Filters are processed in reverse order: last applied is executed first.
821  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
822  * SECCOMP_RET_DATA mask results will follow the most recently applied
823  * matching filter return (and not the lowest or highest value).
824  */
825 TEST(ERRNO_order)
826 {
827 	ERRNO_FILTER(first,  11);
828 	ERRNO_FILTER(second, 13);
829 	ERRNO_FILTER(third,  12);
830 	long ret;
831 	pid_t parent = getppid();
832 
833 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
834 	ASSERT_EQ(0, ret);
835 
836 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
837 	ASSERT_EQ(0, ret);
838 
839 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
840 	ASSERT_EQ(0, ret);
841 
842 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
843 	ASSERT_EQ(0, ret);
844 
845 	EXPECT_EQ(parent, syscall(__NR_getppid));
846 	EXPECT_EQ(-1, read(0, NULL, 0));
847 	EXPECT_EQ(12, errno);
848 }
849 
/* Per-test state of the TRAP fixture: the filter program to install. */
FIXTURE_DATA(TRAP) {
	struct sock_fprog prog;	/* .filter is heap-allocated by the setup */
};
853 
854 FIXTURE_SETUP(TRAP)
855 {
856 	struct sock_filter filter[] = {
857 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
858 			offsetof(struct seccomp_data, nr)),
859 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
860 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
861 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
862 	};
863 
864 	memset(&self->prog, 0, sizeof(self->prog));
865 	self->prog.filter = malloc(sizeof(filter));
866 	ASSERT_NE(NULL, self->prog.filter);
867 	memcpy(self->prog.filter, filter, sizeof(filter));
868 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
869 }
870 
871 FIXTURE_TEARDOWN(TRAP)
872 {
873 	if (self->prog.filter)
874 		free(self->prog.filter);
875 }
876 
877 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
878 {
879 	long ret;
880 
881 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
882 	ASSERT_EQ(0, ret);
883 
884 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
885 	ASSERT_EQ(0, ret);
886 	syscall(__NR_getpid);
887 }
888 
889 /* Ensure that SIGSYS overrides SIG_IGN */
890 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
891 {
892 	long ret;
893 
894 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
895 	ASSERT_EQ(0, ret);
896 
897 	signal(SIGSYS, SIG_IGN);
898 
899 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
900 	ASSERT_EQ(0, ret);
901 	syscall(__NR_getpid);
902 }
903 
904 static siginfo_t TRAP_info;
905 static volatile int TRAP_nr;
906 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
907 {
908 	memcpy(&TRAP_info, info, sizeof(TRAP_info));
909 	TRAP_nr = nr;
910 }
911 
912 TEST_F(TRAP, handler)
913 {
914 	int ret, test;
915 	struct sigaction act;
916 	sigset_t mask;
917 
918 	memset(&act, 0, sizeof(act));
919 	sigemptyset(&mask);
920 	sigaddset(&mask, SIGSYS);
921 
922 	act.sa_sigaction = &TRAP_action;
923 	act.sa_flags = SA_SIGINFO;
924 	ret = sigaction(SIGSYS, &act, NULL);
925 	ASSERT_EQ(0, ret) {
926 		TH_LOG("sigaction failed");
927 	}
928 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
929 	ASSERT_EQ(0, ret) {
930 		TH_LOG("sigprocmask failed");
931 	}
932 
933 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
934 	ASSERT_EQ(0, ret);
935 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
936 	ASSERT_EQ(0, ret);
937 	TRAP_nr = 0;
938 	memset(&TRAP_info, 0, sizeof(TRAP_info));
939 	/* Expect the registers to be rolled back. (nr = error) may vary
940 	 * based on arch. */
941 	ret = syscall(__NR_getpid);
942 	/* Silence gcc warning about volatile. */
943 	test = TRAP_nr;
944 	EXPECT_EQ(SIGSYS, test);
945 	struct local_sigsys {
946 		void *_call_addr;	/* calling user insn */
947 		int _syscall;		/* triggering system call number */
948 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
949 	} *sigsys = (struct local_sigsys *)
950 #ifdef si_syscall
951 		&(TRAP_info.si_call_addr);
952 #else
953 		&TRAP_info.si_pid;
954 #endif
955 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
956 	/* Make sure arch is non-zero. */
957 	EXPECT_NE(0, sigsys->_arch);
958 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
959 }
960 
/* One filter program per return action exercised by the precedence tests. */
FIXTURE_DATA(precedence) {
	struct sock_fprog allow;	/* SECCOMP_RET_ALLOW for everything */
	struct sock_fprog log;		/* SECCOMP_RET_LOG on getpid */
	struct sock_fprog trace;	/* SECCOMP_RET_TRACE on getpid */
	struct sock_fprog error;	/* SECCOMP_RET_ERRNO on getpid */
	struct sock_fprog trap;		/* SECCOMP_RET_TRAP on getpid */
	struct sock_fprog kill;		/* SECCOMP_RET_KILL on getpid */
};
969 
970 FIXTURE_SETUP(precedence)
971 {
972 	struct sock_filter allow_insns[] = {
973 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
974 	};
975 	struct sock_filter log_insns[] = {
976 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
977 			offsetof(struct seccomp_data, nr)),
978 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
979 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
980 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
981 	};
982 	struct sock_filter trace_insns[] = {
983 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
984 			offsetof(struct seccomp_data, nr)),
985 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
986 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
987 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
988 	};
989 	struct sock_filter error_insns[] = {
990 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
991 			offsetof(struct seccomp_data, nr)),
992 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
993 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
994 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
995 	};
996 	struct sock_filter trap_insns[] = {
997 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
998 			offsetof(struct seccomp_data, nr)),
999 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1000 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1001 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1002 	};
1003 	struct sock_filter kill_insns[] = {
1004 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1005 			offsetof(struct seccomp_data, nr)),
1006 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1007 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1008 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1009 	};
1010 
1011 	memset(self, 0, sizeof(*self));
1012 #define FILTER_ALLOC(_x) \
1013 	self->_x.filter = malloc(sizeof(_x##_insns)); \
1014 	ASSERT_NE(NULL, self->_x.filter); \
1015 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1016 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1017 	FILTER_ALLOC(allow);
1018 	FILTER_ALLOC(log);
1019 	FILTER_ALLOC(trace);
1020 	FILTER_ALLOC(error);
1021 	FILTER_ALLOC(trap);
1022 	FILTER_ALLOC(kill);
1023 }
1024 
1025 FIXTURE_TEARDOWN(precedence)
1026 {
1027 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1028 	FILTER_FREE(allow);
1029 	FILTER_FREE(log);
1030 	FILTER_FREE(trace);
1031 	FILTER_FREE(error);
1032 	FILTER_FREE(trap);
1033 	FILTER_FREE(kill);
1034 }
1035 
1036 TEST_F(precedence, allow_ok)
1037 {
1038 	pid_t parent, res = 0;
1039 	long ret;
1040 
1041 	parent = getppid();
1042 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1043 	ASSERT_EQ(0, ret);
1044 
1045 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1046 	ASSERT_EQ(0, ret);
1047 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1048 	ASSERT_EQ(0, ret);
1049 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1050 	ASSERT_EQ(0, ret);
1051 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1052 	ASSERT_EQ(0, ret);
1053 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1054 	ASSERT_EQ(0, ret);
1055 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1056 	ASSERT_EQ(0, ret);
1057 	/* Should work just fine. */
1058 	res = syscall(__NR_getppid);
1059 	EXPECT_EQ(parent, res);
1060 }
1061 
1062 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1063 {
1064 	pid_t parent, res = 0;
1065 	long ret;
1066 
1067 	parent = getppid();
1068 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1069 	ASSERT_EQ(0, ret);
1070 
1071 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1072 	ASSERT_EQ(0, ret);
1073 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1074 	ASSERT_EQ(0, ret);
1075 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1076 	ASSERT_EQ(0, ret);
1077 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1078 	ASSERT_EQ(0, ret);
1079 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1080 	ASSERT_EQ(0, ret);
1081 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1082 	ASSERT_EQ(0, ret);
1083 	/* Should work just fine. */
1084 	res = syscall(__NR_getppid);
1085 	EXPECT_EQ(parent, res);
1086 	/* getpid() should never return. */
1087 	res = syscall(__NR_getpid);
1088 	EXPECT_EQ(0, res);
1089 }
1090 
1091 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1092 {
1093 	pid_t parent;
1094 	long ret;
1095 
1096 	parent = getppid();
1097 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1098 	ASSERT_EQ(0, ret);
1099 
1100 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1101 	ASSERT_EQ(0, ret);
1102 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1103 	ASSERT_EQ(0, ret);
1104 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1105 	ASSERT_EQ(0, ret);
1106 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1107 	ASSERT_EQ(0, ret);
1108 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1109 	ASSERT_EQ(0, ret);
1110 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1111 	ASSERT_EQ(0, ret);
1112 	/* Should work just fine. */
1113 	EXPECT_EQ(parent, syscall(__NR_getppid));
1114 	/* getpid() should never return. */
1115 	EXPECT_EQ(0, syscall(__NR_getpid));
1116 }
1117 
1118 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1119 {
1120 	pid_t parent;
1121 	long ret;
1122 
1123 	parent = getppid();
1124 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1125 	ASSERT_EQ(0, ret);
1126 
1127 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1128 	ASSERT_EQ(0, ret);
1129 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1130 	ASSERT_EQ(0, ret);
1131 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1132 	ASSERT_EQ(0, ret);
1133 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1134 	ASSERT_EQ(0, ret);
1135 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1136 	ASSERT_EQ(0, ret);
1137 	/* Should work just fine. */
1138 	EXPECT_EQ(parent, syscall(__NR_getppid));
1139 	/* getpid() should never return. */
1140 	EXPECT_EQ(0, syscall(__NR_getpid));
1141 }
1142 
1143 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1144 {
1145 	pid_t parent;
1146 	long ret;
1147 
1148 	parent = getppid();
1149 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1150 	ASSERT_EQ(0, ret);
1151 
1152 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1153 	ASSERT_EQ(0, ret);
1154 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1155 	ASSERT_EQ(0, ret);
1156 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1157 	ASSERT_EQ(0, ret);
1158 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1159 	ASSERT_EQ(0, ret);
1160 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1161 	ASSERT_EQ(0, ret);
1162 	/* Should work just fine. */
1163 	EXPECT_EQ(parent, syscall(__NR_getppid));
1164 	/* getpid() should never return. */
1165 	EXPECT_EQ(0, syscall(__NR_getpid));
1166 }
1167 
1168 TEST_F(precedence, errno_is_third)
1169 {
1170 	pid_t parent;
1171 	long ret;
1172 
1173 	parent = getppid();
1174 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1175 	ASSERT_EQ(0, ret);
1176 
1177 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1178 	ASSERT_EQ(0, ret);
1179 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1180 	ASSERT_EQ(0, ret);
1181 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1182 	ASSERT_EQ(0, ret);
1183 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1184 	ASSERT_EQ(0, ret);
1185 	/* Should work just fine. */
1186 	EXPECT_EQ(parent, syscall(__NR_getppid));
1187 	EXPECT_EQ(0, syscall(__NR_getpid));
1188 }
1189 
1190 TEST_F(precedence, errno_is_third_in_any_order)
1191 {
1192 	pid_t parent;
1193 	long ret;
1194 
1195 	parent = getppid();
1196 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1197 	ASSERT_EQ(0, ret);
1198 
1199 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1200 	ASSERT_EQ(0, ret);
1201 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1202 	ASSERT_EQ(0, ret);
1203 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1204 	ASSERT_EQ(0, ret);
1205 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1206 	ASSERT_EQ(0, ret);
1207 	/* Should work just fine. */
1208 	EXPECT_EQ(parent, syscall(__NR_getppid));
1209 	EXPECT_EQ(0, syscall(__NR_getpid));
1210 }
1211 
1212 TEST_F(precedence, trace_is_fourth)
1213 {
1214 	pid_t parent;
1215 	long ret;
1216 
1217 	parent = getppid();
1218 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1219 	ASSERT_EQ(0, ret);
1220 
1221 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1222 	ASSERT_EQ(0, ret);
1223 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1224 	ASSERT_EQ(0, ret);
1225 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1226 	ASSERT_EQ(0, ret);
1227 	/* Should work just fine. */
1228 	EXPECT_EQ(parent, syscall(__NR_getppid));
1229 	/* No ptracer */
1230 	EXPECT_EQ(-1, syscall(__NR_getpid));
1231 }
1232 
1233 TEST_F(precedence, trace_is_fourth_in_any_order)
1234 {
1235 	pid_t parent;
1236 	long ret;
1237 
1238 	parent = getppid();
1239 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1240 	ASSERT_EQ(0, ret);
1241 
1242 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1243 	ASSERT_EQ(0, ret);
1244 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1245 	ASSERT_EQ(0, ret);
1246 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1247 	ASSERT_EQ(0, ret);
1248 	/* Should work just fine. */
1249 	EXPECT_EQ(parent, syscall(__NR_getppid));
1250 	/* No ptracer */
1251 	EXPECT_EQ(-1, syscall(__NR_getpid));
1252 }
1253 
1254 TEST_F(precedence, log_is_fifth)
1255 {
1256 	pid_t mypid, parent;
1257 	long ret;
1258 
1259 	mypid = getpid();
1260 	parent = getppid();
1261 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1262 	ASSERT_EQ(0, ret);
1263 
1264 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1265 	ASSERT_EQ(0, ret);
1266 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1267 	ASSERT_EQ(0, ret);
1268 	/* Should work just fine. */
1269 	EXPECT_EQ(parent, syscall(__NR_getppid));
1270 	/* Should also work just fine */
1271 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1272 }
1273 
1274 TEST_F(precedence, log_is_fifth_in_any_order)
1275 {
1276 	pid_t mypid, parent;
1277 	long ret;
1278 
1279 	mypid = getpid();
1280 	parent = getppid();
1281 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1282 	ASSERT_EQ(0, ret);
1283 
1284 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1285 	ASSERT_EQ(0, ret);
1286 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1287 	ASSERT_EQ(0, ret);
1288 	/* Should work just fine. */
1289 	EXPECT_EQ(parent, syscall(__NR_getppid));
1290 	/* Should also work just fine */
1291 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1292 }
1293 
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when bits 16 and up of a wait status equal PTRACE_EVENT_SECCOMP.
 * The argument is parenthesized so that compound expressions such as
 * IS_SECCOMP_EVENT(a | b) shift the whole value, not just the last operand.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop in start_tracer(). It is cleared from a
 * signal handler, so it is declared volatile sig_atomic_t: that is the
 * only object type a handler may portably store to, and volatile keeps
 * the loop from caching a stale value.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler (installed for SIGUSR1): ask the tracer loop to stop. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = 0;
}
1313 
/*
 * Callback invoked by start_tracer() for each reported stop of the
 * tracee: receives the test metadata, the tracee pid, the wait status of
 * the stop and the opaque argument given to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
			   int status, void *args);
1316 
1317 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1318 	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1319 {
1320 	int ret = -1;
1321 	struct sigaction action = {
1322 		.sa_handler = tracer_stop,
1323 	};
1324 
1325 	/* Allow external shutdown. */
1326 	tracer_running = true;
1327 	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1328 
1329 	errno = 0;
1330 	while (ret == -1 && errno != EINVAL)
1331 		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1332 	ASSERT_EQ(0, ret) {
1333 		kill(tracee, SIGKILL);
1334 	}
1335 	/* Wait for attach stop */
1336 	wait(NULL);
1337 
1338 	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1339 						      PTRACE_O_TRACESYSGOOD :
1340 						      PTRACE_O_TRACESECCOMP);
1341 	ASSERT_EQ(0, ret) {
1342 		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1343 		kill(tracee, SIGKILL);
1344 	}
1345 	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1346 		     tracee, NULL, 0);
1347 	ASSERT_EQ(0, ret);
1348 
1349 	/* Unblock the tracee */
1350 	ASSERT_EQ(1, write(fd, "A", 1));
1351 	ASSERT_EQ(0, close(fd));
1352 
1353 	/* Run until we're shut down. Must assert to stop execution. */
1354 	while (tracer_running) {
1355 		int status;
1356 
1357 		if (wait(&status) != tracee)
1358 			continue;
1359 		if (WIFSIGNALED(status) || WIFEXITED(status))
1360 			/* Child is dead. Time to go. */
1361 			return;
1362 
1363 		/* Check if this is a seccomp event. */
1364 		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1365 
1366 		tracer_func(_metadata, tracee, status, args);
1367 
1368 		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1369 			     tracee, NULL, 0);
1370 		ASSERT_EQ(0, ret);
1371 	}
1372 	/* Directly report the status of our test harness results. */
1373 	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1374 }
1375 
1376 /* Common tracer setup/teardown functions. */
/* Do-nothing signal handler (installed for SIGALRM by setup_trace_fixture). */
void cont_handler(int num)
{
	(void)num;
}
1379 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1380 			  tracer_func_t func, void *args, bool ptrace_syscall)
1381 {
1382 	char sync;
1383 	int pipefd[2];
1384 	pid_t tracer_pid;
1385 	pid_t tracee = getpid();
1386 
1387 	/* Setup a pipe for clean synchronization. */
1388 	ASSERT_EQ(0, pipe(pipefd));
1389 
1390 	/* Fork a child which we'll promote to tracer */
1391 	tracer_pid = fork();
1392 	ASSERT_LE(0, tracer_pid);
1393 	signal(SIGALRM, cont_handler);
1394 	if (tracer_pid == 0) {
1395 		close(pipefd[0]);
1396 		start_tracer(_metadata, pipefd[1], tracee, func, args,
1397 			     ptrace_syscall);
1398 		syscall(__NR_exit, 0);
1399 	}
1400 	close(pipefd[1]);
1401 	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1402 	read(pipefd[0], &sync, 1);
1403 	close(pipefd[0]);
1404 
1405 	return tracer_pid;
1406 }
1407 void teardown_trace_fixture(struct __test_metadata *_metadata,
1408 			    pid_t tracer)
1409 {
1410 	if (tracer) {
1411 		int status;
1412 		/*
1413 		 * Extract the exit code from the other process and
1414 		 * adopt it for ourselves in case its asserts failed.
1415 		 */
1416 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1417 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1418 		if (WEXITSTATUS(status))
1419 			_metadata->passed = 0;
1420 	}
1421 }
1422 
/*
 * "poke" tracer arguments and function.
 *
 * poke_addr is the address in the tracee that tracer_poke() writes to.
 */
struct tracer_args_poke_t {
	unsigned long poke_addr;
};
1427 
1428 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1429 		 void *args)
1430 {
1431 	int ret;
1432 	unsigned long msg;
1433 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1434 
1435 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1436 	EXPECT_EQ(0, ret);
1437 	/* If this fails, don't try to recover. */
1438 	ASSERT_EQ(0x1001, msg) {
1439 		kill(tracee, SIGKILL);
1440 	}
1441 	/*
1442 	 * Poke in the message.
1443 	 * Registers are not touched to try to keep this relatively arch
1444 	 * agnostic.
1445 	 */
1446 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1447 	EXPECT_EQ(0, ret);
1448 }
1449 
1450 FIXTURE_DATA(TRACE_poke) {
1451 	struct sock_fprog prog;
1452 	pid_t tracer;
1453 	long poked;
1454 	struct tracer_args_poke_t tracer_args;
1455 };
1456 
1457 FIXTURE_SETUP(TRACE_poke)
1458 {
1459 	struct sock_filter filter[] = {
1460 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1461 			offsetof(struct seccomp_data, nr)),
1462 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1463 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1464 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1465 	};
1466 
1467 	self->poked = 0;
1468 	memset(&self->prog, 0, sizeof(self->prog));
1469 	self->prog.filter = malloc(sizeof(filter));
1470 	ASSERT_NE(NULL, self->prog.filter);
1471 	memcpy(self->prog.filter, filter, sizeof(filter));
1472 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1473 
1474 	/* Set up tracer args. */
1475 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1476 
1477 	/* Launch tracer. */
1478 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1479 					   &self->tracer_args, false);
1480 }
1481 
1482 FIXTURE_TEARDOWN(TRACE_poke)
1483 {
1484 	teardown_trace_fixture(_metadata, self->tracer);
1485 	if (self->prog.filter)
1486 		free(self->prog.filter);
1487 }
1488 
1489 TEST_F(TRACE_poke, read_has_side_effects)
1490 {
1491 	ssize_t ret;
1492 
1493 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1494 	ASSERT_EQ(0, ret);
1495 
1496 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1497 	ASSERT_EQ(0, ret);
1498 
1499 	EXPECT_EQ(0, self->poked);
1500 	ret = read(-1, NULL, 0);
1501 	EXPECT_EQ(-1, ret);
1502 	EXPECT_EQ(0x1001, self->poked);
1503 }
1504 
1505 TEST_F(TRACE_poke, getpid_runs_normally)
1506 {
1507 	long ret;
1508 
1509 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1510 	ASSERT_EQ(0, ret);
1511 
1512 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1513 	ASSERT_EQ(0, ret);
1514 
1515 	EXPECT_EQ(0, self->poked);
1516 	EXPECT_NE(0, syscall(__NR_getpid));
1517 	EXPECT_EQ(0, self->poked);
1518 }
1519 
/*
 * Per-architecture register access:
 *   ARCH_REGS    - type holding the tracee's registers
 *   SYSCALL_NUM  - member holding the syscall number
 *   SYSCALL_RET  - member holding the syscall return value
 * Architectures where both live in the same register must also define
 * SYSCALL_NUM_RET_SHARE_REG so that the return value is never written
 * over the syscall number.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS	s390_regs
# define SYSCALL_NUM	gprs[2]
# define SYSCALL_RET	gprs[2]
/* gprs[2] is both the syscall number and the return value. */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1557 
/*
 * Expect @val from @action where the tracer can set the syscall return
 * value; where it cannot (shared number/return register), the check is
 * stubbed out to expect -1 instead.
 */
#ifndef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(val, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#endif
1564 
/*
 * Prefer PTRACE_GETREGS/PTRACE_SETREGS on the architectures listed below.
 * This is useful for architectures without HAVE_ARCH_TRACEHOOK (e.g.
 * User-mode Linux).
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
# define HAVE_GETREGS
#endif
1571 
1572 /* Architecture-specific syscall fetching routine. */
1573 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1574 {
1575 	ARCH_REGS regs;
1576 #ifdef HAVE_GETREGS
1577 	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1578 		TH_LOG("PTRACE_GETREGS failed");
1579 		return -1;
1580 	}
1581 #else
1582 	struct iovec iov;
1583 
1584 	iov.iov_base = &regs;
1585 	iov.iov_len = sizeof(regs);
1586 	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1587 		TH_LOG("PTRACE_GETREGSET failed");
1588 		return -1;
1589 	}
1590 #endif
1591 
1592 #if defined(__mips__)
1593 	if (regs.SYSCALL_NUM == __NR_O32_Linux)
1594 		return regs.SYSCALL_SYSCALL_NUM;
1595 #endif
1596 	return regs.SYSCALL_NUM;
1597 }
1598 
1599 /* Architecture-specific syscall changing routine. */
1600 void change_syscall(struct __test_metadata *_metadata,
1601 		    pid_t tracee, int syscall)
1602 {
1603 	int ret;
1604 	ARCH_REGS regs;
1605 #ifdef HAVE_GETREGS
1606 	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1607 #else
1608 	struct iovec iov;
1609 	iov.iov_base = &regs;
1610 	iov.iov_len = sizeof(regs);
1611 	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1612 #endif
1613 	EXPECT_EQ(0, ret) {}
1614 
1615 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1616     defined(__s390__) || defined(__hppa__)
1617 	{
1618 		regs.SYSCALL_NUM = syscall;
1619 	}
1620 #elif defined(__mips__)
1621 	{
1622 		if (regs.SYSCALL_NUM == __NR_O32_Linux)
1623 			regs.SYSCALL_SYSCALL_NUM = syscall;
1624 		else
1625 			regs.SYSCALL_NUM = syscall;
1626 	}
1627 
1628 #elif defined(__arm__)
1629 # ifndef PTRACE_SET_SYSCALL
1630 #  define PTRACE_SET_SYSCALL   23
1631 # endif
1632 	{
1633 		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1634 		EXPECT_EQ(0, ret);
1635 	}
1636 
1637 #elif defined(__aarch64__)
1638 # ifndef NT_ARM_SYSTEM_CALL
1639 #  define NT_ARM_SYSTEM_CALL 0x404
1640 # endif
1641 	{
1642 		iov.iov_base = &syscall;
1643 		iov.iov_len = sizeof(syscall);
1644 		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1645 			     &iov);
1646 		EXPECT_EQ(0, ret);
1647 	}
1648 
1649 #else
1650 	ASSERT_EQ(1, 0) {
1651 		TH_LOG("How is the syscall changed on this architecture?");
1652 	}
1653 #endif
1654 
1655 	/* If syscall is skipped, change return value. */
1656 	if (syscall == -1)
1657 #ifdef SYSCALL_NUM_RET_SHARE_REG
1658 		TH_LOG("Can't modify syscall return on this architecture");
1659 #else
1660 		regs.SYSCALL_RET = EPERM;
1661 #endif
1662 
1663 #ifdef HAVE_GETREGS
1664 	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1665 #else
1666 	iov.iov_base = &regs;
1667 	iov.iov_len = sizeof(regs);
1668 	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1669 #endif
1670 	EXPECT_EQ(0, ret);
1671 }
1672 
1673 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1674 		    int status, void *args)
1675 {
1676 	int ret;
1677 	unsigned long msg;
1678 
1679 	/* Make sure we got the right message. */
1680 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1681 	EXPECT_EQ(0, ret);
1682 
1683 	/* Validate and take action on expected syscalls. */
1684 	switch (msg) {
1685 	case 0x1002:
1686 		/* change getpid to getppid. */
1687 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1688 		change_syscall(_metadata, tracee, __NR_getppid);
1689 		break;
1690 	case 0x1003:
1691 		/* skip gettid. */
1692 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1693 		change_syscall(_metadata, tracee, -1);
1694 		break;
1695 	case 0x1004:
1696 		/* do nothing (allow getppid) */
1697 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1698 		break;
1699 	default:
1700 		EXPECT_EQ(0, msg) {
1701 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1702 			kill(tracee, SIGKILL);
1703 		}
1704 	}
1705 
1706 }
1707 
1708 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1709 		   int status, void *args)
1710 {
1711 	int ret, nr;
1712 	unsigned long msg;
1713 	static bool entry;
1714 
1715 	/* Make sure we got an empty message. */
1716 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1717 	EXPECT_EQ(0, ret);
1718 	EXPECT_EQ(0, msg);
1719 
1720 	/* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
1721 	entry = !entry;
1722 	if (!entry)
1723 		return;
1724 
1725 	nr = get_syscall(_metadata, tracee);
1726 
1727 	if (nr == __NR_getpid)
1728 		change_syscall(_metadata, tracee, __NR_getppid);
1729 	if (nr == __NR_openat)
1730 		change_syscall(_metadata, tracee, -1);
1731 }
1732 
/* Per-test state for the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Filter program built in the fixture setup. */
	struct sock_fprog prog;
	/* Tracer pid, followed by ids recorded before any filter is set. */
	pid_t tracer;
	pid_t mytid;
	pid_t mypid;
	pid_t parent;
};
1737 
1738 FIXTURE_SETUP(TRACE_syscall)
1739 {
1740 	struct sock_filter filter[] = {
1741 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1742 			offsetof(struct seccomp_data, nr)),
1743 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1744 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1745 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1746 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1747 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1748 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1749 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1750 	};
1751 
1752 	memset(&self->prog, 0, sizeof(self->prog));
1753 	self->prog.filter = malloc(sizeof(filter));
1754 	ASSERT_NE(NULL, self->prog.filter);
1755 	memcpy(self->prog.filter, filter, sizeof(filter));
1756 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1757 
1758 	/* Prepare some testable syscall results. */
1759 	self->mytid = syscall(__NR_gettid);
1760 	ASSERT_GT(self->mytid, 0);
1761 	ASSERT_NE(self->mytid, 1) {
1762 		TH_LOG("Running this test as init is not supported. :)");
1763 	}
1764 
1765 	self->mypid = getpid();
1766 	ASSERT_GT(self->mypid, 0);
1767 	ASSERT_EQ(self->mytid, self->mypid);
1768 
1769 	self->parent = getppid();
1770 	ASSERT_GT(self->parent, 0);
1771 	ASSERT_NE(self->parent, self->mypid);
1772 
1773 	/* Launch tracer. */
1774 	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1775 					   false);
1776 }
1777 
1778 FIXTURE_TEARDOWN(TRACE_syscall)
1779 {
1780 	teardown_trace_fixture(_metadata, self->tracer);
1781 	if (self->prog.filter)
1782 		free(self->prog.filter);
1783 }
1784 
1785 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1786 {
1787 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1788 	teardown_trace_fixture(_metadata, self->tracer);
1789 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1790 					   true);
1791 
1792 	/* Tracer will redirect getpid to getppid. */
1793 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1794 }
1795 
1796 TEST_F(TRACE_syscall, ptrace_syscall_dropped)
1797 {
1798 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1799 	teardown_trace_fixture(_metadata, self->tracer);
1800 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1801 					   true);
1802 
1803 	/* Tracer should skip the open syscall, resulting in EPERM. */
1804 	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat));
1805 }
1806 
1807 TEST_F(TRACE_syscall, syscall_allowed)
1808 {
1809 	long ret;
1810 
1811 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1812 	ASSERT_EQ(0, ret);
1813 
1814 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1815 	ASSERT_EQ(0, ret);
1816 
1817 	/* getppid works as expected (no changes). */
1818 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1819 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1820 }
1821 
1822 TEST_F(TRACE_syscall, syscall_redirected)
1823 {
1824 	long ret;
1825 
1826 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1827 	ASSERT_EQ(0, ret);
1828 
1829 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1830 	ASSERT_EQ(0, ret);
1831 
1832 	/* getpid has been redirected to getppid as expected. */
1833 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1834 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1835 }
1836 
1837 TEST_F(TRACE_syscall, syscall_dropped)
1838 {
1839 	long ret;
1840 
1841 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1842 	ASSERT_EQ(0, ret);
1843 
1844 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1845 	ASSERT_EQ(0, ret);
1846 
1847 	/* gettid has been skipped and an altered return value stored. */
1848 	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
1849 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
1850 }
1851 
1852 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1853 {
1854 	struct sock_filter filter[] = {
1855 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1856 			offsetof(struct seccomp_data, nr)),
1857 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1858 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1859 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1860 	};
1861 	struct sock_fprog prog = {
1862 		.len = (unsigned short)ARRAY_SIZE(filter),
1863 		.filter = filter,
1864 	};
1865 	long ret;
1866 
1867 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1868 	ASSERT_EQ(0, ret);
1869 
1870 	/* Install fixture filter. */
1871 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1872 	ASSERT_EQ(0, ret);
1873 
1874 	/* Install "errno on getppid" filter. */
1875 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1876 	ASSERT_EQ(0, ret);
1877 
1878 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
1879 	errno = 0;
1880 	EXPECT_EQ(-1, syscall(__NR_getpid));
1881 	EXPECT_EQ(EPERM, errno);
1882 }
1883 
1884 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1885 {
1886 	struct sock_filter filter[] = {
1887 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1888 			offsetof(struct seccomp_data, nr)),
1889 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1890 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1891 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1892 	};
1893 	struct sock_fprog prog = {
1894 		.len = (unsigned short)ARRAY_SIZE(filter),
1895 		.filter = filter,
1896 	};
1897 	long ret;
1898 
1899 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1900 	ASSERT_EQ(0, ret);
1901 
1902 	/* Install fixture filter. */
1903 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1904 	ASSERT_EQ(0, ret);
1905 
1906 	/* Install "death on getppid" filter. */
1907 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1908 	ASSERT_EQ(0, ret);
1909 
1910 	/* Tracer will redirect getpid to getppid, and we should die. */
1911 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1912 }
1913 
1914 TEST_F(TRACE_syscall, skip_after_ptrace)
1915 {
1916 	struct sock_filter filter[] = {
1917 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1918 			offsetof(struct seccomp_data, nr)),
1919 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1920 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1921 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1922 	};
1923 	struct sock_fprog prog = {
1924 		.len = (unsigned short)ARRAY_SIZE(filter),
1925 		.filter = filter,
1926 	};
1927 	long ret;
1928 
1929 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1930 	teardown_trace_fixture(_metadata, self->tracer);
1931 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1932 					   true);
1933 
1934 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1935 	ASSERT_EQ(0, ret);
1936 
1937 	/* Install "errno on getppid" filter. */
1938 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1939 	ASSERT_EQ(0, ret);
1940 
1941 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
1942 	EXPECT_EQ(-1, syscall(__NR_getpid));
1943 	EXPECT_EQ(EPERM, errno);
1944 }
1945 
1946 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
1947 {
1948 	struct sock_filter filter[] = {
1949 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1950 			offsetof(struct seccomp_data, nr)),
1951 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1952 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1953 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1954 	};
1955 	struct sock_fprog prog = {
1956 		.len = (unsigned short)ARRAY_SIZE(filter),
1957 		.filter = filter,
1958 	};
1959 	long ret;
1960 
1961 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1962 	teardown_trace_fixture(_metadata, self->tracer);
1963 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1964 					   true);
1965 
1966 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1967 	ASSERT_EQ(0, ret);
1968 
1969 	/* Install "death on getppid" filter. */
1970 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1971 	ASSERT_EQ(0, ret);
1972 
1973 	/* Tracer will redirect getpid to getppid, and we should die. */
1974 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1975 }
1976 
1977 TEST(seccomp_syscall)
1978 {
1979 	struct sock_filter filter[] = {
1980 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1981 	};
1982 	struct sock_fprog prog = {
1983 		.len = (unsigned short)ARRAY_SIZE(filter),
1984 		.filter = filter,
1985 	};
1986 	long ret;
1987 
1988 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1989 	ASSERT_EQ(0, ret) {
1990 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1991 	}
1992 
1993 	/* Reject insane operation. */
1994 	ret = seccomp(-1, 0, &prog);
1995 	ASSERT_NE(ENOSYS, errno) {
1996 		TH_LOG("Kernel does not support seccomp syscall!");
1997 	}
1998 	EXPECT_EQ(EINVAL, errno) {
1999 		TH_LOG("Did not reject crazy op value!");
2000 	}
2001 
2002 	/* Reject strict with flags or pointer. */
2003 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2004 	EXPECT_EQ(EINVAL, errno) {
2005 		TH_LOG("Did not reject mode strict with flags!");
2006 	}
2007 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2008 	EXPECT_EQ(EINVAL, errno) {
2009 		TH_LOG("Did not reject mode strict with uargs!");
2010 	}
2011 
2012 	/* Reject insane args for filter. */
2013 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2014 	EXPECT_EQ(EINVAL, errno) {
2015 		TH_LOG("Did not reject crazy filter flags!");
2016 	}
2017 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2018 	EXPECT_EQ(EFAULT, errno) {
2019 		TH_LOG("Did not reject NULL filter!");
2020 	}
2021 
2022 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2023 	EXPECT_EQ(0, errno) {
2024 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2025 			strerror(errno));
2026 	}
2027 }
2028 
2029 TEST(seccomp_syscall_mode_lock)
2030 {
2031 	struct sock_filter filter[] = {
2032 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2033 	};
2034 	struct sock_fprog prog = {
2035 		.len = (unsigned short)ARRAY_SIZE(filter),
2036 		.filter = filter,
2037 	};
2038 	long ret;
2039 
2040 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2041 	ASSERT_EQ(0, ret) {
2042 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2043 	}
2044 
2045 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2046 	ASSERT_NE(ENOSYS, errno) {
2047 		TH_LOG("Kernel does not support seccomp syscall!");
2048 	}
2049 	EXPECT_EQ(0, ret) {
2050 		TH_LOG("Could not install filter!");
2051 	}
2052 
2053 	/* Make sure neither entry point will switch to strict. */
2054 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2055 	EXPECT_EQ(EINVAL, errno) {
2056 		TH_LOG("Switched to mode strict!");
2057 	}
2058 
2059 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2060 	EXPECT_EQ(EINVAL, errno) {
2061 		TH_LOG("Switched to mode strict!");
2062 	}
2063 }
2064 
2065 /*
2066  * Test detection of known and unknown filter flags. Userspace needs to be able
2067  * to check if a filter flag is supported by the current kernel and a good way
2068  * of doing that is by attempting to enter filter mode, with the flag bit in
2069  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2070  * that the flag is valid and EINVAL indicates that the flag is invalid.
2071  */
2072 TEST(detect_seccomp_filter_flags)
2073 {
2074 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2075 				 SECCOMP_FILTER_FLAG_LOG };
2076 	unsigned int flag, all_flags;
2077 	int i;
2078 	long ret;
2079 
2080 	/* Test detection of known-good filter flags */
2081 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2082 		flag = flags[i];
2083 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2084 		ASSERT_NE(ENOSYS, errno) {
2085 			TH_LOG("Kernel does not support seccomp syscall!");
2086 		}
2087 		EXPECT_EQ(-1, ret);
2088 		EXPECT_EQ(EFAULT, errno) {
2089 			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2090 			       flag);
2091 		}
2092 
2093 		all_flags |= flag;
2094 	}
2095 
2096 	/* Test detection of all known-good filter flags */
2097 	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
2098 	EXPECT_EQ(-1, ret);
2099 	EXPECT_EQ(EFAULT, errno) {
2100 		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2101 		       all_flags);
2102 	}
2103 
2104 	/* Test detection of an unknown filter flag */
2105 	flag = -1;
2106 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2107 	EXPECT_EQ(-1, ret);
2108 	EXPECT_EQ(EINVAL, errno) {
2109 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2110 		       flag);
2111 	}
2112 
2113 	/*
2114 	 * Test detection of an unknown filter flag that may simply need to be
2115 	 * added to this test
2116 	 */
2117 	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2118 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2119 	EXPECT_EQ(-1, ret);
2120 	EXPECT_EQ(EINVAL, errno) {
2121 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2122 		       flag);
2123 	}
2124 }
2125 
2126 TEST(TSYNC_first)
2127 {
2128 	struct sock_filter filter[] = {
2129 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2130 	};
2131 	struct sock_fprog prog = {
2132 		.len = (unsigned short)ARRAY_SIZE(filter),
2133 		.filter = filter,
2134 	};
2135 	long ret;
2136 
2137 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2138 	ASSERT_EQ(0, ret) {
2139 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2140 	}
2141 
2142 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2143 		      &prog);
2144 	ASSERT_NE(ENOSYS, errno) {
2145 		TH_LOG("Kernel does not support seccomp syscall!");
2146 	}
2147 	EXPECT_EQ(0, ret) {
2148 		TH_LOG("Could not install initial filter with TSYNC!");
2149 	}
2150 }
2151 
/* Number of helper threads every TSYNC fixture test works with. */
#define TSYNC_SIBLINGS 2

/* Per-thread bookkeeping handed to each sibling thread as its argument. */
struct tsync_sibling {
	/* pthread handle; zeroed again by PTHREAD_JOIN() after a good join. */
	pthread_t tid;
	/* Kernel thread id, recorded by the thread itself via gettid. */
	pid_t system_tid;
	/* Posted once by the thread when it has finished its startup step. */
	sem_t *started;
	/* Condition variable (guarded by @mutex) the thread blocks on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the thread installs @prog on itself before waiting. */
	int diverge;
	/* How many wakeups on @cond the thread consumes before moving on. */
	int num_waits;
	/* Filter program installed when @diverge is set. */
	struct sock_fprog *prog;
	/* Harness metadata of the test that owns this sibling. */
	struct __test_metadata *metadata;
};
2164 
/*
 * Join @tid and, only when the join succeeded, reset @tid to 0 so that
 * fixture teardown leaves it alone: Bionic does not allow joining a
 * thread that has already been joined. A failed join is logged and @tid
 * is kept, which lets teardown kill the thread directly.
 *
 * @tid must be an lvalue; it is evaluated more than once.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _join_rc = pthread_join(tid, status);		\
		if (!_join_rc) {					\
			tid = 0;					\
		} else {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)tid, _join_rc);		\
		}							\
	} while (0)
2181 
2182 FIXTURE_DATA(TSYNC) {
2183 	struct sock_fprog root_prog, apply_prog;
2184 	struct tsync_sibling sibling[TSYNC_SIBLINGS];
2185 	sem_t started;
2186 	pthread_cond_t cond;
2187 	pthread_mutex_t mutex;
2188 	int sibling_count;
2189 };
2190 
2191 FIXTURE_SETUP(TSYNC)
2192 {
2193 	struct sock_filter root_filter[] = {
2194 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2195 	};
2196 	struct sock_filter apply_filter[] = {
2197 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2198 			offsetof(struct seccomp_data, nr)),
2199 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2200 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2201 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2202 	};
2203 
2204 	memset(&self->root_prog, 0, sizeof(self->root_prog));
2205 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2206 	memset(&self->sibling, 0, sizeof(self->sibling));
2207 	self->root_prog.filter = malloc(sizeof(root_filter));
2208 	ASSERT_NE(NULL, self->root_prog.filter);
2209 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2210 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2211 
2212 	self->apply_prog.filter = malloc(sizeof(apply_filter));
2213 	ASSERT_NE(NULL, self->apply_prog.filter);
2214 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2215 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2216 
2217 	self->sibling_count = 0;
2218 	pthread_mutex_init(&self->mutex, NULL);
2219 	pthread_cond_init(&self->cond, NULL);
2220 	sem_init(&self->started, 0, 0);
2221 	self->sibling[0].tid = 0;
2222 	self->sibling[0].cond = &self->cond;
2223 	self->sibling[0].started = &self->started;
2224 	self->sibling[0].mutex = &self->mutex;
2225 	self->sibling[0].diverge = 0;
2226 	self->sibling[0].num_waits = 1;
2227 	self->sibling[0].prog = &self->root_prog;
2228 	self->sibling[0].metadata = _metadata;
2229 	self->sibling[1].tid = 0;
2230 	self->sibling[1].cond = &self->cond;
2231 	self->sibling[1].started = &self->started;
2232 	self->sibling[1].mutex = &self->mutex;
2233 	self->sibling[1].diverge = 0;
2234 	self->sibling[1].prog = &self->root_prog;
2235 	self->sibling[1].num_waits = 1;
2236 	self->sibling[1].metadata = _metadata;
2237 }
2238 
2239 FIXTURE_TEARDOWN(TSYNC)
2240 {
2241 	int sib = 0;
2242 
2243 	if (self->root_prog.filter)
2244 		free(self->root_prog.filter);
2245 	if (self->apply_prog.filter)
2246 		free(self->apply_prog.filter);
2247 
2248 	for ( ; sib < self->sibling_count; ++sib) {
2249 		struct tsync_sibling *s = &self->sibling[sib];
2250 
2251 		if (!s->tid)
2252 			continue;
2253 		/*
2254 		 * If a thread is still running, it may be stuck, so hit
2255 		 * it over the head really hard.
2256 		 */
2257 		pthread_kill(s->tid, 9);
2258 	}
2259 	pthread_mutex_destroy(&self->mutex);
2260 	pthread_cond_destroy(&self->cond);
2261 	sem_destroy(&self->started);
2262 }
2263 
2264 void *tsync_sibling(void *data)
2265 {
2266 	long ret = 0;
2267 	struct tsync_sibling *me = data;
2268 
2269 	me->system_tid = syscall(__NR_gettid);
2270 
2271 	pthread_mutex_lock(me->mutex);
2272 	if (me->diverge) {
2273 		/* Just re-apply the root prog to fork the tree */
2274 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2275 				me->prog, 0, 0);
2276 	}
2277 	sem_post(me->started);
2278 	/* Return outside of started so parent notices failures. */
2279 	if (ret) {
2280 		pthread_mutex_unlock(me->mutex);
2281 		return (void *)SIBLING_EXIT_FAILURE;
2282 	}
2283 	do {
2284 		pthread_cond_wait(me->cond, me->mutex);
2285 		me->num_waits = me->num_waits - 1;
2286 	} while (me->num_waits);
2287 	pthread_mutex_unlock(me->mutex);
2288 
2289 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2290 	if (!ret)
2291 		return (void *)SIBLING_EXIT_NEWPRIVS;
2292 	read(0, NULL, 0);
2293 	return (void *)SIBLING_EXIT_UNKILLED;
2294 }
2295 
2296 void tsync_start_sibling(struct tsync_sibling *sibling)
2297 {
2298 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2299 }
2300 
2301 TEST_F(TSYNC, siblings_fail_prctl)
2302 {
2303 	long ret;
2304 	void *status;
2305 	struct sock_filter filter[] = {
2306 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2307 			offsetof(struct seccomp_data, nr)),
2308 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2309 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2310 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2311 	};
2312 	struct sock_fprog prog = {
2313 		.len = (unsigned short)ARRAY_SIZE(filter),
2314 		.filter = filter,
2315 	};
2316 
2317 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2318 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2319 	}
2320 
2321 	/* Check prctl failure detection by requesting sib 0 diverge. */
2322 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2323 	ASSERT_NE(ENOSYS, errno) {
2324 		TH_LOG("Kernel does not support seccomp syscall!");
2325 	}
2326 	ASSERT_EQ(0, ret) {
2327 		TH_LOG("setting filter failed");
2328 	}
2329 
2330 	self->sibling[0].diverge = 1;
2331 	tsync_start_sibling(&self->sibling[0]);
2332 	tsync_start_sibling(&self->sibling[1]);
2333 
2334 	while (self->sibling_count < TSYNC_SIBLINGS) {
2335 		sem_wait(&self->started);
2336 		self->sibling_count++;
2337 	}
2338 
2339 	/* Signal the threads to clean up*/
2340 	pthread_mutex_lock(&self->mutex);
2341 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2342 		TH_LOG("cond broadcast non-zero");
2343 	}
2344 	pthread_mutex_unlock(&self->mutex);
2345 
2346 	/* Ensure diverging sibling failed to call prctl. */
2347 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2348 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2349 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2350 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2351 }
2352 
2353 TEST_F(TSYNC, two_siblings_with_ancestor)
2354 {
2355 	long ret;
2356 	void *status;
2357 
2358 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2359 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2360 	}
2361 
2362 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2363 	ASSERT_NE(ENOSYS, errno) {
2364 		TH_LOG("Kernel does not support seccomp syscall!");
2365 	}
2366 	ASSERT_EQ(0, ret) {
2367 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2368 	}
2369 	tsync_start_sibling(&self->sibling[0]);
2370 	tsync_start_sibling(&self->sibling[1]);
2371 
2372 	while (self->sibling_count < TSYNC_SIBLINGS) {
2373 		sem_wait(&self->started);
2374 		self->sibling_count++;
2375 	}
2376 
2377 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2378 		      &self->apply_prog);
2379 	ASSERT_EQ(0, ret) {
2380 		TH_LOG("Could install filter on all threads!");
2381 	}
2382 	/* Tell the siblings to test the policy */
2383 	pthread_mutex_lock(&self->mutex);
2384 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2385 		TH_LOG("cond broadcast non-zero");
2386 	}
2387 	pthread_mutex_unlock(&self->mutex);
2388 	/* Ensure they are both killed and don't exit cleanly. */
2389 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2390 	EXPECT_EQ(0x0, (long)status);
2391 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2392 	EXPECT_EQ(0x0, (long)status);
2393 }
2394 
2395 TEST_F(TSYNC, two_sibling_want_nnp)
2396 {
2397 	void *status;
2398 
2399 	/* start siblings before any prctl() operations */
2400 	tsync_start_sibling(&self->sibling[0]);
2401 	tsync_start_sibling(&self->sibling[1]);
2402 	while (self->sibling_count < TSYNC_SIBLINGS) {
2403 		sem_wait(&self->started);
2404 		self->sibling_count++;
2405 	}
2406 
2407 	/* Tell the siblings to test no policy */
2408 	pthread_mutex_lock(&self->mutex);
2409 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2410 		TH_LOG("cond broadcast non-zero");
2411 	}
2412 	pthread_mutex_unlock(&self->mutex);
2413 
2414 	/* Ensure they are both upset about lacking nnp. */
2415 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2416 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2417 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2418 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2419 }
2420 
2421 TEST_F(TSYNC, two_siblings_with_no_filter)
2422 {
2423 	long ret;
2424 	void *status;
2425 
2426 	/* start siblings before any prctl() operations */
2427 	tsync_start_sibling(&self->sibling[0]);
2428 	tsync_start_sibling(&self->sibling[1]);
2429 	while (self->sibling_count < TSYNC_SIBLINGS) {
2430 		sem_wait(&self->started);
2431 		self->sibling_count++;
2432 	}
2433 
2434 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2435 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2436 	}
2437 
2438 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2439 		      &self->apply_prog);
2440 	ASSERT_NE(ENOSYS, errno) {
2441 		TH_LOG("Kernel does not support seccomp syscall!");
2442 	}
2443 	ASSERT_EQ(0, ret) {
2444 		TH_LOG("Could install filter on all threads!");
2445 	}
2446 
2447 	/* Tell the siblings to test the policy */
2448 	pthread_mutex_lock(&self->mutex);
2449 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2450 		TH_LOG("cond broadcast non-zero");
2451 	}
2452 	pthread_mutex_unlock(&self->mutex);
2453 
2454 	/* Ensure they are both killed and don't exit cleanly. */
2455 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2456 	EXPECT_EQ(0x0, (long)status);
2457 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2458 	EXPECT_EQ(0x0, (long)status);
2459 }
2460 
2461 TEST_F(TSYNC, two_siblings_with_one_divergence)
2462 {
2463 	long ret;
2464 	void *status;
2465 
2466 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2467 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2468 	}
2469 
2470 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2471 	ASSERT_NE(ENOSYS, errno) {
2472 		TH_LOG("Kernel does not support seccomp syscall!");
2473 	}
2474 	ASSERT_EQ(0, ret) {
2475 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2476 	}
2477 	self->sibling[0].diverge = 1;
2478 	tsync_start_sibling(&self->sibling[0]);
2479 	tsync_start_sibling(&self->sibling[1]);
2480 
2481 	while (self->sibling_count < TSYNC_SIBLINGS) {
2482 		sem_wait(&self->started);
2483 		self->sibling_count++;
2484 	}
2485 
2486 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2487 		      &self->apply_prog);
2488 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2489 		TH_LOG("Did not fail on diverged sibling.");
2490 	}
2491 
2492 	/* Wake the threads */
2493 	pthread_mutex_lock(&self->mutex);
2494 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2495 		TH_LOG("cond broadcast non-zero");
2496 	}
2497 	pthread_mutex_unlock(&self->mutex);
2498 
2499 	/* Ensure they are both unkilled. */
2500 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2501 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2502 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2503 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2504 }
2505 
2506 TEST_F(TSYNC, two_siblings_not_under_filter)
2507 {
2508 	long ret, sib;
2509 	void *status;
2510 
2511 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2512 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2513 	}
2514 
2515 	/*
2516 	 * Sibling 0 will have its own seccomp policy
2517 	 * and Sibling 1 will not be under seccomp at
2518 	 * all. Sibling 1 will enter seccomp and 0
2519 	 * will cause failure.
2520 	 */
2521 	self->sibling[0].diverge = 1;
2522 	tsync_start_sibling(&self->sibling[0]);
2523 	tsync_start_sibling(&self->sibling[1]);
2524 
2525 	while (self->sibling_count < TSYNC_SIBLINGS) {
2526 		sem_wait(&self->started);
2527 		self->sibling_count++;
2528 	}
2529 
2530 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2531 	ASSERT_NE(ENOSYS, errno) {
2532 		TH_LOG("Kernel does not support seccomp syscall!");
2533 	}
2534 	ASSERT_EQ(0, ret) {
2535 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2536 	}
2537 
2538 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2539 		      &self->apply_prog);
2540 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
2541 		TH_LOG("Did not fail on diverged sibling.");
2542 	}
2543 	sib = 1;
2544 	if (ret == self->sibling[0].system_tid)
2545 		sib = 0;
2546 
2547 	pthread_mutex_lock(&self->mutex);
2548 
2549 	/* Increment the other siblings num_waits so we can clean up
2550 	 * the one we just saw.
2551 	 */
2552 	self->sibling[!sib].num_waits += 1;
2553 
2554 	/* Signal the thread to clean up*/
2555 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2556 		TH_LOG("cond broadcast non-zero");
2557 	}
2558 	pthread_mutex_unlock(&self->mutex);
2559 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2560 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2561 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2562 	while (!kill(self->sibling[sib].system_tid, 0))
2563 		sleep(0.1);
2564 	/* Switch to the remaining sibling */
2565 	sib = !sib;
2566 
2567 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2568 		      &self->apply_prog);
2569 	ASSERT_EQ(0, ret) {
2570 		TH_LOG("Expected the remaining sibling to sync");
2571 	};
2572 
2573 	pthread_mutex_lock(&self->mutex);
2574 
2575 	/* If remaining sibling didn't have a chance to wake up during
2576 	 * the first broadcast, manually reduce the num_waits now.
2577 	 */
2578 	if (self->sibling[sib].num_waits > 1)
2579 		self->sibling[sib].num_waits = 1;
2580 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2581 		TH_LOG("cond broadcast non-zero");
2582 	}
2583 	pthread_mutex_unlock(&self->mutex);
2584 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2585 	EXPECT_EQ(0, (long)status);
2586 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2587 	while (!kill(self->sibling[sib].system_tid, 0))
2588 		sleep(0.1);
2589 
2590 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2591 		      &self->apply_prog);
2592 	ASSERT_EQ(0, ret);  /* just us chickens */
2593 }
2594 
2595 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
2596 TEST(syscall_restart)
2597 {
2598 	long ret;
2599 	unsigned long msg;
2600 	pid_t child_pid;
2601 	int pipefd[2];
2602 	int status;
2603 	siginfo_t info = { };
2604 	struct sock_filter filter[] = {
2605 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2606 			 offsetof(struct seccomp_data, nr)),
2607 
2608 #ifdef __NR_sigreturn
2609 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2610 #endif
2611 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2612 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2613 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2614 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2615 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2616 
2617 		/* Allow __NR_write for easy logging. */
2618 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2619 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2620 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2621 		/* The nanosleep jump target. */
2622 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2623 		/* The restart_syscall jump target. */
2624 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2625 	};
2626 	struct sock_fprog prog = {
2627 		.len = (unsigned short)ARRAY_SIZE(filter),
2628 		.filter = filter,
2629 	};
2630 #if defined(__arm__)
2631 	struct utsname utsbuf;
2632 #endif
2633 
2634 	ASSERT_EQ(0, pipe(pipefd));
2635 
2636 	child_pid = fork();
2637 	ASSERT_LE(0, child_pid);
2638 	if (child_pid == 0) {
2639 		/* Child uses EXPECT not ASSERT to deliver status correctly. */
2640 		char buf = ' ';
2641 		struct timespec timeout = { };
2642 
2643 		/* Attach parent as tracer and stop. */
2644 		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2645 		EXPECT_EQ(0, raise(SIGSTOP));
2646 
2647 		EXPECT_EQ(0, close(pipefd[1]));
2648 
2649 		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2650 			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2651 		}
2652 
2653 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2654 		EXPECT_EQ(0, ret) {
2655 			TH_LOG("Failed to install filter!");
2656 		}
2657 
2658 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2659 			TH_LOG("Failed to read() sync from parent");
2660 		}
2661 		EXPECT_EQ('.', buf) {
2662 			TH_LOG("Failed to get sync data from read()");
2663 		}
2664 
2665 		/* Start nanosleep to be interrupted. */
2666 		timeout.tv_sec = 1;
2667 		errno = 0;
2668 		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2669 			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2670 		}
2671 
2672 		/* Read final sync from parent. */
2673 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2674 			TH_LOG("Failed final read() from parent");
2675 		}
2676 		EXPECT_EQ('!', buf) {
2677 			TH_LOG("Failed to get final data from read()");
2678 		}
2679 
2680 		/* Directly report the status of our test harness results. */
2681 		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2682 						     : EXIT_FAILURE);
2683 	}
2684 	EXPECT_EQ(0, close(pipefd[0]));
2685 
2686 	/* Attach to child, setup options, and release. */
2687 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2688 	ASSERT_EQ(true, WIFSTOPPED(status));
2689 	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2690 			    PTRACE_O_TRACESECCOMP));
2691 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2692 	ASSERT_EQ(1, write(pipefd[1], ".", 1));
2693 
2694 	/* Wait for nanosleep() to start. */
2695 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2696 	ASSERT_EQ(true, WIFSTOPPED(status));
2697 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2698 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2699 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2700 	ASSERT_EQ(0x100, msg);
2701 	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2702 
2703 	/* Might as well check siginfo for sanity while we're here. */
2704 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2705 	ASSERT_EQ(SIGTRAP, info.si_signo);
2706 	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2707 	EXPECT_EQ(0, info.si_errno);
2708 	EXPECT_EQ(getuid(), info.si_uid);
2709 	/* Verify signal delivery came from child (seccomp-triggered). */
2710 	EXPECT_EQ(child_pid, info.si_pid);
2711 
2712 	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2713 	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2714 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2715 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2716 	ASSERT_EQ(true, WIFSTOPPED(status));
2717 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2718 	/* Verify signal delivery came from parent now. */
2719 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2720 	EXPECT_EQ(getpid(), info.si_pid);
2721 
2722 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2723 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
2724 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2725 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2726 	ASSERT_EQ(true, WIFSTOPPED(status));
2727 	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2728 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2729 
2730 	/* Wait for restart_syscall() to start. */
2731 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2732 	ASSERT_EQ(true, WIFSTOPPED(status));
2733 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2734 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2735 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2736 
2737 	ASSERT_EQ(0x200, msg);
2738 	ret = get_syscall(_metadata, child_pid);
2739 #if defined(__arm__)
2740 	/*
2741 	 * FIXME:
2742 	 * - native ARM registers do NOT expose true syscall.
2743 	 * - compat ARM registers on ARM64 DO expose true syscall.
2744 	 */
2745 	ASSERT_EQ(0, uname(&utsbuf));
2746 	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2747 		EXPECT_EQ(__NR_nanosleep, ret);
2748 	} else
2749 #endif
2750 	{
2751 		EXPECT_EQ(__NR_restart_syscall, ret);
2752 	}
2753 
2754 	/* Write again to end test. */
2755 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2756 	ASSERT_EQ(1, write(pipefd[1], "!", 1));
2757 	EXPECT_EQ(0, close(pipefd[1]));
2758 
2759 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2760 	if (WIFSIGNALED(status) || WEXITSTATUS(status))
2761 		_metadata->passed = 0;
2762 }
2763 
2764 TEST_SIGNAL(filter_flag_log, SIGSYS)
2765 {
2766 	struct sock_filter allow_filter[] = {
2767 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2768 	};
2769 	struct sock_filter kill_filter[] = {
2770 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2771 			offsetof(struct seccomp_data, nr)),
2772 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2773 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2774 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2775 	};
2776 	struct sock_fprog allow_prog = {
2777 		.len = (unsigned short)ARRAY_SIZE(allow_filter),
2778 		.filter = allow_filter,
2779 	};
2780 	struct sock_fprog kill_prog = {
2781 		.len = (unsigned short)ARRAY_SIZE(kill_filter),
2782 		.filter = kill_filter,
2783 	};
2784 	long ret;
2785 	pid_t parent = getppid();
2786 
2787 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2788 	ASSERT_EQ(0, ret);
2789 
2790 	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2791 	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2792 		      &allow_prog);
2793 	ASSERT_NE(ENOSYS, errno) {
2794 		TH_LOG("Kernel does not support seccomp syscall!");
2795 	}
2796 	EXPECT_NE(0, ret) {
2797 		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2798 	}
2799 	EXPECT_EQ(EINVAL, errno) {
2800 		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2801 	}
2802 
2803 	/* Verify that a simple, permissive filter can be added with no flags */
2804 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2805 	EXPECT_EQ(0, ret);
2806 
2807 	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2808 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2809 		      &allow_prog);
2810 	ASSERT_NE(EINVAL, errno) {
2811 		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2812 	}
2813 	EXPECT_EQ(0, ret);
2814 
2815 	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2816 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2817 		      &kill_prog);
2818 	EXPECT_EQ(0, ret);
2819 
2820 	EXPECT_EQ(parent, syscall(__NR_getppid));
2821 	/* getpid() should never return. */
2822 	EXPECT_EQ(0, syscall(__NR_getpid));
2823 }
2824 
2825 TEST(get_action_avail)
2826 {
2827 	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2828 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2829 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2830 	__u32 unknown_action = 0x10000000U;
2831 	int i;
2832 	long ret;
2833 
2834 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2835 	ASSERT_NE(ENOSYS, errno) {
2836 		TH_LOG("Kernel does not support seccomp syscall!");
2837 	}
2838 	ASSERT_NE(EINVAL, errno) {
2839 		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2840 	}
2841 	EXPECT_EQ(ret, 0);
2842 
2843 	for (i = 0; i < ARRAY_SIZE(actions); i++) {
2844 		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2845 		EXPECT_EQ(ret, 0) {
2846 			TH_LOG("Expected action (0x%X) not available!",
2847 			       actions[i]);
2848 		}
2849 	}
2850 
2851 	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
2852 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
2853 	EXPECT_EQ(ret, -1);
2854 	EXPECT_EQ(errno, EOPNOTSUPP);
2855 }
2856 
2857 TEST(get_metadata)
2858 {
2859 	pid_t pid;
2860 	int pipefd[2];
2861 	char buf;
2862 	struct seccomp_metadata md;
2863 	long ret;
2864 
2865 	ASSERT_EQ(0, pipe(pipefd));
2866 
2867 	pid = fork();
2868 	ASSERT_GE(pid, 0);
2869 	if (pid == 0) {
2870 		struct sock_filter filter[] = {
2871 			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2872 		};
2873 		struct sock_fprog prog = {
2874 			.len = (unsigned short)ARRAY_SIZE(filter),
2875 			.filter = filter,
2876 		};
2877 
2878 		/* one with log, one without */
2879 		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
2880 				     SECCOMP_FILTER_FLAG_LOG, &prog));
2881 		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
2882 
2883 		ASSERT_EQ(0, close(pipefd[0]));
2884 		ASSERT_EQ(1, write(pipefd[1], "1", 1));
2885 		ASSERT_EQ(0, close(pipefd[1]));
2886 
2887 		while (1)
2888 			sleep(100);
2889 	}
2890 
2891 	ASSERT_EQ(0, close(pipefd[1]));
2892 	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
2893 
2894 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
2895 	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
2896 
2897 	/* Past here must not use ASSERT or child process is never killed. */
2898 
2899 	md.filter_off = 0;
2900 	errno = 0;
2901 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
2902 	EXPECT_EQ(sizeof(md), ret) {
2903 		if (errno == EINVAL)
2904 			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
2905 	}
2906 
2907 	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
2908 	EXPECT_EQ(md.filter_off, 0);
2909 
2910 	md.filter_off = 1;
2911 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
2912 	EXPECT_EQ(sizeof(md), ret);
2913 	EXPECT_EQ(md.flags, 0);
2914 	EXPECT_EQ(md.filter_off, 1);
2915 
2916 skip:
2917 	ASSERT_EQ(0, kill(pid, SIGKILL));
2918 }
2919 
2920 /*
2921  * TODO:
2922  * - add microbenchmarks
2923  * - expand NNP testing
2924  * - better arch-specific TRACE and TRAP handlers.
2925  * - endianness checking when appropriate
2926  * - 64-bit arg prodding
2927  * - arch value testing (x86 modes especially)
2928  * - verify that FILTER_FLAG_LOG filters generate log messages
2929  * - verify that RET_LOG generates log messages
2930  * - ...
2931  */
2932 
2933 TEST_HARNESS_MAIN
2934