// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020 Collabora Ltd.
 *
 * Benchmark and test syscall user dispatch
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <sys/sysinfo.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

/* Fallback values for builds whose headers do not define the dispatch prctl. */
#ifndef PR_SET_SYSCALL_USER_DISPATCH
#define PR_SET_SYSCALL_USER_DISPATCH	59
#define PR_SYS_DISPATCH_OFF		0
#define PR_SYS_DISPATCH_ON		1
#define SYSCALL_DISPATCH_FILTER_ALLOW	0
#define SYSCALL_DISPATCH_FILTER_BLOCK	1
#endif

/* MAGIC_SYSCALL_1 is deliberately a bad Linux syscall number. */
#ifdef __NR_syscalls
#define MAGIC_SYSCALL_1 (__NR_syscalls + 1)
#else
#define MAGIC_SYSCALL_1 (0xff00)
#endif

/*
 * Returning from a SIGSYS handler while the selector is blocked needs
 * per-architecture knowledge of where the signal trampoline lives.  On
 * i386 it is in the vdso; for x86_64 a small trampoline is open-coded in
 * this file.  Other architectures with a vdso trampoline would support
 * TEST_BLOCKED_RETURN as-is, but it stays disabled for them until they
 * support syscall user dispatch.
 */
#if defined(__x86_64__) || defined(__i386__)
#define TEST_BLOCKED_RETURN
#endif

/*
 * Bounds of the region passed to the dispatch prctl.  On x86_64 these are
 * labels emitted by the inline assembly in the SIGSYS handler; elsewhere
 * they are plain zero-valued integers.
 */
#ifdef __x86_64__
void *syscall_dispatcher_start(void);
void *syscall_dispatcher_end(void);
#else
unsigned long syscall_dispatcher_start = 0;
unsigned long syscall_dispatcher_end = 0;
#endif

/* Counters bumped by the SIGSYS handler: the magic syscall vs. anything else. */
unsigned long trapped_call_count = 0;
unsigned long native_call_count = 0;

/* Selector byte whose address is handed to the dispatch prctl. */
char selector;
#define SYSCALL_BLOCK   (selector = SYSCALL_DISPATCH_FILTER_BLOCK)
#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW)

/* Number of sysinfo() calls per timed batch. */
#define CALIBRATION_STEP 100000
/* Multiplier applied during calibration to reach the target run time. */
#define CALIBRATE_TO_SECS 5
/* Number of batches per benchmark run; filled in by calibration. */
int factor;

one_sysinfo_step(void)67d87ae0faSGabriel Krisman Bertazi static double one_sysinfo_step(void)
68d87ae0faSGabriel Krisman Bertazi {
69d87ae0faSGabriel Krisman Bertazi struct timespec t1, t2;
70d87ae0faSGabriel Krisman Bertazi int i;
71d87ae0faSGabriel Krisman Bertazi struct sysinfo info;
72d87ae0faSGabriel Krisman Bertazi
73d87ae0faSGabriel Krisman Bertazi clock_gettime(CLOCK_MONOTONIC, &t1);
74d87ae0faSGabriel Krisman Bertazi for (i = 0; i < CALIBRATION_STEP; i++)
75d87ae0faSGabriel Krisman Bertazi sysinfo(&info);
76d87ae0faSGabriel Krisman Bertazi clock_gettime(CLOCK_MONOTONIC, &t2);
77d87ae0faSGabriel Krisman Bertazi return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
78d87ae0faSGabriel Krisman Bertazi }
79d87ae0faSGabriel Krisman Bertazi
calibrate_set(void)80d87ae0faSGabriel Krisman Bertazi static void calibrate_set(void)
81d87ae0faSGabriel Krisman Bertazi {
82d87ae0faSGabriel Krisman Bertazi double elapsed = 0;
83d87ae0faSGabriel Krisman Bertazi
84d87ae0faSGabriel Krisman Bertazi printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
85d87ae0faSGabriel Krisman Bertazi
86d87ae0faSGabriel Krisman Bertazi while (elapsed < 1) {
87d87ae0faSGabriel Krisman Bertazi elapsed += one_sysinfo_step();
88d87ae0faSGabriel Krisman Bertazi factor += CALIBRATE_TO_SECS;
89d87ae0faSGabriel Krisman Bertazi }
90d87ae0faSGabriel Krisman Bertazi
91d87ae0faSGabriel Krisman Bertazi printf("test iterations = %d\n", CALIBRATION_STEP * factor);
92d87ae0faSGabriel Krisman Bertazi }
93d87ae0faSGabriel Krisman Bertazi
perf_syscall(void)94d87ae0faSGabriel Krisman Bertazi static double perf_syscall(void)
95d87ae0faSGabriel Krisman Bertazi {
96d87ae0faSGabriel Krisman Bertazi unsigned int i;
97d87ae0faSGabriel Krisman Bertazi double partial = 0;
98d87ae0faSGabriel Krisman Bertazi
99d87ae0faSGabriel Krisman Bertazi for (i = 0; i < factor; ++i)
100d87ae0faSGabriel Krisman Bertazi partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
101d87ae0faSGabriel Krisman Bertazi return partial;
102d87ae0faSGabriel Krisman Bertazi }
103d87ae0faSGabriel Krisman Bertazi
/*
 * SIGSYS handler (installed with SA_SIGINFO).
 *
 * Unblocks the selector so the handler itself can issue syscalls, reports
 * the trapped syscall number, and counts it as either the expected magic
 * syscall or an unexpected native one.  When TEST_BLOCKED_RETURN is set the
 * selector is blocked again before returning, so the return path itself is
 * exercised with dispatch active.
 *
 * @sig:      signal number (unused)
 * @info:     siginfo; si_syscall holds the number of the trapped syscall
 * @ucontext: saved user context (unused)
 */
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
	char buf[1024];
	int len;

	SYSCALL_UNBLOCK;

	/* Format into a local buffer and emit with write(); avoid printf here. */
	len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
	write(1, buf, len);

	if (info->si_syscall == MAGIC_SYSCALL_1)
		trapped_call_count++;
	else
		native_call_count++;

#ifdef TEST_BLOCKED_RETURN
	SYSCALL_BLOCK;
#endif

#ifdef __x86_64__
	/*
	 * Open-coded signal-return trampoline.  The syscall instruction sits
	 * between the syscall_dispatcher_start/end labels, i.e. inside the
	 * region handed to the dispatch prctl, so it can be issued even though
	 * the selector was just set back to BLOCK.
	 *
	 * The instructions are kept in ONE asm statement: the compiler gives
	 * no guarantee that separate asm statements stay adjacent, and this
	 * sequence must not have anything scheduled in between.
	 *
	 * NOTE(review): leaveq + add $0x8 presumably tears down this
	 * function's rbp-based frame and skips the return address so that rsp
	 * points at the signal frame; this depends on the compiler-generated
	 * prologue -- confirm when changing build flags.
	 */
	__asm__ volatile(
		"movq $0xf, %rax\n\t"	/* 0xf: rt_sigreturn on x86_64 */
		"leaveq\n\t"
		"add $0x8, %rsp\n\t"
		"syscall_dispatcher_start:\n\t"
		"syscall\n\t"
		"nop\n\t"		/* Landing pad within dispatcher area */
		"syscall_dispatcher_end:");
#endif

}

main(void)136d87ae0faSGabriel Krisman Bertazi int main(void)
137d87ae0faSGabriel Krisman Bertazi {
138d87ae0faSGabriel Krisman Bertazi struct sigaction act;
139d87ae0faSGabriel Krisman Bertazi double time1, time2;
140d87ae0faSGabriel Krisman Bertazi int ret;
141d87ae0faSGabriel Krisman Bertazi sigset_t mask;
142d87ae0faSGabriel Krisman Bertazi
143d87ae0faSGabriel Krisman Bertazi memset(&act, 0, sizeof(act));
144d87ae0faSGabriel Krisman Bertazi sigemptyset(&mask);
145d87ae0faSGabriel Krisman Bertazi
146d87ae0faSGabriel Krisman Bertazi act.sa_sigaction = handle_sigsys;
147d87ae0faSGabriel Krisman Bertazi act.sa_flags = SA_SIGINFO;
148d87ae0faSGabriel Krisman Bertazi act.sa_mask = mask;
149d87ae0faSGabriel Krisman Bertazi
150d87ae0faSGabriel Krisman Bertazi calibrate_set();
151d87ae0faSGabriel Krisman Bertazi
152d87ae0faSGabriel Krisman Bertazi time1 = perf_syscall();
153d87ae0faSGabriel Krisman Bertazi printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
154d87ae0faSGabriel Krisman Bertazi
155d87ae0faSGabriel Krisman Bertazi ret = sigaction(SIGSYS, &act, NULL);
156d87ae0faSGabriel Krisman Bertazi if (ret) {
157d87ae0faSGabriel Krisman Bertazi perror("Error sigaction:");
158d87ae0faSGabriel Krisman Bertazi exit(-1);
159d87ae0faSGabriel Krisman Bertazi }
160d87ae0faSGabriel Krisman Bertazi
161d87ae0faSGabriel Krisman Bertazi fprintf(stderr, "Enabling syscall trapping.\n");
162d87ae0faSGabriel Krisman Bertazi
163d87ae0faSGabriel Krisman Bertazi if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
164d87ae0faSGabriel Krisman Bertazi syscall_dispatcher_start,
165d87ae0faSGabriel Krisman Bertazi (syscall_dispatcher_end - syscall_dispatcher_start + 1),
166d87ae0faSGabriel Krisman Bertazi &selector)) {
167d87ae0faSGabriel Krisman Bertazi perror("prctl failed\n");
168d87ae0faSGabriel Krisman Bertazi exit(-1);
169d87ae0faSGabriel Krisman Bertazi }
170d87ae0faSGabriel Krisman Bertazi
171d87ae0faSGabriel Krisman Bertazi SYSCALL_BLOCK;
172d87ae0faSGabriel Krisman Bertazi syscall(MAGIC_SYSCALL_1);
173d87ae0faSGabriel Krisman Bertazi
174d87ae0faSGabriel Krisman Bertazi #ifdef TEST_BLOCKED_RETURN
175*36a6c843SGabriel Krisman Bertazi if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) {
176d87ae0faSGabriel Krisman Bertazi fprintf(stderr, "Failed to return with selector blocked.\n");
177d87ae0faSGabriel Krisman Bertazi exit(-1);
178d87ae0faSGabriel Krisman Bertazi }
179d87ae0faSGabriel Krisman Bertazi #endif
180d87ae0faSGabriel Krisman Bertazi
181d87ae0faSGabriel Krisman Bertazi SYSCALL_UNBLOCK;
182d87ae0faSGabriel Krisman Bertazi
183d87ae0faSGabriel Krisman Bertazi if (!trapped_call_count) {
184d87ae0faSGabriel Krisman Bertazi fprintf(stderr, "syscall trapping does not work.\n");
185d87ae0faSGabriel Krisman Bertazi exit(-1);
186d87ae0faSGabriel Krisman Bertazi }
187d87ae0faSGabriel Krisman Bertazi
188d87ae0faSGabriel Krisman Bertazi time2 = perf_syscall();
189d87ae0faSGabriel Krisman Bertazi
190d87ae0faSGabriel Krisman Bertazi if (native_call_count) {
191d87ae0faSGabriel Krisman Bertazi perror("syscall trapping intercepted more syscalls than expected\n");
192d87ae0faSGabriel Krisman Bertazi exit(-1);
193d87ae0faSGabriel Krisman Bertazi }
194d87ae0faSGabriel Krisman Bertazi
195d87ae0faSGabriel Krisman Bertazi printf("trapped_call_count %lu, native_call_count %lu.\n",
196d87ae0faSGabriel Krisman Bertazi trapped_call_count, native_call_count);
197d87ae0faSGabriel Krisman Bertazi printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
198d87ae0faSGabriel Krisman Bertazi printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
199d87ae0faSGabriel Krisman Bertazi 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
200d87ae0faSGabriel Krisman Bertazi return 0;
201d87ae0faSGabriel Krisman Bertazi
202d87ae0faSGabriel Krisman Bertazi }
203