1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2020 Collabora Ltd.
4  *
5  * Benchmark and test syscall user dispatch
6  */
7 
8 #define _GNU_SOURCE
9 #include <stdio.h>
10 #include <string.h>
11 #include <stdlib.h>
12 #include <signal.h>
13 #include <errno.h>
14 #include <time.h>
15 #include <sys/time.h>
16 #include <unistd.h>
17 #include <sys/sysinfo.h>
18 #include <sys/prctl.h>
19 #include <sys/syscall.h>
20 
/*
 * Fallback definitions used when the included headers do not provide the
 * syscall user dispatch prctl interface.
 * NOTE(review): the numeric values are expected to mirror the kernel UAPI
 * <linux/prctl.h> -- confirm against the target kernel.
 */
#ifndef PR_SET_SYSCALL_USER_DISPATCH
# define PR_SET_SYSCALL_USER_DISPATCH	59
# define PR_SYS_DISPATCH_OFF	0
# define PR_SYS_DISPATCH_ON	1
#endif

/*
 * MAGIC_SYSCALL_1 is the syscall number main() issues while the selector is
 * blocked; handle_sigsys() counts a SIGSYS for it as a trapped call and a
 * SIGSYS for any other number as a native call.
 */
#ifdef __NR_syscalls
# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
#else
# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
#endif
32 
33 /*
34  * To test returning from a sigsys with selector blocked, the test
35  * requires some per-architecture support (i.e. knowledge about the
36  * signal trampoline address).  On i386, we know it is on the vdso, and
37  * a small trampoline is open-coded for x86_64.  Other architectures
38  * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
39  * out of the box, but don't enable them until they support syscall user
40  * dispatch.
41  */
42 #if defined(__x86_64__) || defined(__i386__)
43 #define TEST_BLOCKED_RETURN
44 #endif
45 
/*
 * Bounds of the region that main() hands to
 * prctl(PR_SET_SYSCALL_USER_DISPATCH) as offset and length.
 *
 * On x86_64 the two names are assembler labels emitted by the inline asm in
 * handle_sigsys(); they are declared here as functions only so that C code
 * can take their addresses.  On every other architecture they are plain
 * zero-valued integers, so main() passes offset 0 and length 1.
 */
#ifdef __x86_64__
void* (syscall_dispatcher_start)(void);
void* (syscall_dispatcher_end)(void);
#else
unsigned long syscall_dispatcher_start = 0;
unsigned long syscall_dispatcher_end = 0;
#endif
53 
/*
 * SIGSYS bookkeeping, incremented by handle_sigsys() and read by main().
 * trapped_call_count counts signals raised for MAGIC_SYSCALL_1,
 * native_call_count counts signals raised for any other syscall number.
 *
 * Declared volatile because they are modified from a signal handler and
 * observed by the interrupted code.
 */
volatile unsigned long trapped_call_count = 0;
volatile unsigned long native_call_count = 0;

/*
 * Selector byte whose address main() registers through
 * prctl(PR_SET_SYSCALL_USER_DISPATCH).  SYSCALL_BLOCK stores
 * PR_SYS_DISPATCH_ON into it and SYSCALL_UNBLOCK stores PR_SYS_DISPATCH_OFF.
 *
 * It is volatile for two reasons: it is written from the signal handler, and
 * on x86_64 the final SYSCALL_BLOCK in handle_sigsys() is followed by an
 * "asm volatile" sequence that never returns to C code, so the store must
 * not be sunk below or elided around that sequence by the compiler.
 */
volatile char selector;
#define SYSCALL_BLOCK   (selector = PR_SYS_DISPATCH_ON)
#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)
60 
/* Number of sysinfo() calls timed by a single one_sysinfo_step() invocation. */
#define CALIBRATION_STEP 100000
/* Target duration, in seconds, that calibrate_set() announces for a test set. */
#define CALIBRATE_TO_SECS 5
/*
 * Number of one_sysinfo_step() rounds that perf_syscall() executes.
 * calibrate_set() adds CALIBRATE_TO_SECS to it for every step it takes to
 * accumulate one second of elapsed time.
 */
int factor;
64 
65 static double one_sysinfo_step(void)
66 {
67 	struct timespec t1, t2;
68 	int i;
69 	struct sysinfo info;
70 
71 	clock_gettime(CLOCK_MONOTONIC, &t1);
72 	for (i = 0; i < CALIBRATION_STEP; i++)
73 		sysinfo(&info);
74 	clock_gettime(CLOCK_MONOTONIC, &t2);
75 	return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
76 }
77 
78 static void calibrate_set(void)
79 {
80 	double elapsed = 0;
81 
82 	printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
83 
84 	while (elapsed < 1) {
85 		elapsed += one_sysinfo_step();
86 		factor += CALIBRATE_TO_SECS;
87 	}
88 
89 	printf("test iterations = %d\n", CALIBRATION_STEP * factor);
90 }
91 
92 static double perf_syscall(void)
93 {
94 	unsigned int i;
95 	double partial = 0;
96 
97 	for (i = 0; i < factor; ++i)
98 		partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
99 	return partial;
100 }
101 
/*
 * SIGSYS handler installed by main().
 *
 * @sig:      signal number (not used).
 * @info:     signal information; si_syscall identifies the syscall that
 *            raised the signal.
 * @ucontext: signal context (not used).
 *
 * Switches the selector off for the duration of the handler, logs the
 * syscall number to fd 1 and accounts the event in trapped_call_count
 * (MAGIC_SYSCALL_1) or native_call_count (anything else).  When
 * TEST_BLOCKED_RETURN is defined the selector is switched back on before
 * leaving the handler.
 *
 * On x86_64 the handler does not leave through the compiler-generated
 * epilogue; it finishes with an open-coded syscall placed between the
 * syscall_dispatcher_start and syscall_dispatcher_end labels, which is the
 * region main() registers with prctl().
 */
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
	char buf[1024];
	int len;

	/*
	 * Selector off first: presumably so the write() below is not itself
	 * dispatched back into this handler -- confirm against the syscall
	 * user dispatch documentation.
	 */
	SYSCALL_UNBLOCK;

	/* printf and friends are not signal-safe. */
	len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
	write(1, buf, len);

	/* Only the deliberately bogus syscall is an expected trap. */
	if (info->si_syscall == MAGIC_SYSCALL_1)
		trapped_call_count++;
	else
		native_call_count++;

#ifdef TEST_BLOCKED_RETURN
	/* Leave the handler with the selector on; main() checks for this. */
	SYSCALL_BLOCK;
#endif

#ifdef __x86_64__
	/*
	 * Open-coded signal return.  The statement order below is
	 * significant and depends on the stack frame the compiler built for
	 * this function.
	 * NOTE(review): 0xf looks like the x86_64 rt_sigreturn syscall
	 * number -- confirm against the syscall table.
	 */
	__asm__ volatile("movq $0xf, %rax");
	/* Tear down this function's frame (relies on a frame pointer). */
	__asm__ volatile("leaveq");
	/* Skip one 8-byte stack slot; presumably the handler's return address. */
	__asm__ volatile("add $0x8, %rsp");
	/* Start of the region whose address main() passes to prctl(). */
	__asm__ volatile("syscall_dispatcher_start:");
	__asm__ volatile("syscall");
	__asm__ volatile("nop"); /* Landing pad within dispatcher area */
	/* End of the region; main() computes the length as end - start + 1. */
	__asm__ volatile("syscall_dispatcher_end:");
#endif

}
133 
134 int main(void)
135 {
136 	struct sigaction act;
137 	double time1, time2;
138 	int ret;
139 	sigset_t mask;
140 
141 	memset(&act, 0, sizeof(act));
142 	sigemptyset(&mask);
143 
144 	act.sa_sigaction = handle_sigsys;
145 	act.sa_flags = SA_SIGINFO;
146 	act.sa_mask = mask;
147 
148 	calibrate_set();
149 
150 	time1 = perf_syscall();
151 	printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
152 
153 	ret = sigaction(SIGSYS, &act, NULL);
154 	if (ret) {
155 		perror("Error sigaction:");
156 		exit(-1);
157 	}
158 
159 	fprintf(stderr, "Enabling syscall trapping.\n");
160 
161 	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
162 		  syscall_dispatcher_start,
163 		  (syscall_dispatcher_end - syscall_dispatcher_start + 1),
164 		  &selector)) {
165 		perror("prctl failed\n");
166 		exit(-1);
167 	}
168 
169 	SYSCALL_BLOCK;
170 	syscall(MAGIC_SYSCALL_1);
171 
172 #ifdef TEST_BLOCKED_RETURN
173 	if (selector == PR_SYS_DISPATCH_OFF) {
174 		fprintf(stderr, "Failed to return with selector blocked.\n");
175 		exit(-1);
176 	}
177 #endif
178 
179 	SYSCALL_UNBLOCK;
180 
181 	if (!trapped_call_count) {
182 		fprintf(stderr, "syscall trapping does not work.\n");
183 		exit(-1);
184 	}
185 
186 	time2 = perf_syscall();
187 
188 	if (native_call_count) {
189 		perror("syscall trapping intercepted more syscalls than expected\n");
190 		exit(-1);
191 	}
192 
193 	printf("trapped_call_count %lu, native_call_count %lu.\n",
194 	       trapped_call_count, native_call_count);
195 	printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
196 	printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
197 	       100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
198 	return 0;
199 
200 }
201