1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2020 Collabora Ltd. 4 * 5 * Benchmark and test syscall user dispatch 6 */ 7 8 #define _GNU_SOURCE 9 #include <stdio.h> 10 #include <string.h> 11 #include <stdlib.h> 12 #include <signal.h> 13 #include <errno.h> 14 #include <time.h> 15 #include <sys/time.h> 16 #include <unistd.h> 17 #include <sys/sysinfo.h> 18 #include <sys/prctl.h> 19 #include <sys/syscall.h> 20 21 #ifndef PR_SET_SYSCALL_USER_DISPATCH 22 # define PR_SET_SYSCALL_USER_DISPATCH 59 23 # define PR_SYS_DISPATCH_OFF 0 24 # define PR_SYS_DISPATCH_ON 1 25 # define SYSCALL_DISPATCH_FILTER_ALLOW 0 26 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 27 #endif 28 29 #ifdef __NR_syscalls 30 # define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */ 31 #else 32 # define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ 33 #endif 34 35 /* 36 * To test returning from a sigsys with selector blocked, the test 37 * requires some per-architecture support (i.e. knowledge about the 38 * signal trampoline address). On i386, we know it is on the vdso, and 39 * a small trampoline is open-coded for x86_64. Other architectures 40 * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN 41 * out of the box, but don't enable them until they support syscall user 42 * dispatch. 43 */ 44 #if defined(__x86_64__) || defined(__i386__) 45 #define TEST_BLOCKED_RETURN 46 #endif 47 48 #ifdef __x86_64__ 49 void* (syscall_dispatcher_start)(void); 50 void* (syscall_dispatcher_end)(void); 51 #else 52 unsigned long syscall_dispatcher_start = 0; 53 unsigned long syscall_dispatcher_end = 0; 54 #endif 55 56 unsigned long trapped_call_count = 0; 57 unsigned long native_call_count = 0; 58 59 char selector; 60 #define SYSCALL_BLOCK (selector = SYSCALL_DISPATCH_FILTER_BLOCK) 61 #define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW) 62 63 #define CALIBRATION_STEP 100000 64 #define CALIBRATE_TO_SECS 5 65 int factor; 66 67 static double one_sysinfo_step(void) 68 { 69 struct timespec t1, t2; 70 int i; 71 struct sysinfo info; 72 73 clock_gettime(CLOCK_MONOTONIC, &t1); 74 for (i = 0; i < CALIBRATION_STEP; i++) 75 sysinfo(&info); 76 clock_gettime(CLOCK_MONOTONIC, &t2); 77 return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec); 78 } 79 80 static void calibrate_set(void) 81 { 82 double elapsed = 0; 83 84 printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS); 85 86 while (elapsed < 1) { 87 elapsed += one_sysinfo_step(); 88 factor += CALIBRATE_TO_SECS; 89 } 90 91 printf("test iterations = %d\n", CALIBRATION_STEP * factor); 92 } 93 94 static double perf_syscall(void) 95 { 96 unsigned int i; 97 double partial = 0; 98 99 for (i = 0; i < factor; ++i) 100 partial += one_sysinfo_step()/(CALIBRATION_STEP*factor); 101 return partial; 102 } 103 104 static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) 105 { 106 char buf[1024]; 107 int len; 108 109 SYSCALL_UNBLOCK; 110 111 /* printf and friends are not signal-safe. */ 112 len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall); 113 write(1, buf, len); 114 115 if (info->si_syscall == MAGIC_SYSCALL_1) 116 trapped_call_count++; 117 else 118 native_call_count++; 119 120 #ifdef TEST_BLOCKED_RETURN 121 SYSCALL_BLOCK; 122 #endif 123 124 #ifdef __x86_64__ 125 __asm__ volatile("movq $0xf, %rax"); 126 __asm__ volatile("leaveq"); 127 __asm__ volatile("add $0x8, %rsp"); 128 __asm__ volatile("syscall_dispatcher_start:"); 129 __asm__ volatile("syscall"); 130 __asm__ volatile("nop"); /* Landing pad within dispatcher area */ 131 __asm__ volatile("syscall_dispatcher_end:"); 132 #endif 133 134 } 135 136 int main(void) 137 { 138 struct sigaction act; 139 double time1, time2; 140 int ret; 141 sigset_t mask; 142 143 memset(&act, 0, sizeof(act)); 144 sigemptyset(&mask); 145 146 act.sa_sigaction = handle_sigsys; 147 act.sa_flags = SA_SIGINFO; 148 act.sa_mask = mask; 149 150 calibrate_set(); 151 152 time1 = perf_syscall(); 153 printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9); 154 155 ret = sigaction(SIGSYS, &act, NULL); 156 if (ret) { 157 perror("Error sigaction:"); 158 exit(-1); 159 } 160 161 fprintf(stderr, "Enabling syscall trapping.\n"); 162 163 if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 164 syscall_dispatcher_start, 165 (syscall_dispatcher_end - syscall_dispatcher_start + 1), 166 &selector)) { 167 perror("prctl failed\n"); 168 exit(-1); 169 } 170 171 SYSCALL_BLOCK; 172 syscall(MAGIC_SYSCALL_1); 173 174 #ifdef TEST_BLOCKED_RETURN 175 if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) { 176 fprintf(stderr, "Failed to return with selector blocked.\n"); 177 exit(-1); 178 } 179 #endif 180 181 SYSCALL_UNBLOCK; 182 183 if (!trapped_call_count) { 184 fprintf(stderr, "syscall trapping does not work.\n"); 185 exit(-1); 186 } 187 188 time2 = perf_syscall(); 189 190 if (native_call_count) { 191 perror("syscall trapping intercepted more syscalls than expected\n"); 192 exit(-1); 193 } 194 195 printf("trapped_call_count %lu, native_call_count %lu.\n", 196 trapped_call_count, native_call_count); 197 printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9); 198 printf("Interception overhead: %.1lf%% (+%.0lfns).\n", 199 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1)); 200 return 0; 201 202 } 203