1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * single_step_syscall.c - single-steps various x86 syscalls 4 * Copyright (c) 2014-2015 Andrew Lutomirski 5 * 6 * This is a very simple series of tests that makes system calls with 7 * the TF flag set. This exercises some nasty kernel code in the 8 * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set 9 * immediately issues #DB from CPL 0. This requires special handling in 10 * the kernel. 11 */ 12 13 #define _GNU_SOURCE 14 15 #include <sys/time.h> 16 #include <time.h> 17 #include <stdlib.h> 18 #include <sys/syscall.h> 19 #include <unistd.h> 20 #include <stdio.h> 21 #include <string.h> 22 #include <inttypes.h> 23 #include <sys/mman.h> 24 #include <sys/signal.h> 25 #include <sys/ucontext.h> 26 #include <asm/ldt.h> 27 #include <err.h> 28 #include <setjmp.h> 29 #include <stddef.h> 30 #include <stdbool.h> 31 #include <sys/ptrace.h> 32 #include <sys/user.h> 33 34 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 35 int flags) 36 { 37 struct sigaction sa; 38 memset(&sa, 0, sizeof(sa)); 39 sa.sa_sigaction = handler; 40 sa.sa_flags = SA_SIGINFO | flags; 41 sigemptyset(&sa.sa_mask); 42 if (sigaction(sig, &sa, 0)) 43 err(1, "sigaction"); 44 } 45 46 static void clearhandler(int sig) 47 { 48 struct sigaction sa; 49 memset(&sa, 0, sizeof(sa)); 50 sa.sa_handler = SIG_DFL; 51 sigemptyset(&sa.sa_mask); 52 if (sigaction(sig, &sa, 0)) 53 err(1, "sigaction"); 54 } 55 56 static volatile sig_atomic_t sig_traps, sig_eflags; 57 sigjmp_buf jmpbuf; 58 static unsigned char altstack_data[SIGSTKSZ]; 59 60 #ifdef __x86_64__ 61 # define REG_IP REG_RIP 62 # define WIDTH "q" 63 # define INT80_CLOBBERS "r8", "r9", "r10", "r11" 64 #else 65 # define REG_IP REG_EIP 66 # define WIDTH "l" 67 # define INT80_CLOBBERS 68 #endif 69 70 static unsigned long get_eflags(void) 71 { 72 unsigned long eflags; 73 asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); 74 return eflags; 75 } 76 77 static void set_eflags(unsigned long eflags) 78 { 79 asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH 80 : : "rm" (eflags) : "flags"); 81 } 82 83 #define X86_EFLAGS_TF (1UL << 8) 84 85 static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 86 { 87 ucontext_t *ctx = (ucontext_t*)ctx_void; 88 89 if (get_eflags() & X86_EFLAGS_TF) { 90 set_eflags(get_eflags() & ~X86_EFLAGS_TF); 91 printf("[WARN]\tSIGTRAP handler had TF set\n"); 92 _exit(1); 93 } 94 95 sig_traps++; 96 97 if (sig_traps == 10000 || sig_traps == 10001) { 98 printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n", 99 (int)sig_traps, 100 (unsigned long)info->si_addr, 101 (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); 102 } 103 } 104 105 static char const * const signames[] = { 106 [SIGSEGV] = "SIGSEGV", 107 [SIGBUS] = "SIBGUS", 108 [SIGTRAP] = "SIGTRAP", 109 [SIGILL] = "SIGILL", 110 }; 111 112 static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void) 113 { 114 ucontext_t *ctx = ctx_void; 115 116 printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig], 117 (unsigned long)ctx->uc_mcontext.gregs[REG_IP], 118 (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF); 119 120 sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL]; 121 siglongjmp(jmpbuf, 1); 122 } 123 124 static void check_result(void) 125 { 126 unsigned long new_eflags = get_eflags(); 127 set_eflags(new_eflags & ~X86_EFLAGS_TF); 128 129 if (!sig_traps) { 130 printf("[FAIL]\tNo SIGTRAP\n"); 131 exit(1); 132 } 133 134 if (!(new_eflags & X86_EFLAGS_TF)) { 135 printf("[FAIL]\tTF was cleared\n"); 136 exit(1); 137 } 138 139 printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps); 140 sig_traps = 0; 141 } 142 143 static void fast_syscall_no_tf(void) 144 { 145 sig_traps = 0; 146 printf("[RUN]\tFast syscall with TF cleared\n"); 147 fflush(stdout); /* Force a syscall */ 148 if (get_eflags() & X86_EFLAGS_TF) { 149 printf("[FAIL]\tTF is now set\n"); 150 exit(1); 151 } 152 if (sig_traps) { 153 printf("[FAIL]\tGot SIGTRAP\n"); 154 exit(1); 155 } 156 printf("[OK]\tNothing unexpected happened\n"); 157 } 158 159 int main() 160 { 161 #ifdef CAN_BUILD_32 162 int tmp; 163 #endif 164 165 sethandler(SIGTRAP, sigtrap, 0); 166 167 printf("[RUN]\tSet TF and check nop\n"); 168 set_eflags(get_eflags() | X86_EFLAGS_TF); 169 asm volatile ("nop"); 170 check_result(); 171 172 #ifdef __x86_64__ 173 printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n"); 174 set_eflags(get_eflags() | X86_EFLAGS_TF); 175 extern unsigned char post_nop[]; 176 asm volatile ("pushf" WIDTH "\n\t" 177 "pop" WIDTH " %%r11\n\t" 178 "nop\n\t" 179 "post_nop:" 180 : : "c" (post_nop) : "r11"); 181 check_result(); 182 #endif 183 #ifdef CAN_BUILD_32 184 printf("[RUN]\tSet TF and check int80\n"); 185 set_eflags(get_eflags() | X86_EFLAGS_TF); 186 asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) 187 : INT80_CLOBBERS); 188 check_result(); 189 #endif 190 191 /* 192 * This test is particularly interesting if fast syscalls use 193 * SYSENTER: it triggers a nasty design flaw in SYSENTER. 194 * Specifically, SYSENTER does not clear TF, so either SYSENTER 195 * or the next instruction traps at CPL0. (Of course, Intel 196 * mostly forgot to document exactly what happens here.) So we 197 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly 198 * no stack. The only sane way the kernel can possibly handle 199 * it is to clear TF on return from the #DB handler, but this 200 * happens way too early to set TF in the saved pt_regs, so the 201 * kernel has to do something clever to avoid losing track of 202 * the TF bit. 203 * 204 * Needless to say, we've had bugs in this area. 205 */ 206 syscall(SYS_getpid); /* Force symbol binding without TF set. */ 207 printf("[RUN]\tSet TF and check a fast syscall\n"); 208 set_eflags(get_eflags() | X86_EFLAGS_TF); 209 syscall(SYS_getpid); 210 check_result(); 211 212 /* Now make sure that another fast syscall doesn't set TF again. */ 213 fast_syscall_no_tf(); 214 215 /* 216 * And do a forced SYSENTER to make sure that this works even if 217 * fast syscalls don't use SYSENTER. 218 * 219 * Invoking SYSENTER directly breaks all the rules. Just handle 220 * the SIGSEGV. 221 */ 222 if (sigsetjmp(jmpbuf, 1) == 0) { 223 unsigned long nr = SYS_getpid; 224 printf("[RUN]\tSet TF and check SYSENTER\n"); 225 stack_t stack = { 226 .ss_sp = altstack_data, 227 .ss_size = SIGSTKSZ, 228 }; 229 if (sigaltstack(&stack, NULL) != 0) 230 err(1, "sigaltstack"); 231 sethandler(SIGSEGV, print_and_longjmp, 232 SA_RESETHAND | SA_ONSTACK); 233 sethandler(SIGILL, print_and_longjmp, SA_RESETHAND); 234 set_eflags(get_eflags() | X86_EFLAGS_TF); 235 /* Clear EBP first to make sure we segfault cleanly. */ 236 asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx" 237 #ifdef __x86_64__ 238 , "r11" 239 #endif 240 ); 241 242 /* We're unreachable here. SYSENTER forgets RIP. */ 243 } 244 clearhandler(SIGSEGV); 245 clearhandler(SIGILL); 246 if (!(sig_eflags & X86_EFLAGS_TF)) { 247 printf("[FAIL]\tTF was cleared\n"); 248 exit(1); 249 } 250 251 /* Now make sure that another fast syscall doesn't set TF again. */ 252 fast_syscall_no_tf(); 253 254 return 0; 255 } 256