1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * single_step_syscall.c - single-steps various x86 syscalls
4  * Copyright (c) 2014-2015 Andrew Lutomirski
5  *
6  * This is a very simple series of tests that makes system calls with
7  * the TF flag set.  This exercises some nasty kernel code in the
8  * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
9  * immediately issues #DB from CPL 0.  This requires special handling in
10  * the kernel.
11  */
12 
13 #define _GNU_SOURCE
14 
15 #include <sys/time.h>
16 #include <time.h>
17 #include <stdlib.h>
18 #include <sys/syscall.h>
19 #include <unistd.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <inttypes.h>
23 #include <sys/mman.h>
24 #include <sys/signal.h>
25 #include <sys/ucontext.h>
26 #include <asm/ldt.h>
27 #include <err.h>
28 #include <setjmp.h>
29 #include <stddef.h>
30 #include <stdbool.h>
31 #include <sys/ptrace.h>
32 #include <sys/user.h>
33 
34 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
35 		       int flags)
36 {
37 	struct sigaction sa;
38 	memset(&sa, 0, sizeof(sa));
39 	sa.sa_sigaction = handler;
40 	sa.sa_flags = SA_SIGINFO | flags;
41 	sigemptyset(&sa.sa_mask);
42 	if (sigaction(sig, &sa, 0))
43 		err(1, "sigaction");
44 }
45 
46 static void clearhandler(int sig)
47 {
48 	struct sigaction sa;
49 	memset(&sa, 0, sizeof(sa));
50 	sa.sa_handler = SIG_DFL;
51 	sigemptyset(&sa.sa_mask);
52 	if (sigaction(sig, &sa, 0))
53 		err(1, "sigaction");
54 }
55 
56 static volatile sig_atomic_t sig_traps, sig_eflags;
57 sigjmp_buf jmpbuf;
58 static unsigned char altstack_data[SIGSTKSZ];
59 
/*
 * Per-architecture spellings used throughout this file:
 *
 *   WIDTH           operand-size suffix pasted onto pushf/popf/push/pop
 *   REG_IP          gregs[] index of the saved instruction pointer
 *   INT80_CLOBBERS  extra clobbers for the "int $0x80" asm statement
 *                   (presumably registers the 32-bit entry path does not
 *                   preserve for a 64-bit caller - confirm)
 */
#if defined(__x86_64__)
# define WIDTH "q"
# define REG_IP REG_RIP
# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
#else
# define WIDTH "l"
# define REG_IP REG_EIP
# define INT80_CLOBBERS
#endif
69 
70 static unsigned long get_eflags(void)
71 {
72 	unsigned long eflags;
73 	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
74 	return eflags;
75 }
76 
77 static void set_eflags(unsigned long eflags)
78 {
79 	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
80 		      : : "rm" (eflags) : "flags");
81 }
82 
/* TF, the single-step flag these tests set, is bit 8 of the flags register. */
#define X86_EFLAGS_TF_BIT 8
#define X86_EFLAGS_TF (1UL << X86_EFLAGS_TF_BIT)
84 
85 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
86 {
87 	ucontext_t *ctx = (ucontext_t*)ctx_void;
88 
89 	if (get_eflags() & X86_EFLAGS_TF) {
90 		set_eflags(get_eflags() & ~X86_EFLAGS_TF);
91 		printf("[WARN]\tSIGTRAP handler had TF set\n");
92 		_exit(1);
93 	}
94 
95 	sig_traps++;
96 
97 	if (sig_traps == 10000 || sig_traps == 10001) {
98 		printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
99 		       (int)sig_traps,
100 		       (unsigned long)info->si_addr,
101 		       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
102 	}
103 }
104 
/*
 * Printable names for the signals this test reports, indexed by signal
 * number.  Slots for signals not listed here are NULL.
 */
static char const * const signames[] = {
	[SIGSEGV] = "SIGSEGV",
	[SIGBUS] = "SIGBUS",
	[SIGTRAP] = "SIGTRAP",
	[SIGILL] = "SIGILL",
};
111 
112 static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
113 {
114 	ucontext_t *ctx = ctx_void;
115 
116 	printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig],
117 	       (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
118 	       (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF);
119 
120 	sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL];
121 	siglongjmp(jmpbuf, 1);
122 }
123 
124 static void check_result(void)
125 {
126 	unsigned long new_eflags = get_eflags();
127 	set_eflags(new_eflags & ~X86_EFLAGS_TF);
128 
129 	if (!sig_traps) {
130 		printf("[FAIL]\tNo SIGTRAP\n");
131 		exit(1);
132 	}
133 
134 	if (!(new_eflags & X86_EFLAGS_TF)) {
135 		printf("[FAIL]\tTF was cleared\n");
136 		exit(1);
137 	}
138 
139 	printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
140 	sig_traps = 0;
141 }
142 
143 static void fast_syscall_no_tf(void)
144 {
145 	sig_traps = 0;
146 	printf("[RUN]\tFast syscall with TF cleared\n");
147 	fflush(stdout);  /* Force a syscall */
148 	if (get_eflags() & X86_EFLAGS_TF) {
149 		printf("[FAIL]\tTF is now set\n");
150 		exit(1);
151 	}
152 	if (sig_traps) {
153 		printf("[FAIL]\tGot SIGTRAP\n");
154 		exit(1);
155 	}
156 	printf("[OK]\tNothing unexpected happened\n");
157 }
158 
159 int main()
160 {
161 #ifdef CAN_BUILD_32
162 	int tmp;
163 #endif
164 
165 	sethandler(SIGTRAP, sigtrap, 0);
166 
167 	printf("[RUN]\tSet TF and check nop\n");
168 	set_eflags(get_eflags() | X86_EFLAGS_TF);
169 	asm volatile ("nop");
170 	check_result();
171 
172 #ifdef __x86_64__
173 	printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
174 	set_eflags(get_eflags() | X86_EFLAGS_TF);
175 	extern unsigned char post_nop[];
176 	asm volatile ("pushf" WIDTH "\n\t"
177 		      "pop" WIDTH " %%r11\n\t"
178 		      "nop\n\t"
179 		      "post_nop:"
180 		      : : "c" (post_nop) : "r11");
181 	check_result();
182 #endif
183 #ifdef CAN_BUILD_32
184 	printf("[RUN]\tSet TF and check int80\n");
185 	set_eflags(get_eflags() | X86_EFLAGS_TF);
186 	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
187 			: INT80_CLOBBERS);
188 	check_result();
189 #endif
190 
191 	/*
192 	 * This test is particularly interesting if fast syscalls use
193 	 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
194 	 * Specifically, SYSENTER does not clear TF, so either SYSENTER
195 	 * or the next instruction traps at CPL0.  (Of course, Intel
196 	 * mostly forgot to document exactly what happens here.)  So we
197 	 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
198 	 * no stack.  The only sane way the kernel can possibly handle
199 	 * it is to clear TF on return from the #DB handler, but this
200 	 * happens way too early to set TF in the saved pt_regs, so the
201 	 * kernel has to do something clever to avoid losing track of
202 	 * the TF bit.
203 	 *
204 	 * Needless to say, we've had bugs in this area.
205 	 */
206 	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
207 	printf("[RUN]\tSet TF and check a fast syscall\n");
208 	set_eflags(get_eflags() | X86_EFLAGS_TF);
209 	syscall(SYS_getpid);
210 	check_result();
211 
212 	/* Now make sure that another fast syscall doesn't set TF again. */
213 	fast_syscall_no_tf();
214 
215 	/*
216 	 * And do a forced SYSENTER to make sure that this works even if
217 	 * fast syscalls don't use SYSENTER.
218 	 *
219 	 * Invoking SYSENTER directly breaks all the rules.  Just handle
220 	 * the SIGSEGV.
221 	 */
222 	if (sigsetjmp(jmpbuf, 1) == 0) {
223 		unsigned long nr = SYS_getpid;
224 		printf("[RUN]\tSet TF and check SYSENTER\n");
225 		stack_t stack = {
226 			.ss_sp = altstack_data,
227 			.ss_size = SIGSTKSZ,
228 		};
229 		if (sigaltstack(&stack, NULL) != 0)
230 			err(1, "sigaltstack");
231 		sethandler(SIGSEGV, print_and_longjmp,
232 			   SA_RESETHAND | SA_ONSTACK);
233 		sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
234 		set_eflags(get_eflags() | X86_EFLAGS_TF);
235 		/* Clear EBP first to make sure we segfault cleanly. */
236 		asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
237 #ifdef __x86_64__
238 				, "r11"
239 #endif
240 			);
241 
242 		/* We're unreachable here.  SYSENTER forgets RIP. */
243 	}
244 	clearhandler(SIGSEGV);
245 	clearhandler(SIGILL);
246 	if (!(sig_eflags & X86_EFLAGS_TF)) {
247 		printf("[FAIL]\tTF was cleared\n");
248 		exit(1);
249 	}
250 
251 	/* Now make sure that another fast syscall doesn't set TF again. */
252 	fast_syscall_no_tf();
253 
254 	return 0;
255 }
256