1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * single_step_syscall.c - single-steps various x86 syscalls
4  * Copyright (c) 2014-2015 Andrew Lutomirski
5  *
6  * This is a very simple series of tests that makes system calls with
7  * the TF flag set.  This exercises some nasty kernel code in the
8  * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
9  * immediately issues #DB from CPL 0.  This requires special handling in
10  * the kernel.
11  */
12 
13 #define _GNU_SOURCE
14 
15 #include <sys/time.h>
16 #include <time.h>
17 #include <stdlib.h>
18 #include <sys/syscall.h>
19 #include <unistd.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <inttypes.h>
23 #include <sys/mman.h>
24 #include <sys/signal.h>
25 #include <sys/ucontext.h>
26 #include <asm/ldt.h>
27 #include <err.h>
28 #include <setjmp.h>
29 #include <stddef.h>
30 #include <stdbool.h>
31 #include <sys/ptrace.h>
32 #include <sys/user.h>
33 
/*
 * Install @handler as the three-argument (SA_SIGINFO) handler for @sig.
 *
 * @sig:     signal number to hook
 * @handler: sa_sigaction-style callback
 * @flags:   extra SA_* bits OR-ed together with SA_SIGINFO
 *
 * No additional signals are blocked while the handler runs (empty sa_mask).
 * Terminates the process through err(1, ...) if sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction act;

	memset(&act, 0, sizeof act);
	sigemptyset(&act.sa_mask);
	act.sa_flags = flags | SA_SIGINFO;
	act.sa_sigaction = handler;

	if (sigaction(sig, &act, NULL) != 0)
		err(1, "sigaction");
}
45 
/*
 * Number of SIGTRAPs seen since the counter was last reset.  Incremented
 * by the SIGTRAP handler and read/reset by the checking code.
 */
static volatile sig_atomic_t sig_traps;

/*
 * Per-architecture spellings used by the rest of this file:
 *   REG_IP         - gregs[] index of the saved instruction pointer
 *   WIDTH          - operand-size suffix appended to pushf/pop mnemonics
 *   INT80_CLOBBERS - extra registers listed as clobbered by the int $0x80 asm
 */
#if defined(__x86_64__)
#define REG_IP REG_RIP
#define WIDTH "q"
#define INT80_CLOBBERS "r8", "r9", "r10", "r11"
#else
#define REG_IP REG_EIP
#define WIDTH "l"
#define INT80_CLOBBERS
#endif
57 
/*
 * Read the current (E/R)FLAGS register.
 *
 * Returns the flags value as pushed by pushf.
 *
 * On x86_64 the stack pointer is first moved 128 bytes down so that the
 * pushf does not overwrite the ABI red zone, which the compiler is free to
 * use for locals of whatever function this helper gets inlined into.  lea is
 * used for the adjustment because it leaves the flags untouched, so the value
 * read is exactly what was live on entry.  The output must be a register:
 * an rsp-relative memory operand would be addressed against the shifted
 * stack pointer.  The "memory" clobber covers the stack write and keeps the
 * compiler from moving memory accesses across the read.
 */
static unsigned long get_eflags(void)
{
	unsigned long eflags;

	__asm__ volatile (
#ifdef __x86_64__
		"lea -128(%%rsp), %%rsp\n\t"
		"pushfq\n\t"
		"popq %0\n\t"
		"lea 128(%%rsp), %%rsp"
#else
		"pushfl\n\t"
		"popl %0"
#endif
		: "=r" (eflags) : : "memory");

	return eflags;
}
64 
/*
 * Load @eflags into the (E/R)FLAGS register via push + popf.
 *
 * @eflags: complete flags image to install.
 *
 * On x86_64 the stack pointer is first moved 128 bytes down so that the
 * push does not overwrite the ABI red zone of the function this helper is
 * inlined into; lea restores it afterwards without disturbing the flags
 * that popf just loaded.  The operand must live in a register because an
 * rsp-relative memory operand would be addressed against the shifted stack
 * pointer.  "flags" tells the compiler the condition codes changed and
 * "memory" covers the stack write and acts as a compiler barrier.
 */
static void set_eflags(unsigned long eflags)
{
	__asm__ volatile (
#ifdef __x86_64__
		"lea -128(%%rsp), %%rsp\n\t"
		"pushq %0\n\t"
		"popfq\n\t"
		"lea 128(%%rsp), %%rsp"
#else
		"pushl %0\n\t"
		"popfl"
#endif
		: : "r" (eflags) : "flags", "memory");
}
70 
/* Trap flag (bit 8 of EFLAGS): the flag this test sets to single-step. */
#define X86_EFLAGS_TF 0x100UL
72 
73 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
74 {
75 	ucontext_t *ctx = (ucontext_t*)ctx_void;
76 
77 	if (get_eflags() & X86_EFLAGS_TF) {
78 		set_eflags(get_eflags() & ~X86_EFLAGS_TF);
79 		printf("[WARN]\tSIGTRAP handler had TF set\n");
80 		_exit(1);
81 	}
82 
83 	sig_traps++;
84 
85 	if (sig_traps == 10000 || sig_traps == 10001) {
86 		printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
87 		       (int)sig_traps,
88 		       (unsigned long)info->si_addr,
89 		       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
90 	}
91 }
92 
93 static void check_result(void)
94 {
95 	unsigned long new_eflags = get_eflags();
96 	set_eflags(new_eflags & ~X86_EFLAGS_TF);
97 
98 	if (!sig_traps) {
99 		printf("[FAIL]\tNo SIGTRAP\n");
100 		exit(1);
101 	}
102 
103 	if (!(new_eflags & X86_EFLAGS_TF)) {
104 		printf("[FAIL]\tTF was cleared\n");
105 		exit(1);
106 	}
107 
108 	printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
109 	sig_traps = 0;
110 }
111 
112 int main()
113 {
114 #ifdef CAN_BUILD_32
115 	int tmp;
116 #endif
117 
118 	sethandler(SIGTRAP, sigtrap, 0);
119 
120 	printf("[RUN]\tSet TF and check nop\n");
121 	set_eflags(get_eflags() | X86_EFLAGS_TF);
122 	asm volatile ("nop");
123 	check_result();
124 
125 #ifdef __x86_64__
126 	printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
127 	set_eflags(get_eflags() | X86_EFLAGS_TF);
128 	extern unsigned char post_nop[];
129 	asm volatile ("pushf" WIDTH "\n\t"
130 		      "pop" WIDTH " %%r11\n\t"
131 		      "nop\n\t"
132 		      "post_nop:"
133 		      : : "c" (post_nop) : "r11");
134 	check_result();
135 #endif
136 #ifdef CAN_BUILD_32
137 	printf("[RUN]\tSet TF and check int80\n");
138 	set_eflags(get_eflags() | X86_EFLAGS_TF);
139 	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
140 			: INT80_CLOBBERS);
141 	check_result();
142 #endif
143 
144 	/*
145 	 * This test is particularly interesting if fast syscalls use
146 	 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
147 	 * Specifically, SYSENTER does not clear TF, so either SYSENTER
148 	 * or the next instruction traps at CPL0.  (Of course, Intel
149 	 * mostly forgot to document exactly what happens here.)  So we
150 	 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
151 	 * no stack.  The only sane way the kernel can possibly handle
152 	 * it is to clear TF on return from the #DB handler, but this
153 	 * happens way too early to set TF in the saved pt_regs, so the
154 	 * kernel has to do something clever to avoid losing track of
155 	 * the TF bit.
156 	 *
157 	 * Needless to say, we've had bugs in this area.
158 	 */
159 	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
160 	printf("[RUN]\tSet TF and check a fast syscall\n");
161 	set_eflags(get_eflags() | X86_EFLAGS_TF);
162 	syscall(SYS_getpid);
163 	check_result();
164 
165 	/* Now make sure that another fast syscall doesn't set TF again. */
166 	printf("[RUN]\tFast syscall with TF cleared\n");
167 	fflush(stdout);  /* Force a syscall */
168 	if (get_eflags() & X86_EFLAGS_TF) {
169 		printf("[FAIL]\tTF is now set\n");
170 		exit(1);
171 	}
172 	if (sig_traps) {
173 		printf("[FAIL]\tGot SIGTRAP\n");
174 		exit(1);
175 	}
176 	printf("[OK]\tNothing unexpected happened\n");
177 
178 	return 0;
179 }
180