1 /*
2  * single_step_syscall.c - single-steps various x86 syscalls
3  * Copyright (c) 2014-2015 Andrew Lutomirski
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * This is a very simple series of tests that makes system calls with
15  * the TF flag set.  This exercises some nasty kernel code in the
16  * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
17  * immediately issues #DB from CPL 0.  This requires special handling in
18  * the kernel.
19  */
20 
21 #define _GNU_SOURCE
22 
23 #include <sys/time.h>
24 #include <time.h>
25 #include <stdlib.h>
26 #include <sys/syscall.h>
27 #include <unistd.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <inttypes.h>
31 #include <sys/mman.h>
32 #include <sys/signal.h>
33 #include <sys/ucontext.h>
34 #include <asm/ldt.h>
35 #include <err.h>
36 #include <setjmp.h>
37 #include <stddef.h>
38 #include <stdbool.h>
39 #include <sys/ptrace.h>
40 #include <sys/user.h>
41 
/*
 * sethandler() - install a three-argument (SA_SIGINFO) handler for @sig.
 * @sig:     signal number to hook
 * @handler: function invoked when @sig is delivered
 * @flags:   additional SA_* bits, OR-ed with SA_SIGINFO
 *
 * No extra signals are blocked while the handler runs.  If sigaction()
 * fails the program is terminated through err(1, ...).
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction act = { 0 };

	sigemptyset(&act.sa_mask);
	act.sa_flags = flags | SA_SIGINFO;
	act.sa_sigaction = handler;

	if (sigaction(sig, &act, NULL) != 0)
		err(1, "sigaction");
}
53 
/*
 * Count of SIGTRAPs delivered so far.  Incremented by the SIGTRAP handler
 * and reset to zero by check_result() after each test.
 */
static volatile sig_atomic_t sig_traps;
55 
/*
 * Per-architecture selections:
 *   REG_IP - index into uc_mcontext.gregs[] of the saved instruction pointer
 *   WIDTH  - operand-size suffix pasted onto instruction mnemonics in the
 *            inline asm strings ("q" for 64-bit builds, "l" otherwise)
 */
#ifdef __x86_64__
# define REG_IP REG_RIP
# define WIDTH "q"
#else
# define REG_IP REG_EIP
# define WIDTH "l"
#endif
63 
/*
 * get_eflags() - read the current flags register.
 *
 * Return: the value of EFLAGS/RFLAGS at the point of the call.
 *
 * The flags are fetched by pushing them on the stack and popping them
 * into a register.  On x86-64 the 128 bytes below the stack pointer (the
 * red zone) may hold live data the compiler has placed there, so the
 * stack pointer is first moved below that area; a bare push would
 * overwrite it.  LEA is used for the adjustment because, unlike SUB/ADD,
 * it does not modify the flags being read.  The output is register-only:
 * a stack-pointer-relative memory operand would be wrong while the stack
 * pointer is displaced.  The "memory" clobber keeps the compiler from
 * caching values across the stack write.
 */
static unsigned long get_eflags(void)
{
	unsigned long eflags;

	__asm__ volatile (
#ifdef __x86_64__
		"leaq -128(%%rsp), %%rsp\n\t"
		"pushfq\n\t"
		"popq %0\n\t"
		"leaq 128(%%rsp), %%rsp"
#else
		"pushfl\n\t"
		"popl %0"
#endif
		: "=r" (eflags) : : "memory");

	return eflags;
}
70 
/*
 * set_eflags() - load a new value into the flags register.
 * @eflags: value to place in EFLAGS/RFLAGS
 *
 * The value is pushed on the stack and popped into the flags register.
 * On x86-64 the stack pointer is first moved below the 128-byte red zone
 * so the push cannot overwrite data the compiler keeps there, and it is
 * restored with LEA so the freshly loaded flags are not disturbed.  The
 * input is register-only because a stack-pointer-relative memory operand
 * would be wrong while the stack pointer is displaced.  The "memory"
 * clobber keeps the compiler from moving memory accesses across the
 * flag change.
 */
static void set_eflags(unsigned long eflags)
{
	__asm__ volatile (
#ifdef __x86_64__
		"leaq -128(%%rsp), %%rsp\n\t"
		"pushq %0\n\t"
		"popfq\n\t"
		"leaq 128(%%rsp), %%rsp"
#else
		"pushl %0\n\t"
		"popfl"
#endif
		: : "r" (eflags) : "flags", "memory");
}
76 
/* Trap Flag: bit 8 of the flags register. */
#define X86_EFLAGS_TF 0x100UL
78 
79 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
80 {
81 	ucontext_t *ctx = (ucontext_t*)ctx_void;
82 
83 	if (get_eflags() & X86_EFLAGS_TF) {
84 		set_eflags(get_eflags() & ~X86_EFLAGS_TF);
85 		printf("[WARN]\tSIGTRAP handler had TF set\n");
86 		_exit(1);
87 	}
88 
89 	sig_traps++;
90 
91 	if (sig_traps == 10000 || sig_traps == 10001) {
92 		printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
93 		       (int)sig_traps,
94 		       (unsigned long)info->si_addr,
95 		       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
96 	}
97 }
98 
99 static void check_result(void)
100 {
101 	unsigned long new_eflags = get_eflags();
102 	set_eflags(new_eflags & ~X86_EFLAGS_TF);
103 
104 	if (!sig_traps) {
105 		printf("[FAIL]\tNo SIGTRAP\n");
106 		exit(1);
107 	}
108 
109 	if (!(new_eflags & X86_EFLAGS_TF)) {
110 		printf("[FAIL]\tTF was cleared\n");
111 		exit(1);
112 	}
113 
114 	printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
115 	sig_traps = 0;
116 }
117 
118 int main()
119 {
120 	int tmp;
121 
122 	sethandler(SIGTRAP, sigtrap, 0);
123 
124 	printf("[RUN]\tSet TF and check nop\n");
125 	set_eflags(get_eflags() | X86_EFLAGS_TF);
126 	asm volatile ("nop");
127 	check_result();
128 
129 #ifdef __x86_64__
130 	printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
131 	set_eflags(get_eflags() | X86_EFLAGS_TF);
132 	extern unsigned char post_nop[];
133 	asm volatile ("pushf" WIDTH "\n\t"
134 		      "pop" WIDTH " %%r11\n\t"
135 		      "nop\n\t"
136 		      "post_nop:"
137 		      : : "c" (post_nop) : "r11");
138 	check_result();
139 #endif
140 
141 	printf("[RUN]\tSet TF and check int80\n");
142 	set_eflags(get_eflags() | X86_EFLAGS_TF);
143 	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
144 	check_result();
145 
146 	/*
147 	 * This test is particularly interesting if fast syscalls use
148 	 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
149 	 * Specifically, SYSENTER does not clear TF, so either SYSENTER
150 	 * or the next instruction traps at CPL0.  (Of course, Intel
151 	 * mostly forgot to document exactly what happens here.)  So we
152 	 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
153 	 * no stack.  The only sane way the kernel can possibly handle
154 	 * it is to clear TF on return from the #DB handler, but this
155 	 * happens way too early to set TF in the saved pt_regs, so the
156 	 * kernel has to do something clever to avoid losing track of
157 	 * the TF bit.
158 	 *
159 	 * Needless to say, we've had bugs in this area.
160 	 */
161 	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
162 	printf("[RUN]\tSet TF and check a fast syscall\n");
163 	set_eflags(get_eflags() | X86_EFLAGS_TF);
164 	syscall(SYS_getpid);
165 	check_result();
166 
167 	/* Now make sure that another fast syscall doesn't set TF again. */
168 	printf("[RUN]\tFast syscall with TF cleared\n");
169 	fflush(stdout);  /* Force a syscall */
170 	if (get_eflags() & X86_EFLAGS_TF) {
171 		printf("[FAIL]\tTF is now set\n");
172 		exit(1);
173 	}
174 	if (sig_traps) {
175 		printf("[FAIL]\tGot SIGTRAP\n");
176 		exit(1);
177 	}
178 	printf("[OK]\tNothing unexpected happened\n");
179 
180 	return 0;
181 }
182