1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * 32-bit syscall ABI conformance test.
4 *
5 * Copyright (c) 2015 Denys Vlasenko
6 */
7 /*
8 * Can be built statically:
9 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
10 */
11 #undef _GNU_SOURCE
12 #define _GNU_SOURCE 1
13 #undef __USE_GNU
14 #define __USE_GNU 1
15 #include <unistd.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <stdio.h>
19 #include <signal.h>
20 #include <sys/types.h>
21 #include <sys/select.h>
22 #include <sys/time.h>
23 #include <elf.h>
24 #include <sys/ptrace.h>
25 #include <sys/wait.h>
26
27 #if !defined(__i386__)
/*
 * Entry point for builds that are not 32-bit x86.
 *
 * The test proper is compiled only when __i386__ is defined; on any other
 * target this stub reports a skip and exits successfully.
 */
int main(int argc, char **argv, char **envp)
{
	fputs("[SKIP]\tNot a 32-bit x86 userspace\n", stdout);
	return 0;
}
33 #else
34
/*
 * Address that run_syscall() reaches with "call *syscall_addr".
 * main() initialises it from get_syscall().
 */
long syscall_addr;

/*
 * Look up the AT_SYSINFO entry of the ELF auxiliary vector.
 *
 * The vector is read from the memory that directly follows the NULL
 * terminator of the envp[] array.
 *
 * Returns the AT_SYSINFO value, or 0 (after printing a warning) when the
 * vector ends without such an entry.
 */
long get_syscall(char **envp)
{
	Elf32_auxv_t *entry;

	/* Walk to the terminating NULL of the environment, then step over it. */
	while (*envp != NULL)
		envp++;
	envp++;

	entry = (void *)envp;
	while (entry->a_type != AT_NULL) {
		if (entry->a_type == AT_SYSINFO)
			return entry->a_un.a_val;
		entry++;
	}

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}
47
48 asm (
49 " .pushsection .text\n"
50 " .global int80\n"
51 "int80:\n"
52 " int $0x80\n"
53 " ret\n"
54 " .popsection\n"
55 );
56 extern char int80;
57
/*
 * Snapshot of the sixteen 64-bit general-purpose registers.
 *
 * The field order is significant: get_regs64 stores register N at byte
 * offset N*8 from the start of this structure.
 */
struct regs64 {
	uint64_t rax;
	uint64_t rbx;
	uint64_t rcx;
	uint64_t rdx;
	uint64_t rsi;
	uint64_t rdi;
	uint64_t rbp;
	uint64_t rsp;
	uint64_t r8;
	uint64_t r9;
	uint64_t r10;
	uint64_t r11;
	uint64_t r12;
	uint64_t r13;
	uint64_t r14;
	uint64_t r15;
};

/* Filled in by get_regs64; referenced by name from the assembly. */
struct regs64 regs64;

/* Non-zero when main() decided that the kernel is 64-bit. */
int kernel_is_64bit;
66
67 asm (
68 " .pushsection .text\n"
69 " .code64\n"
70 "get_regs64:\n"
71 " push %rax\n"
72 " mov $regs64, %eax\n"
73 " pop 0*8(%rax)\n"
74 " movq %rbx, 1*8(%rax)\n"
75 " movq %rcx, 2*8(%rax)\n"
76 " movq %rdx, 3*8(%rax)\n"
77 " movq %rsi, 4*8(%rax)\n"
78 " movq %rdi, 5*8(%rax)\n"
79 " movq %rbp, 6*8(%rax)\n"
80 " movq %rsp, 7*8(%rax)\n"
81 " movq %r8, 8*8(%rax)\n"
82 " movq %r9, 9*8(%rax)\n"
83 " movq %r10, 10*8(%rax)\n"
84 " movq %r11, 11*8(%rax)\n"
85 " movq %r12, 12*8(%rax)\n"
86 " movq %r13, 13*8(%rax)\n"
87 " movq %r14, 14*8(%rax)\n"
88 " movq %r15, 15*8(%rax)\n"
89 " ret\n"
90 "poison_regs64:\n"
91 " movq $0x7f7f7f7f, %r8\n"
92 " shl $32, %r8\n"
93 " orq $0x7f7f7f7f, %r8\n"
94 " movq %r8, %r9\n"
95 " incq %r9\n"
96 " movq %r9, %r10\n"
97 " incq %r10\n"
98 " movq %r10, %r11\n"
99 " incq %r11\n"
100 " movq %r11, %r12\n"
101 " incq %r12\n"
102 " movq %r12, %r13\n"
103 " incq %r13\n"
104 " movq %r13, %r14\n"
105 " incq %r14\n"
106 " movq %r14, %r15\n"
107 " incq %r15\n"
108 " ret\n"
109 " .code32\n"
110 " .popsection\n"
111 );
112 extern void get_regs64(void);
113 extern void poison_regs64(void);
114 extern unsigned long call64_from_32(void (*function)(void));
print_regs64(void)115 void print_regs64(void)
116 {
117 if (!kernel_is_64bit)
118 return;
119 printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
120 printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
121 printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
122 printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
123 }
124
check_regs64(void)125 int check_regs64(void)
126 {
127 int err = 0;
128 int num = 8;
129 uint64_t *r64 = ®s64.r8;
130 uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
131
132 if (!kernel_is_64bit)
133 return 0;
134
135 do {
136 if (*r64 == expected++)
137 continue; /* register did not change */
138 if (syscall_addr != (long)&int80) {
139 /*
140 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
141 * either clear them to 0, or for R11, load EFLAGS.
142 */
143 if (*r64 == 0)
144 continue;
145 if (num == 11) {
146 printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
147 continue;
148 }
149 } else {
150 /*
151 * INT80 syscall entrypoint can be used by
152 * 64-bit programs too, unlike SYSCALL/SYSENTER.
153 * Therefore it must preserve R12+
154 * (they are callee-saved registers in 64-bit C ABI).
155 *
156 * Starting in Linux 4.17 (and any kernel that
157 * backports the change), R8..11 are preserved.
158 * Historically (and probably unintentionally), they
159 * were clobbered or zeroed.
160 */
161 }
162 printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
163 err++;
164 } while (r64++, ++num < 16);
165
166 if (!err)
167 printf("[OK]\tR8..R15 did not leak kernel data\n");
168 return err;
169 }
170
171 int nfds;
172 fd_set rfds;
173 fd_set wfds;
174 fd_set efds;
175 struct timespec timeout;
176 sigset_t sigmask;
177 struct {
178 sigset_t *sp;
179 int sz;
180 } sigmask_desc;
181
prep_args()182 void prep_args()
183 {
184 nfds = 42;
185 FD_ZERO(&rfds);
186 FD_ZERO(&wfds);
187 FD_ZERO(&efds);
188 FD_SET(0, &rfds);
189 FD_SET(1, &wfds);
190 FD_SET(2, &efds);
191 timeout.tv_sec = 0;
192 timeout.tv_nsec = 123;
193 sigemptyset(&sigmask);
194 sigaddset(&sigmask, SIGINT);
195 sigaddset(&sigmask, SIGUSR2);
196 sigaddset(&sigmask, SIGRTMAX);
197 sigmask_desc.sp = &sigmask;
198 sigmask_desc.sz = 8; /* bytes */
199 }
200
/*
 * Print "<name>=<r as 16 hex digits> " followed by a decoded view of bits
 * 21..0 of r, most significant bit first, ending with a newline.
 *
 * Every bit has a pair of strings in the table below (clear, set); an
 * empty string means nothing is printed for that state.  A note is added
 * when any bit above bit 21 is set.
 */
static void print_flags(const char *name, unsigned long r)
{
	/* indexed as bitarray[2 * bit + state] */
	static const char *bitarray[] = {
		"\n" ,"c\n" ,/* Carry Flag */
		"0 " ,"1 " ,/* Bit 1 - always on */
		"" ,"p " ,/* Parity Flag */
		"0 " ,"3? " ,
		"" ,"a " ,/* Auxiliary carry Flag */
		"0 " ,"5? " ,
		"" ,"z " ,/* Zero Flag */
		"" ,"s " ,/* Sign Flag */
		"" ,"t " ,/* Trap Flag */
		"" ,"i " ,/* Interrupt Flag */
		"" ,"d " ,/* Direction Flag */
		"" ,"o " ,/* Overflow Flag */
		"0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
		"0" ,"1" ,/* I/O Privilege Level (2 bits) */
		"" ,"n " ,/* Nested Task */
		"0 " ,"15? ",
		"" ,"r " ,/* Resume Flag */
		"" ,"v " ,/* Virtual Mode */
		"" ,"ac " ,/* Alignment Check/Access Control */
		"" ,"vif ",/* Virtual Interrupt Flag */
		"" ,"vip ",/* Virtual Interrupt Pending */
		"" ,"id " ,/* CPUID detection */
		NULL
	};
	int bit;

	printf("%s=%016lx ", name, r);
	if ((r >> 22) != 0)
		printf("(extra bits are set) ");

	for (bit = 21; bit >= 0; bit--) {
		const char *text = bitarray[2 * bit + ((r >> bit) & 1)];

		if (text[0] != '\0')
			fputs(text, stdout);
	}
}
243
run_syscall(void)244 int run_syscall(void)
245 {
246 long flags, bad_arg;
247
248 prep_args();
249
250 if (kernel_is_64bit)
251 call64_from_32(poison_regs64);
252 /*print_regs64();*/
253
254 asm("\n"
255 /* Try 6-arg syscall: pselect. It should return quickly */
256 " push %%ebp\n"
257 " mov $308, %%eax\n" /* PSELECT */
258 " mov nfds, %%ebx\n" /* ebx arg1 */
259 " mov $rfds, %%ecx\n" /* ecx arg2 */
260 " mov $wfds, %%edx\n" /* edx arg3 */
261 " mov $efds, %%esi\n" /* esi arg4 */
262 " mov $timeout, %%edi\n" /* edi arg5 */
263 " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
264 " push $0x200ed7\n" /* set almost all flags */
265 " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
266 " call *syscall_addr\n"
267 /* Check that registers are not clobbered */
268 " pushf\n"
269 " pop %%eax\n"
270 " cld\n"
271 " cmp nfds, %%ebx\n" /* ebx arg1 */
272 " mov $1, %%ebx\n"
273 " jne 1f\n"
274 " cmp $rfds, %%ecx\n" /* ecx arg2 */
275 " mov $2, %%ebx\n"
276 " jne 1f\n"
277 " cmp $wfds, %%edx\n" /* edx arg3 */
278 " mov $3, %%ebx\n"
279 " jne 1f\n"
280 " cmp $efds, %%esi\n" /* esi arg4 */
281 " mov $4, %%ebx\n"
282 " jne 1f\n"
283 " cmp $timeout, %%edi\n" /* edi arg5 */
284 " mov $5, %%ebx\n"
285 " jne 1f\n"
286 " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
287 " mov $6, %%ebx\n"
288 " jne 1f\n"
289 " mov $0, %%ebx\n"
290 "1:\n"
291 " pop %%ebp\n"
292 : "=a" (flags), "=b" (bad_arg)
293 :
294 : "cx", "dx", "si", "di"
295 );
296
297 if (kernel_is_64bit) {
298 memset(®s64, 0x77, sizeof(regs64));
299 call64_from_32(get_regs64);
300 /*print_regs64();*/
301 }
302
303 /*
304 * On paravirt kernels, flags are not preserved across syscalls.
305 * Thus, we do not consider it a bug if some are changed.
306 * We just show ones which do.
307 */
308 if ((0x200ed7 ^ flags) != 0) {
309 print_flags("[WARN]\tFlags before", 0x200ed7);
310 print_flags("[WARN]\tFlags after", flags);
311 print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
312 }
313
314 if (bad_arg) {
315 printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
316 return 1;
317 }
318 printf("[OK]\tArguments are preserved across syscall\n");
319
320 return check_regs64();
321 }
322
run_syscall_twice()323 int run_syscall_twice()
324 {
325 int exitcode = 0;
326 long sv;
327
328 if (syscall_addr) {
329 printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
330 exitcode = run_syscall();
331 }
332 sv = syscall_addr;
333 syscall_addr = (long)&int80;
334 printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
335 exitcode += run_syscall();
336 syscall_addr = sv;
337 return exitcode;
338 }
339
/*
 * Fork; the child asks to be traced and returns to the caller, so that
 * whatever the caller runs next executes under ptrace.  The parent never
 * returns: it resumes the child with PTRACE_SYSCALL after every stop and
 * finally exits with the child's exit status, or with the terminating
 * signal number.
 *
 * Exit codes used by the parent itself: 1 if fork() fails, 255 if
 * waitpid() fails or reports an unexpected state.  If PTRACE_TRACEME
 * fails, the child exits with 0.
 */
void ptrace_me(void)
{
	pid_t pid;

	/* Flush first so buffered output is not emitted twice after fork(). */
	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		/* Stop so that the parent can take over as tracer. */
		raise(SIGSTOP);
		return;
	}
	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status = 0;

		pid = waitpid(-1, &status, __WALL);
		/*
		 * Check the result before looking at status: status is not
		 * meaningful when waitpid() did not report a child.
		 */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}
375
main(int argc,char ** argv,char ** envp)376 int main(int argc, char **argv, char **envp)
377 {
378 int exitcode = 0;
379 int cs;
380
381 asm("\n"
382 " movl %%cs, %%eax\n"
383 : "=a" (cs)
384 );
385 kernel_is_64bit = (cs == 0x23);
386 if (!kernel_is_64bit)
387 printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
388
389 /* This only works for non-static builds:
390 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
391 */
392 syscall_addr = get_syscall(envp);
393
394 exitcode += run_syscall_twice();
395 ptrace_me();
396 exitcode += run_syscall_twice();
397
398 return exitcode;
399 }
400 #endif
401