// SPDX-License-Identifier: GPL-2.0-only
/*
 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
 * Copyright (c) 2014-2015 Andrew Lutomirski
 *
 * This is a series of tests that exercises the sigreturn(2) syscall and
 * the IRET / SYSRET paths in the kernel.
 *
 * For now, this focuses on the effects of unusual CS and SS values,
 * and it has a bunch of tests to make sure that ESP/RSP is restored
 * properly.
 *
 * The basic idea behind these tests is to raise(SIGUSR1) to create a
 * sigcontext frame, plug in the values to be tested, and then return,
 * which implicitly invokes sigreturn(2) and programs the user context
 * as desired.
 *
 * For tests for which we expect sigreturn and the subsequent return to
 * user mode to succeed, we return to a short trampoline that generates
 * SIGTRAP so that the meat of the tests can be ordinary C code in a
 * SIGTRAP handler.
 *
 * The inner workings of each test are documented below.
 *
 * Do not run on outdated, unpatched kernels at risk of nasty crashes.
 */

#define _GNU_SOURCE

#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/signal.h>
#include <sys/ucontext.h>
#include <asm/ldt.h>
#include <err.h>
#include <setjmp.h>
#include <stddef.h>
#include <stdbool.h>
#include <sys/ptrace.h>
#include <sys/user.h>

/* Pull in AR_xyz defines. */
typedef unsigned int u32;
typedef unsigned short u16;
#include "../../../../arch/x86/include/asm/desc_defs.h"

/*
 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
 * headers.
 */
#ifdef __x86_64__
/*
 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
 * kernels that save SS in the sigcontext.  All kernels that set
 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
 * regardless of SS (i.e. they implement espfix).
 *
 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
 * when delivering a signal that came from 64-bit code.
 *
 * Sigreturn restores SS as follows:
 *
 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
 *     saved CS is not 64-bit)
 *         new SS = saved SS  (will fail IRET and signal if invalid)
 * else
 *         new SS = a flat 32-bit data segment
 */
#define UC_SIGCONTEXT_SS       0x2
#define UC_STRICT_RESTORE_SS   0x4
#endif

/*
 * In principle, this test can run on Linux emulation layers (e.g.
 * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
 * entries 0-5 for their own internal purposes, so start our LDT
 * allocations above that reservation.  (The tests don't pass on LX
 * branded zones, but at least this lets them run.)
 */
#define LDT_OFFSET 6

/* An aligned stack accessible through some of our segments. */
static unsigned char stack16[65536] __attribute__((aligned(4096)));

/*
 * An aligned int3 instruction used as a trampoline.  Some of the tests
 * want to fish out their ss values, so this trampoline copies SS to ECX
 * before the int3.
 */
asm (".pushsection .text\n\t"
     ".type int3, @function\n\t"
     ".align 4096\n\t"
     "int3:\n\t"
     "mov %ss,%ecx\n\t"
     "int3\n\t"
     ".size int3, . - int3\n\t"
     ".align 4096, 0xcc\n\t"
     ".popsection");
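/*
 * The trampoline is declared as a page-sized array so that the sizeof()
 * range check in setup_ldt() and the 4095-byte limit of the code16
 * segment both cover the whole trampoline page.
 */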
extern char int3[4096];

/*
 * At startup, we prepare:
 *
 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 *   descriptor or out of bounds).
 * - code16_sel: A 16-bit LDT code segment pointing to int3.
 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
 *   stack16.
 *
 * For no particularly good reason, xyz_sel is a selector value with the
 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
 * descriptor table.  These variables will be zero if their respective
 * segments could not be allocated.
 */
static unsigned short ldt_nonexistent_sel;
static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;

static unsigned short gdt_data16_idx, gdt_npdata32_idx;

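/*
 * Selector layout refresher: bits 15:3 hold the descriptor table index,
 * bit 2 is the table indicator (0 = GDT, 1 = LDT), and bits 1:0 are the
 * RPL.  For example, LDT3(6) = (6 << 3) | 7 = 0x37 names LDT entry 6 at
 * RPL 3, while GDT3(6) = 0x33 names GDT entry 6 at RPL 3.
 */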
static unsigned short GDT3(int idx)
{
	return (idx << 3) | 3;
}

static unsigned short LDT3(int idx)
{
	return (idx << 3) | 7;
}

static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction sa;
	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | flags;
	sigemptyset(&sa.sa_mask);
	if (sigaction(sig, &sa, 0))
		err(1, "sigaction");
}

static void clearhandler(int sig)
{
	struct sigaction sa;
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = SIG_DFL;
	sigemptyset(&sa.sa_mask);
	if (sigaction(sig, &sa, 0))
		err(1, "sigaction");
}

static void add_ldt(const struct user_desc *desc, unsigned short *var,
		    const char *name)
{
	if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
		*var = LDT3(desc->entry_number);
	} else {
		printf("[NOTE]\tFailed to create %s segment\n", name);
		*var = 0;
	}
}

static void setup_ldt(void)
{
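	/*
	 * struct user_desc only has a 32-bit base_addr field, so the
	 * segments created below can only describe memory in the low
	 * 4 GiB.  Bail out if the build placed our targets above that.
	 */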
	if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
		errx(1, "stack16 is too high\n");
	if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
		errx(1, "int3 is too high\n");

	ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);

	const struct user_desc code16_desc = {
		.entry_number    = LDT_OFFSET + 0,
		.base_addr       = (unsigned long)int3,
		.limit           = 4095,
		.seg_32bit       = 0,
		.contents        = 2, /* Code, not conforming */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 0,
		.useable         = 0
	};
	add_ldt(&code16_desc, &code16_sel, "code16");

	const struct user_desc data16_desc = {
		.entry_number    = LDT_OFFSET + 1,
		.base_addr       = (unsigned long)stack16,
		.limit           = 0xffff,
		.seg_32bit       = 0,
		.contents        = 0, /* Data, grow-up */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 0,
		.useable         = 0
	};
	add_ldt(&data16_desc, &data16_sel, "data16");

	const struct user_desc npcode32_desc = {
		.entry_number    = LDT_OFFSET + 3,
		.base_addr       = (unsigned long)int3,
		.limit           = 4095,
		.seg_32bit       = 1,
		.contents        = 2, /* Code, not conforming */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 1,
		.useable         = 0
	};
	add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");

	const struct user_desc npdata32_desc = {
		.entry_number    = LDT_OFFSET + 4,
		.base_addr       = (unsigned long)stack16,
		.limit           = 0xffff,
		.seg_32bit       = 1,
		.contents        = 0, /* Data, grow-up */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 1,
		.useable         = 0
	};
	add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");

	struct user_desc gdt_data16_desc = {
		.entry_number    = -1,
		.base_addr       = (unsigned long)stack16,
		.limit           = 0xffff,
		.seg_32bit       = 0,
		.contents        = 0, /* Data, grow-up */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 0,
		.useable         = 0
	};

	if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
		/*
		 * This probably indicates vulnerability to CVE-2014-8133.
		 * Merely getting here isn't definitive, though, and we'll
		 * diagnose the problem for real later on.
		 */
		printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
		       gdt_data16_desc.entry_number);
		gdt_data16_idx = gdt_data16_desc.entry_number;
	} else {
		printf("[OK]\tset_thread_area refused 16-bit data\n");
	}

	struct user_desc gdt_npdata32_desc = {
		.entry_number    = -1,
		.base_addr       = (unsigned long)stack16,
		.limit           = 0xffff,
		.seg_32bit       = 1,
		.contents        = 0, /* Data, grow-up */
		.read_exec_only  = 0,
		.limit_in_pages  = 0,
		.seg_not_present = 1,
		.useable         = 0
	};

	if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
		/*
		 * As a hardening measure, newer kernels don't allow this.
		 */
		printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
		       gdt_npdata32_desc.entry_number);
		gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
	} else {
		printf("[OK]\tset_thread_area refused not-present data\n");
	}
}

/* State used by our signal handlers. */
static gregset_t initial_regs, requested_regs, resulting_regs;

/* Instructions for the SIGUSR1 handler. */
static volatile unsigned short sig_cs, sig_ss;
static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
#ifdef __x86_64__
static volatile sig_atomic_t sig_corrupt_final_ss;
#endif

/* Abstractions for some 32-bit vs 64-bit differences. */
#ifdef __x86_64__
# define REG_IP REG_RIP
# define REG_SP REG_RSP
# define REG_CX REG_RCX

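/*
 * The x86_64 sigcontext stores CS, GS, FS, and (on kernels that save it)
 * SS as four 16-bit fields in the single REG_CSGSFS greg.  This struct
 * overlays that layout so ssptr() and csptr() can address the selectors
 * by name.
 */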
struct selectors {
	unsigned short cs, gs, fs, ss;
};

static unsigned short *ssptr(ucontext_t *ctx)
{
	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
	return &sels->ss;
}

static unsigned short *csptr(ucontext_t *ctx)
{
	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
	return &sels->cs;
}
#else
# define REG_IP REG_EIP
# define REG_SP REG_ESP
# define REG_CX REG_ECX

static greg_t *ssptr(ucontext_t *ctx)
{
	return &ctx->uc_mcontext.gregs[REG_SS];
}

static greg_t *csptr(ucontext_t *ctx)
{
	return &ctx->uc_mcontext.gregs[REG_CS];
}
#endif

/*
 * Checks a given selector for its code bitness or returns -1 if it's not
 * a usable code segment selector.
 */
int cs_bitness(unsigned short cs)
{
	uint32_t valid = 0, ar;
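	/*
	 * LAR loads the access-rights bytes of the descriptor named by the
	 * selector and sets ZF on success, so the jnz below skips the
	 * "valid" marker when the selector can't be loaded.  In the result,
	 * bit 11 is the code/data type bit, bit 21 is L (64-bit), and
	 * bit 22 is D/B (default operand size); those bits are decoded
	 * into a bitness below.
	 */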
	asm ("lar %[cs], %[ar]\n\t"
	     "jnz 1f\n\t"
	     "mov $1, %[valid]\n\t"
	     "1:"
	     : [ar] "=r" (ar), [valid] "+rm" (valid)
	     : [cs] "r" (cs));

	if (!valid)
		return -1;

	bool db = (ar & (1 << 22));
	bool l = (ar & (1 << 21));

	if (!(ar & (1<<11)))
		return -1;	/* Not code. */

	if (l && !db)
		return 64;
	else if (!l && db)
		return 32;
	else if (!l && !db)
		return 16;
	else
		return -1;	/* Unknown bitness. */
}

/*
 * Checks whether a given selector names a present, writable data segment,
 * i.e. whether it would be accepted as SS.
 */
bool is_valid_ss(unsigned short cs)
{
	uint32_t valid = 0, ar;
	asm ("lar %[cs], %[ar]\n\t"
	     "jnz 1f\n\t"
	     "mov $1, %[valid]\n\t"
	     "1:"
	     : [ar] "=r" (ar), [valid] "+rm" (valid)
	     : [cs] "r" (cs));

	if (!valid)
		return false;

	if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
	    (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
		return false;

	return (ar & AR_P);
}

/* Number of errors in the current test case. */
static volatile sig_atomic_t nerrs;

static void validate_signal_ss(int sig, ucontext_t *ctx)
{
#ifdef __x86_64__
	bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);

	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
		printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
		nerrs++;

		/*
		 * This happens on Linux 4.1.  The rest will fail, too, so
		 * return now to reduce the noise.
		 */
		return;
	}

	/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
		printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
		       sig);
		nerrs++;
	}

	if (is_valid_ss(*ssptr(ctx))) {
		/*
		 * DOSEMU was written before 64-bit sigcontext had SS, and
		 * it tries to figure out the signal source SS by looking at
		 * the physical register.  Make sure that keeps working.
		 */
		unsigned short hw_ss;
		asm ("mov %%ss, %0" : "=rm" (hw_ss));
		if (hw_ss != *ssptr(ctx)) {
			printf("[FAIL]\tHW SS didn't match saved SS\n");
			nerrs++;
		}
	}
#endif
}

/*
 * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
 * int3 trampoline.  Sets SP to a large known value so that we can see
 * whether the value round-trips back to user mode correctly.
 */
static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t*)ctx_void;

	validate_signal_ss(sig, ctx);

	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));

	*csptr(ctx) = sig_cs;
	*ssptr(ctx) = sig_ss;

	ctx->uc_mcontext.gregs[REG_IP] =
		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
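	/*
	 * Use a distinctive 64-bit pattern for SP so the SIGTRAP handler can
	 * tell exactly which bits survived the round trip; 16-bit and 32-bit
	 * stack segments are expected to truncate it.
	 */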
	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
	ctx->uc_mcontext.gregs[REG_CX] = 0;

#ifdef __i386__
	/*
	 * Make sure the kernel doesn't inadvertently use DS or ES-relative
	 * accesses in a region where user DS or ES is loaded.
	 *
	 * Skip this for 64-bit builds because long mode doesn't care about
	 * DS and ES and skipping it increases test coverage a little bit,
	 * since 64-bit kernels can still run the 32-bit build.
	 */
	ctx->uc_mcontext.gregs[REG_DS] = 0;
	ctx->uc_mcontext.gregs[REG_ES] = 0;
#endif

	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
	requested_regs[REG_CX] = *ssptr(ctx);	/* The asm code does this. */

	return;
}

/*
 * Called after a successful sigreturn (via int3) or from a failed
 * sigreturn (directly by kernel).  Restores our state so that the
 * original raise(SIGUSR1) returns.
 */
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t*)ctx_void;

	validate_signal_ss(sig, ctx);

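	/*
	 * For the failed-IRET cases the kernel delivers this signal directly;
	 * record the trap number and hardware error code it saved in the
	 * sigcontext so test_bad_iret() can report what actually happened.
	 */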
	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];

	unsigned short ss;
	asm ("mov %%ss,%0" : "=r" (ss));

	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
	if (asm_ss != sig_ss && sig == SIGTRAP) {
		/* Sanity check failure. */
		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, cx = %llx\n",
		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
		nerrs++;
	}

	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));

#ifdef __x86_64__
	if (sig_corrupt_final_ss) {
		if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
			printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
			nerrs++;
		} else {
			/*
			 * DOSEMU transitions from 32-bit to 64-bit mode by
			 * adjusting sigcontext, and it requires that this work
			 * even if the saved SS is bogus.
			 */
			printf("\tCorrupting SS on return to 64-bit mode\n");
			*ssptr(ctx) = 0;
		}
	}
#endif

	sig_trapped = sig;
}

#ifdef __x86_64__
/* Tests recovery if !UC_STRICT_RESTORE_SS */
static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t*)ctx_void;

	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
		printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
		nerrs++;
		return;  /* We can't do the rest. */
	}

	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
	*ssptr(ctx) = 0;

	/* Return.  The kernel should recover without sending another signal. */
}

static int test_nonstrict_ss(void)
{
	clearhandler(SIGUSR1);
	clearhandler(SIGTRAP);
	clearhandler(SIGSEGV);
	clearhandler(SIGILL);
	sethandler(SIGUSR2, sigusr2, 0);

	nerrs = 0;

	printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
	raise(SIGUSR2);
	if (!nerrs)
		printf("[OK]\tIt worked\n");

	return nerrs;
}
#endif

/* Finds a usable code segment of the requested bitness. */
int find_cs(int bitness)
{
	unsigned short my_cs;

	asm ("mov %%cs,%0" :  "=r" (my_cs));

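	/*
	 * Probe for a matching code segment.  On Linux, the default 32-bit
	 * and 64-bit user code segments sit two GDT entries apart, so
	 * checking my_cs +/- (2 << 3) usually finds the other bitness;
	 * 16-bit code comes from our own LDT entry.
	 */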
	if (cs_bitness(my_cs) == bitness)
		return my_cs;
	if (cs_bitness(my_cs + (2 << 3)) == bitness)
		return my_cs + (2 << 3);
	if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
		return my_cs - (2 << 3);
	if (cs_bitness(code16_sel) == bitness)
		return code16_sel;

	printf("[WARN]\tCould not find %d-bit CS\n", bitness);
	return -1;
}

static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
{
	int cs = find_cs(cs_bits);
	if (cs == -1) {
		printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
		       cs_bits, use_16bit_ss ? 16 : 32);
		return 0;
	}

	if (force_ss != -1) {
		sig_ss = force_ss;
	} else {
		if (use_16bit_ss) {
			if (!data16_sel) {
				printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
				       cs_bits);
				return 0;
			}
			sig_ss = data16_sel;
		} else {
			asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
		}
	}

	sig_cs = cs;

	printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
	       cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
	       (sig_ss & 4) ? "" : ", GDT");

	raise(SIGUSR1);

	nerrs = 0;

	/*
	 * Check that each register had an acceptable value when the
	 * int3 trampoline was invoked.
	 */
	for (int i = 0; i < NGREG; i++) {
		greg_t req = requested_regs[i], res = resulting_regs[i];

		if (i == REG_TRAPNO || i == REG_IP)
			continue;	/* don't care */

		if (i == REG_SP) {
			/*
			 * If we were using a 16-bit stack segment, then
			 * the kernel is a bit stuck: IRET only restores
			 * the low 16 bits of ESP/RSP if SS is 16-bit.
			 * The kernel uses a hack to restore bits 31:16,
			 * but that hack doesn't help with bits 63:32.
			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
			 * AMD CPUs, they leak the high bits of the kernel
			 * espfix64 stack pointer.  There's very little that
			 * the kernel can do about it.
			 *
			 * Similarly, if we are returning to a 32-bit context,
			 * the CPU will often lose the high 32 bits of RSP.
			 */

			if (res == req)
				continue;

			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
				printf("[NOTE]\tSP: %llx -> %llx\n",
				       (unsigned long long)req,
				       (unsigned long long)res);
				continue;
			}

			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
			       (unsigned long long)requested_regs[i],
			       (unsigned long long)resulting_regs[i]);
			nerrs++;
			continue;
		}

		bool ignore_reg = false;
#if __i386__
		if (i == REG_UESP)
			ignore_reg = true;
#else
		if (i == REG_CSGSFS) {
			struct selectors *req_sels =
				(void *)&requested_regs[REG_CSGSFS];
			struct selectors *res_sels =
				(void *)&resulting_regs[REG_CSGSFS];
			if (req_sels->cs != res_sels->cs) {
				printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
				       req_sels->cs, res_sels->cs);
				nerrs++;
			}

			if (req_sels->ss != res_sels->ss) {
				printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
				       req_sels->ss, res_sels->ss);
				nerrs++;
			}

			continue;
		}
#endif

		/* Sanity check on the kernel */
		if (i == REG_CX && req != res) {
			printf("[FAIL]\tCX (saved SS) mismatch: requested 0x%llx; got 0x%llx\n",
			       (unsigned long long)req,
			       (unsigned long long)res);
			nerrs++;
			continue;
		}

		if (req != res && !ignore_reg) {
			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
			       i, (unsigned long long)req,
			       (unsigned long long)res);
			nerrs++;
		}
	}

	if (nerrs == 0)
		printf("[OK]\tall registers okay\n");

	return nerrs;
}

static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
{
	int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
	if (cs == -1)
		return 0;

	sig_cs = cs;
	sig_ss = ss;

	printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
	       cs_bits, sig_cs, sig_ss);

	sig_trapped = 0;
	raise(SIGUSR1);
	if (sig_trapped) {
		char errdesc[32] = "";
		if (sig_err) {
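			/*
			 * Decode the x86 selector error code: bit 0 (EXT)
			 * means the fault was triggered by an external
			 * event, bits 2:1 identify the descriptor table
			 * (00 = GDT, 01 = IDT, 10 = LDT), and the remaining
			 * bits are the selector index.
			 */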
			const char *src = (sig_err & 1) ? " EXT" : "";
			const char *table;
			if ((sig_err & 0x6) == 0x0)
				table = "GDT";
			else if ((sig_err & 0x6) == 0x4)
				table = "LDT";
			else if ((sig_err & 0x6) == 0x2)
				table = "IDT";
			else
				table = "???";

			sprintf(errdesc, "%s%s index %d, ",
				table, src, sig_err >> 3);
		}

		char trapname[32];
		if (sig_trapno == 13)
			strcpy(trapname, "GP");
		else if (sig_trapno == 11)
			strcpy(trapname, "NP");
		else if (sig_trapno == 12)
			strcpy(trapname, "SS");
		else if (sig_trapno == 32)
			strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
		else
			sprintf(trapname, "%d", sig_trapno);

		printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
		       trapname, (unsigned long)sig_err,
		       errdesc, strsignal(sig_trapped));
		return 0;
	} else {
		/*
		 * This also implicitly tests UC_STRICT_RESTORE_SS:
		 * We check that these signals set UC_STRICT_RESTORE_SS and,
		 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
		 * then we won't get SIGSEGV.
		 */
		printf("[FAIL]\tDid not get SIGSEGV\n");
		return 1;
	}
}

int main()
{
	int total_nerrs = 0;
	unsigned short my_cs, my_ss;

	asm volatile ("mov %%cs,%0" : "=r" (my_cs));
	asm volatile ("mov %%ss,%0" : "=r" (my_ss));
	setup_ldt();

	stack_t stack = {
		/* Our sigaltstack scratch space. */
		.ss_sp = malloc(sizeof(char) * SIGSTKSZ),
		.ss_size = SIGSTKSZ,
	};
	if (sigaltstack(&stack, NULL) != 0)
		err(1, "sigaltstack");

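	/*
	 * The tests deliberately point SP at garbage before returning, so any
	 * handler that can run afterwards (SIGTRAP here, and the failure
	 * signals registered later) must execute on the alternate stack.
	 */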
	sethandler(SIGUSR1, sigusr1, 0);
	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);

	/* Easy cases: return to a 32-bit SS in each possible CS bitness. */
	total_nerrs += test_valid_sigreturn(64, false, -1);
	total_nerrs += test_valid_sigreturn(32, false, -1);
	total_nerrs += test_valid_sigreturn(16, false, -1);

	/*
	 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
	 * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
	 *
	 * This catches the original missing-espfix-on-64-bit-kernels issue
	 * as well as CVE-2014-8134.
	 */
	total_nerrs += test_valid_sigreturn(64, true, -1);
	total_nerrs += test_valid_sigreturn(32, true, -1);
	total_nerrs += test_valid_sigreturn(16, true, -1);

	if (gdt_data16_idx) {
		/*
		 * For performance reasons, Linux skips espfix if SS points
		 * to the GDT.  If we were able to allocate a 16-bit SS in
		 * the GDT, see if it leaks parts of the kernel stack pointer.
		 *
		 * This tests for CVE-2014-8133.
		 */
		total_nerrs += test_valid_sigreturn(64, true,
						    GDT3(gdt_data16_idx));
		total_nerrs += test_valid_sigreturn(32, true,
						    GDT3(gdt_data16_idx));
		total_nerrs += test_valid_sigreturn(16, true,
						    GDT3(gdt_data16_idx));
	}

#ifdef __x86_64__
	/* Nasty ABI case: check SS corruption handling. */
	sig_corrupt_final_ss = 1;
	total_nerrs += test_valid_sigreturn(32, false, -1);
	total_nerrs += test_valid_sigreturn(32, true, -1);
	sig_corrupt_final_ss = 0;
#endif

	/*
	 * We're done testing valid sigreturn cases.  Now we test states
	 * for which sigreturn itself will succeed but the subsequent
	 * entry to user mode will fail.
	 *
	 * Depending on the failure mode and the kernel bitness, these
	 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
	 */
	clearhandler(SIGTRAP);
	sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
	sethandler(SIGBUS, sigtrap, SA_ONSTACK);
	sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */

	/* Easy failures: invalid SS, resulting in #GP(0) */
	test_bad_iret(64, ldt_nonexistent_sel, -1);
	test_bad_iret(32, ldt_nonexistent_sel, -1);
	test_bad_iret(16, ldt_nonexistent_sel, -1);

	/* These fail because SS isn't a data segment, resulting in #GP(SS) */
	test_bad_iret(64, my_cs, -1);
	test_bad_iret(32, my_cs, -1);
	test_bad_iret(16, my_cs, -1);

	/* Try to return to a not-present code segment, triggering #NP(SS). */
	test_bad_iret(32, my_ss, npcode32_sel);

	/*
	 * Try to return to a not-present but otherwise valid data segment.
	 * This will cause IRET to fail with #SS on the espfix stack.  This
	 * exercises CVE-2014-9322.
	 *
	 * Note that, if espfix is enabled, 64-bit Linux will lose track
	 * of the actual cause of failure and report #GP(0) instead.
	 * This would be very difficult for Linux to avoid, because
	 * espfix64 causes IRET failures to be promoted to #DF, so the
	 * original exception frame is never pushed onto the stack.
	 */
	test_bad_iret(32, npdata32_sel, -1);

	/*
	 * Try to return to a not-present but otherwise valid data
	 * segment without invoking espfix.  Newer kernels don't allow
	 * this to happen in the first place.  On older kernels, though,
	 * this can trigger CVE-2014-9322.
	 */
	if (gdt_npdata32_idx)
		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);

#ifdef __x86_64__
	total_nerrs += test_nonstrict_ss();
#endif

	free(stack.ss_sp);
	return total_nerrs ? 1 : 0;
}