1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace 4 * Copyright (c) 2014-2015 Andrew Lutomirski 5 * 6 * This is a series of tests that exercises the sigreturn(2) syscall and 7 * the IRET / SYSRET paths in the kernel. 8 * 9 * For now, this focuses on the effects of unusual CS and SS values, 10 * and it has a bunch of tests to make sure that ESP/RSP is restored 11 * properly. 12 * 13 * The basic idea behind these tests is to raise(SIGUSR1) to create a 14 * sigcontext frame, plug in the values to be tested, and then return, 15 * which implicitly invokes sigreturn(2) and programs the user context 16 * as desired. 17 * 18 * For tests for which we expect sigreturn and the subsequent return to 19 * user mode to succeed, we return to a short trampoline that generates 20 * SIGTRAP so that the meat of the tests can be ordinary C code in a 21 * SIGTRAP handler. 22 * 23 * The inner workings of each test is documented below. 24 * 25 * Do not run on outdated, unpatched kernels at risk of nasty crashes. 26 */ 27 28 #define _GNU_SOURCE 29 30 #include <sys/time.h> 31 #include <time.h> 32 #include <stdlib.h> 33 #include <sys/syscall.h> 34 #include <unistd.h> 35 #include <stdio.h> 36 #include <string.h> 37 #include <inttypes.h> 38 #include <sys/mman.h> 39 #include <sys/signal.h> 40 #include <sys/ucontext.h> 41 #include <asm/ldt.h> 42 #include <err.h> 43 #include <setjmp.h> 44 #include <stddef.h> 45 #include <stdbool.h> 46 #include <sys/ptrace.h> 47 #include <sys/user.h> 48 49 /* Pull in AR_xyz defines. */ 50 typedef unsigned int u32; 51 typedef unsigned short u16; 52 #include "../../../../arch/x86/include/asm/desc_defs.h" 53 54 /* 55 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc 56 * headers. 57 */ 58 #ifdef __x86_64__ 59 /* 60 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on 61 * kernels that save SS in the sigcontext. All kernels that set 62 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp 63 * regardless of SS (i.e. they implement espfix). 64 * 65 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS 66 * when delivering a signal that came from 64-bit code. 67 * 68 * Sigreturn restores SS as follows: 69 * 70 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || 71 * saved CS is not 64-bit) 72 * new SS = saved SS (will fail IRET and signal if invalid) 73 * else 74 * new SS = a flat 32-bit data segment 75 */ 76 #define UC_SIGCONTEXT_SS 0x2 77 #define UC_STRICT_RESTORE_SS 0x4 78 #endif 79 80 /* 81 * In principle, this test can run on Linux emulation layers (e.g. 82 * Illumos "LX branded zones"). Solaris-based kernels reserve LDT 83 * entries 0-5 for their own internal purposes, so start our LDT 84 * allocations above that reservation. (The tests don't pass on LX 85 * branded zones, but at least this lets them run.) 86 */ 87 #define LDT_OFFSET 6 88 89 /* An aligned stack accessible through some of our segments. */ 90 static unsigned char stack16[65536] __attribute__((aligned(4096))); 91 92 /* 93 * An aligned int3 instruction used as a trampoline. Some of the tests 94 * want to fish out their ss values, so this trampoline copies ss to eax 95 * before the int3. 96 */ 97 asm (".pushsection .text\n\t" 98 ".type int3, @function\n\t" 99 ".align 4096\n\t" 100 "int3:\n\t" 101 "mov %ss,%ecx\n\t" 102 "int3\n\t" 103 ".size int3, . - int3\n\t" 104 ".align 4096, 0xcc\n\t" 105 ".popsection"); 106 extern char int3[4096]; 107 108 /* 109 * At startup, we prepapre: 110 * 111 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero 112 * descriptor or out of bounds). 113 * - code16_sel: A 16-bit LDT code segment pointing to int3. 114 * - data16_sel: A 16-bit LDT data segment pointing to stack16. 115 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. 116 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. 117 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. 118 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to 119 * stack16. 120 * 121 * For no particularly good reason, xyz_sel is a selector value with the 122 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the 123 * descriptor table. These variables will be zero if their respective 124 * segments could not be allocated. 125 */ 126 static unsigned short ldt_nonexistent_sel; 127 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; 128 129 static unsigned short gdt_data16_idx, gdt_npdata32_idx; 130 131 static unsigned short GDT3(int idx) 132 { 133 return (idx << 3) | 3; 134 } 135 136 static unsigned short LDT3(int idx) 137 { 138 return (idx << 3) | 7; 139 } 140 141 /* Our sigaltstack scratch space. */ 142 static char altstack_data[SIGSTKSZ]; 143 144 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 145 int flags) 146 { 147 struct sigaction sa; 148 memset(&sa, 0, sizeof(sa)); 149 sa.sa_sigaction = handler; 150 sa.sa_flags = SA_SIGINFO | flags; 151 sigemptyset(&sa.sa_mask); 152 if (sigaction(sig, &sa, 0)) 153 err(1, "sigaction"); 154 } 155 156 static void clearhandler(int sig) 157 { 158 struct sigaction sa; 159 memset(&sa, 0, sizeof(sa)); 160 sa.sa_handler = SIG_DFL; 161 sigemptyset(&sa.sa_mask); 162 if (sigaction(sig, &sa, 0)) 163 err(1, "sigaction"); 164 } 165 166 static void add_ldt(const struct user_desc *desc, unsigned short *var, 167 const char *name) 168 { 169 if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { 170 *var = LDT3(desc->entry_number); 171 } else { 172 printf("[NOTE]\tFailed to create %s segment\n", name); 173 *var = 0; 174 } 175 } 176 177 static void setup_ldt(void) 178 { 179 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) 180 errx(1, "stack16 is too high\n"); 181 if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) 182 errx(1, "int3 is too high\n"); 183 184 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); 185 186 const struct user_desc code16_desc = { 187 .entry_number = LDT_OFFSET + 0, 188 .base_addr = (unsigned long)int3, 189 .limit = 4095, 190 .seg_32bit = 0, 191 .contents = 2, /* Code, not conforming */ 192 .read_exec_only = 0, 193 .limit_in_pages = 0, 194 .seg_not_present = 0, 195 .useable = 0 196 }; 197 add_ldt(&code16_desc, &code16_sel, "code16"); 198 199 const struct user_desc data16_desc = { 200 .entry_number = LDT_OFFSET + 1, 201 .base_addr = (unsigned long)stack16, 202 .limit = 0xffff, 203 .seg_32bit = 0, 204 .contents = 0, /* Data, grow-up */ 205 .read_exec_only = 0, 206 .limit_in_pages = 0, 207 .seg_not_present = 0, 208 .useable = 0 209 }; 210 add_ldt(&data16_desc, &data16_sel, "data16"); 211 212 const struct user_desc npcode32_desc = { 213 .entry_number = LDT_OFFSET + 3, 214 .base_addr = (unsigned long)int3, 215 .limit = 4095, 216 .seg_32bit = 1, 217 .contents = 2, /* Code, not conforming */ 218 .read_exec_only = 0, 219 .limit_in_pages = 0, 220 .seg_not_present = 1, 221 .useable = 0 222 }; 223 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); 224 225 const struct user_desc npdata32_desc = { 226 .entry_number = LDT_OFFSET + 4, 227 .base_addr = (unsigned long)stack16, 228 .limit = 0xffff, 229 .seg_32bit = 1, 230 .contents = 0, /* Data, grow-up */ 231 .read_exec_only = 0, 232 .limit_in_pages = 0, 233 .seg_not_present = 1, 234 .useable = 0 235 }; 236 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); 237 238 struct user_desc gdt_data16_desc = { 239 .entry_number = -1, 240 .base_addr = (unsigned long)stack16, 241 .limit = 0xffff, 242 .seg_32bit = 0, 243 .contents = 0, /* Data, grow-up */ 244 .read_exec_only = 0, 245 .limit_in_pages = 0, 246 .seg_not_present = 0, 247 .useable = 0 248 }; 249 250 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { 251 /* 252 * This probably indicates vulnerability to CVE-2014-8133. 253 * Merely getting here isn't definitive, though, and we'll 254 * diagnose the problem for real later on. 255 */ 256 printf("[WARN]\tset_thread_area allocated data16 at index %d\n", 257 gdt_data16_desc.entry_number); 258 gdt_data16_idx = gdt_data16_desc.entry_number; 259 } else { 260 printf("[OK]\tset_thread_area refused 16-bit data\n"); 261 } 262 263 struct user_desc gdt_npdata32_desc = { 264 .entry_number = -1, 265 .base_addr = (unsigned long)stack16, 266 .limit = 0xffff, 267 .seg_32bit = 1, 268 .contents = 0, /* Data, grow-up */ 269 .read_exec_only = 0, 270 .limit_in_pages = 0, 271 .seg_not_present = 1, 272 .useable = 0 273 }; 274 275 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { 276 /* 277 * As a hardening measure, newer kernels don't allow this. 278 */ 279 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", 280 gdt_npdata32_desc.entry_number); 281 gdt_npdata32_idx = gdt_npdata32_desc.entry_number; 282 } else { 283 printf("[OK]\tset_thread_area refused 16-bit data\n"); 284 } 285 } 286 287 /* State used by our signal handlers. */ 288 static gregset_t initial_regs, requested_regs, resulting_regs; 289 290 /* Instructions for the SIGUSR1 handler. */ 291 static volatile unsigned short sig_cs, sig_ss; 292 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; 293 #ifdef __x86_64__ 294 static volatile sig_atomic_t sig_corrupt_final_ss; 295 #endif 296 297 /* Abstractions for some 32-bit vs 64-bit differences. */ 298 #ifdef __x86_64__ 299 # define REG_IP REG_RIP 300 # define REG_SP REG_RSP 301 # define REG_CX REG_RCX 302 303 struct selectors { 304 unsigned short cs, gs, fs, ss; 305 }; 306 307 static unsigned short *ssptr(ucontext_t *ctx) 308 { 309 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 310 return &sels->ss; 311 } 312 313 static unsigned short *csptr(ucontext_t *ctx) 314 { 315 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 316 return &sels->cs; 317 } 318 #else 319 # define REG_IP REG_EIP 320 # define REG_SP REG_ESP 321 # define REG_CX REG_ECX 322 323 static greg_t *ssptr(ucontext_t *ctx) 324 { 325 return &ctx->uc_mcontext.gregs[REG_SS]; 326 } 327 328 static greg_t *csptr(ucontext_t *ctx) 329 { 330 return &ctx->uc_mcontext.gregs[REG_CS]; 331 } 332 #endif 333 334 /* 335 * Checks a given selector for its code bitness or returns -1 if it's not 336 * a usable code segment selector. 337 */ 338 int cs_bitness(unsigned short cs) 339 { 340 uint32_t valid = 0, ar; 341 asm ("lar %[cs], %[ar]\n\t" 342 "jnz 1f\n\t" 343 "mov $1, %[valid]\n\t" 344 "1:" 345 : [ar] "=r" (ar), [valid] "+rm" (valid) 346 : [cs] "r" (cs)); 347 348 if (!valid) 349 return -1; 350 351 bool db = (ar & (1 << 22)); 352 bool l = (ar & (1 << 21)); 353 354 if (!(ar & (1<<11))) 355 return -1; /* Not code. */ 356 357 if (l && !db) 358 return 64; 359 else if (!l && db) 360 return 32; 361 else if (!l && !db) 362 return 16; 363 else 364 return -1; /* Unknown bitness. */ 365 } 366 367 /* 368 * Checks a given selector for its code bitness or returns -1 if it's not 369 * a usable code segment selector. 370 */ 371 bool is_valid_ss(unsigned short cs) 372 { 373 uint32_t valid = 0, ar; 374 asm ("lar %[cs], %[ar]\n\t" 375 "jnz 1f\n\t" 376 "mov $1, %[valid]\n\t" 377 "1:" 378 : [ar] "=r" (ar), [valid] "+rm" (valid) 379 : [cs] "r" (cs)); 380 381 if (!valid) 382 return false; 383 384 if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && 385 (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) 386 return false; 387 388 return (ar & AR_P); 389 } 390 391 /* Number of errors in the current test case. */ 392 static volatile sig_atomic_t nerrs; 393 394 static void validate_signal_ss(int sig, ucontext_t *ctx) 395 { 396 #ifdef __x86_64__ 397 bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); 398 399 if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { 400 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); 401 nerrs++; 402 403 /* 404 * This happens on Linux 4.1. The rest will fail, too, so 405 * return now to reduce the noise. 406 */ 407 return; 408 } 409 410 /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ 411 if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { 412 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", 413 sig); 414 nerrs++; 415 } 416 417 if (is_valid_ss(*ssptr(ctx))) { 418 /* 419 * DOSEMU was written before 64-bit sigcontext had SS, and 420 * it tries to figure out the signal source SS by looking at 421 * the physical register. Make sure that keeps working. 422 */ 423 unsigned short hw_ss; 424 asm ("mov %%ss, %0" : "=rm" (hw_ss)); 425 if (hw_ss != *ssptr(ctx)) { 426 printf("[FAIL]\tHW SS didn't match saved SS\n"); 427 nerrs++; 428 } 429 } 430 #endif 431 } 432 433 /* 434 * SIGUSR1 handler. Sets CS and SS as requested and points IP to the 435 * int3 trampoline. Sets SP to a large known value so that we can see 436 * whether the value round-trips back to user mode correctly. 437 */ 438 static void sigusr1(int sig, siginfo_t *info, void *ctx_void) 439 { 440 ucontext_t *ctx = (ucontext_t*)ctx_void; 441 442 validate_signal_ss(sig, ctx); 443 444 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 445 446 *csptr(ctx) = sig_cs; 447 *ssptr(ctx) = sig_ss; 448 449 ctx->uc_mcontext.gregs[REG_IP] = 450 sig_cs == code16_sel ? 0 : (unsigned long)&int3; 451 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; 452 ctx->uc_mcontext.gregs[REG_CX] = 0; 453 454 #ifdef __i386__ 455 /* 456 * Make sure the kernel doesn't inadvertently use DS or ES-relative 457 * accesses in a region where user DS or ES is loaded. 458 * 459 * Skip this for 64-bit builds because long mode doesn't care about 460 * DS and ES and skipping it increases test coverage a little bit, 461 * since 64-bit kernels can still run the 32-bit build. 462 */ 463 ctx->uc_mcontext.gregs[REG_DS] = 0; 464 ctx->uc_mcontext.gregs[REG_ES] = 0; 465 #endif 466 467 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 468 requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */ 469 470 return; 471 } 472 473 /* 474 * Called after a successful sigreturn (via int3) or from a failed 475 * sigreturn (directly by kernel). Restores our state so that the 476 * original raise(SIGUSR1) returns. 477 */ 478 static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 479 { 480 ucontext_t *ctx = (ucontext_t*)ctx_void; 481 482 validate_signal_ss(sig, ctx); 483 484 sig_err = ctx->uc_mcontext.gregs[REG_ERR]; 485 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; 486 487 unsigned short ss; 488 asm ("mov %%ss,%0" : "=r" (ss)); 489 490 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; 491 if (asm_ss != sig_ss && sig == SIGTRAP) { 492 /* Sanity check failure. */ 493 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", 494 ss, *ssptr(ctx), (unsigned long long)asm_ss); 495 nerrs++; 496 } 497 498 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 499 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); 500 501 #ifdef __x86_64__ 502 if (sig_corrupt_final_ss) { 503 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { 504 printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); 505 nerrs++; 506 } else { 507 /* 508 * DOSEMU transitions from 32-bit to 64-bit mode by 509 * adjusting sigcontext, and it requires that this work 510 * even if the saved SS is bogus. 511 */ 512 printf("\tCorrupting SS on return to 64-bit mode\n"); 513 *ssptr(ctx) = 0; 514 } 515 } 516 #endif 517 518 sig_trapped = sig; 519 } 520 521 #ifdef __x86_64__ 522 /* Tests recovery if !UC_STRICT_RESTORE_SS */ 523 static void sigusr2(int sig, siginfo_t *info, void *ctx_void) 524 { 525 ucontext_t *ctx = (ucontext_t*)ctx_void; 526 527 if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { 528 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); 529 nerrs++; 530 return; /* We can't do the rest. */ 531 } 532 533 ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; 534 *ssptr(ctx) = 0; 535 536 /* Return. The kernel should recover without sending another signal. */ 537 } 538 539 static int test_nonstrict_ss(void) 540 { 541 clearhandler(SIGUSR1); 542 clearhandler(SIGTRAP); 543 clearhandler(SIGSEGV); 544 clearhandler(SIGILL); 545 sethandler(SIGUSR2, sigusr2, 0); 546 547 nerrs = 0; 548 549 printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); 550 raise(SIGUSR2); 551 if (!nerrs) 552 printf("[OK]\tIt worked\n"); 553 554 return nerrs; 555 } 556 #endif 557 558 /* Finds a usable code segment of the requested bitness. */ 559 int find_cs(int bitness) 560 { 561 unsigned short my_cs; 562 563 asm ("mov %%cs,%0" : "=r" (my_cs)); 564 565 if (cs_bitness(my_cs) == bitness) 566 return my_cs; 567 if (cs_bitness(my_cs + (2 << 3)) == bitness) 568 return my_cs + (2 << 3); 569 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) 570 return my_cs - (2 << 3); 571 if (cs_bitness(code16_sel) == bitness) 572 return code16_sel; 573 574 printf("[WARN]\tCould not find %d-bit CS\n", bitness); 575 return -1; 576 } 577 578 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) 579 { 580 int cs = find_cs(cs_bits); 581 if (cs == -1) { 582 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", 583 cs_bits, use_16bit_ss ? 16 : 32); 584 return 0; 585 } 586 587 if (force_ss != -1) { 588 sig_ss = force_ss; 589 } else { 590 if (use_16bit_ss) { 591 if (!data16_sel) { 592 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", 593 cs_bits); 594 return 0; 595 } 596 sig_ss = data16_sel; 597 } else { 598 asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); 599 } 600 } 601 602 sig_cs = cs; 603 604 printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", 605 cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, 606 (sig_ss & 4) ? "" : ", GDT"); 607 608 raise(SIGUSR1); 609 610 nerrs = 0; 611 612 /* 613 * Check that each register had an acceptable value when the 614 * int3 trampoline was invoked. 615 */ 616 for (int i = 0; i < NGREG; i++) { 617 greg_t req = requested_regs[i], res = resulting_regs[i]; 618 619 if (i == REG_TRAPNO || i == REG_IP) 620 continue; /* don't care */ 621 622 if (i == REG_SP) { 623 /* 624 * If we were using a 16-bit stack segment, then 625 * the kernel is a bit stuck: IRET only restores 626 * the low 16 bits of ESP/RSP if SS is 16-bit. 627 * The kernel uses a hack to restore bits 31:16, 628 * but that hack doesn't help with bits 63:32. 629 * On Intel CPUs, bits 63:32 end up zeroed, and, on 630 * AMD CPUs, they leak the high bits of the kernel 631 * espfix64 stack pointer. There's very little that 632 * the kernel can do about it. 633 * 634 * Similarly, if we are returning to a 32-bit context, 635 * the CPU will often lose the high 32 bits of RSP. 636 */ 637 638 if (res == req) 639 continue; 640 641 if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { 642 printf("[NOTE]\tSP: %llx -> %llx\n", 643 (unsigned long long)req, 644 (unsigned long long)res); 645 continue; 646 } 647 648 printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", 649 (unsigned long long)requested_regs[i], 650 (unsigned long long)resulting_regs[i]); 651 nerrs++; 652 continue; 653 } 654 655 bool ignore_reg = false; 656 #if __i386__ 657 if (i == REG_UESP) 658 ignore_reg = true; 659 #else 660 if (i == REG_CSGSFS) { 661 struct selectors *req_sels = 662 (void *)&requested_regs[REG_CSGSFS]; 663 struct selectors *res_sels = 664 (void *)&resulting_regs[REG_CSGSFS]; 665 if (req_sels->cs != res_sels->cs) { 666 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", 667 req_sels->cs, res_sels->cs); 668 nerrs++; 669 } 670 671 if (req_sels->ss != res_sels->ss) { 672 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", 673 req_sels->ss, res_sels->ss); 674 nerrs++; 675 } 676 677 continue; 678 } 679 #endif 680 681 /* Sanity check on the kernel */ 682 if (i == REG_CX && req != res) { 683 printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", 684 (unsigned long long)req, 685 (unsigned long long)res); 686 nerrs++; 687 continue; 688 } 689 690 if (req != res && !ignore_reg) { 691 printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", 692 i, (unsigned long long)req, 693 (unsigned long long)res); 694 nerrs++; 695 } 696 } 697 698 if (nerrs == 0) 699 printf("[OK]\tall registers okay\n"); 700 701 return nerrs; 702 } 703 704 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) 705 { 706 int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; 707 if (cs == -1) 708 return 0; 709 710 sig_cs = cs; 711 sig_ss = ss; 712 713 printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", 714 cs_bits, sig_cs, sig_ss); 715 716 sig_trapped = 0; 717 raise(SIGUSR1); 718 if (sig_trapped) { 719 char errdesc[32] = ""; 720 if (sig_err) { 721 const char *src = (sig_err & 1) ? " EXT" : ""; 722 const char *table; 723 if ((sig_err & 0x6) == 0x0) 724 table = "GDT"; 725 else if ((sig_err & 0x6) == 0x4) 726 table = "LDT"; 727 else if ((sig_err & 0x6) == 0x2) 728 table = "IDT"; 729 else 730 table = "???"; 731 732 sprintf(errdesc, "%s%s index %d, ", 733 table, src, sig_err >> 3); 734 } 735 736 char trapname[32]; 737 if (sig_trapno == 13) 738 strcpy(trapname, "GP"); 739 else if (sig_trapno == 11) 740 strcpy(trapname, "NP"); 741 else if (sig_trapno == 12) 742 strcpy(trapname, "SS"); 743 else if (sig_trapno == 32) 744 strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ 745 else 746 sprintf(trapname, "%d", sig_trapno); 747 748 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", 749 trapname, (unsigned long)sig_err, 750 errdesc, strsignal(sig_trapped)); 751 return 0; 752 } else { 753 /* 754 * This also implicitly tests UC_STRICT_RESTORE_SS: 755 * We check that these signals set UC_STRICT_RESTORE_SS and, 756 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, 757 * then we won't get SIGSEGV. 758 */ 759 printf("[FAIL]\tDid not get SIGSEGV\n"); 760 return 1; 761 } 762 } 763 764 int main() 765 { 766 int total_nerrs = 0; 767 unsigned short my_cs, my_ss; 768 769 asm volatile ("mov %%cs,%0" : "=r" (my_cs)); 770 asm volatile ("mov %%ss,%0" : "=r" (my_ss)); 771 setup_ldt(); 772 773 stack_t stack = { 774 .ss_sp = altstack_data, 775 .ss_size = SIGSTKSZ, 776 }; 777 if (sigaltstack(&stack, NULL) != 0) 778 err(1, "sigaltstack"); 779 780 sethandler(SIGUSR1, sigusr1, 0); 781 sethandler(SIGTRAP, sigtrap, SA_ONSTACK); 782 783 /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ 784 total_nerrs += test_valid_sigreturn(64, false, -1); 785 total_nerrs += test_valid_sigreturn(32, false, -1); 786 total_nerrs += test_valid_sigreturn(16, false, -1); 787 788 /* 789 * Test easy espfix cases: return to a 16-bit LDT SS in each possible 790 * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. 791 * 792 * This catches the original missing-espfix-on-64-bit-kernels issue 793 * as well as CVE-2014-8134. 794 */ 795 total_nerrs += test_valid_sigreturn(64, true, -1); 796 total_nerrs += test_valid_sigreturn(32, true, -1); 797 total_nerrs += test_valid_sigreturn(16, true, -1); 798 799 if (gdt_data16_idx) { 800 /* 801 * For performance reasons, Linux skips espfix if SS points 802 * to the GDT. If we were able to allocate a 16-bit SS in 803 * the GDT, see if it leaks parts of the kernel stack pointer. 804 * 805 * This tests for CVE-2014-8133. 806 */ 807 total_nerrs += test_valid_sigreturn(64, true, 808 GDT3(gdt_data16_idx)); 809 total_nerrs += test_valid_sigreturn(32, true, 810 GDT3(gdt_data16_idx)); 811 total_nerrs += test_valid_sigreturn(16, true, 812 GDT3(gdt_data16_idx)); 813 } 814 815 #ifdef __x86_64__ 816 /* Nasty ABI case: check SS corruption handling. */ 817 sig_corrupt_final_ss = 1; 818 total_nerrs += test_valid_sigreturn(32, false, -1); 819 total_nerrs += test_valid_sigreturn(32, true, -1); 820 sig_corrupt_final_ss = 0; 821 #endif 822 823 /* 824 * We're done testing valid sigreturn cases. Now we test states 825 * for which sigreturn itself will succeed but the subsequent 826 * entry to user mode will fail. 827 * 828 * Depending on the failure mode and the kernel bitness, these 829 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. 830 */ 831 clearhandler(SIGTRAP); 832 sethandler(SIGSEGV, sigtrap, SA_ONSTACK); 833 sethandler(SIGBUS, sigtrap, SA_ONSTACK); 834 sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ 835 836 /* Easy failures: invalid SS, resulting in #GP(0) */ 837 test_bad_iret(64, ldt_nonexistent_sel, -1); 838 test_bad_iret(32, ldt_nonexistent_sel, -1); 839 test_bad_iret(16, ldt_nonexistent_sel, -1); 840 841 /* These fail because SS isn't a data segment, resulting in #GP(SS) */ 842 test_bad_iret(64, my_cs, -1); 843 test_bad_iret(32, my_cs, -1); 844 test_bad_iret(16, my_cs, -1); 845 846 /* Try to return to a not-present code segment, triggering #NP(SS). */ 847 test_bad_iret(32, my_ss, npcode32_sel); 848 849 /* 850 * Try to return to a not-present but otherwise valid data segment. 851 * This will cause IRET to fail with #SS on the espfix stack. This 852 * exercises CVE-2014-9322. 853 * 854 * Note that, if espfix is enabled, 64-bit Linux will lose track 855 * of the actual cause of failure and report #GP(0) instead. 856 * This would be very difficult for Linux to avoid, because 857 * espfix64 causes IRET failures to be promoted to #DF, so the 858 * original exception frame is never pushed onto the stack. 859 */ 860 test_bad_iret(32, npdata32_sel, -1); 861 862 /* 863 * Try to return to a not-present but otherwise valid data 864 * segment without invoking espfix. Newer kernels don't allow 865 * this to happen in the first place. On older kernels, though, 866 * this can trigger CVE-2014-9322. 867 */ 868 if (gdt_npdata32_idx) 869 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); 870 871 #ifdef __x86_64__ 872 total_nerrs += test_nonstrict_ss(); 873 #endif 874 875 return total_nerrs ? 1 : 0; 876 } 877