1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace 4 * Copyright (c) 2014-2015 Andrew Lutomirski 5 * 6 * This is a series of tests that exercises the sigreturn(2) syscall and 7 * the IRET / SYSRET paths in the kernel. 8 * 9 * For now, this focuses on the effects of unusual CS and SS values, 10 * and it has a bunch of tests to make sure that ESP/RSP is restored 11 * properly. 12 * 13 * The basic idea behind these tests is to raise(SIGUSR1) to create a 14 * sigcontext frame, plug in the values to be tested, and then return, 15 * which implicitly invokes sigreturn(2) and programs the user context 16 * as desired. 17 * 18 * For tests for which we expect sigreturn and the subsequent return to 19 * user mode to succeed, we return to a short trampoline that generates 20 * SIGTRAP so that the meat of the tests can be ordinary C code in a 21 * SIGTRAP handler. 22 * 23 * The inner workings of each test is documented below. 24 * 25 * Do not run on outdated, unpatched kernels at risk of nasty crashes. 26 */ 27 28 #define _GNU_SOURCE 29 30 #include <sys/time.h> 31 #include <time.h> 32 #include <stdlib.h> 33 #include <sys/syscall.h> 34 #include <unistd.h> 35 #include <stdio.h> 36 #include <string.h> 37 #include <inttypes.h> 38 #include <sys/mman.h> 39 #include <sys/signal.h> 40 #include <sys/ucontext.h> 41 #include <asm/ldt.h> 42 #include <err.h> 43 #include <setjmp.h> 44 #include <stddef.h> 45 #include <stdbool.h> 46 #include <sys/ptrace.h> 47 #include <sys/user.h> 48 49 /* Pull in AR_xyz defines. */ 50 typedef unsigned int u32; 51 typedef unsigned short u16; 52 #include "../../../../arch/x86/include/asm/desc_defs.h" 53 54 /* 55 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc 56 * headers. 57 */ 58 #ifdef __x86_64__ 59 /* 60 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on 61 * kernels that save SS in the sigcontext. 
All kernels that set
 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
 * regardless of SS (i.e. they implement espfix).
 *
 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
 * when delivering a signal that came from 64-bit code.
 *
 * Sigreturn restores SS as follows:
 *
 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
 *     saved CS is not 64-bit)
 *         new SS = saved SS  (will fail IRET and signal if invalid)
 * else
 *         new SS = a flat 32-bit data segment
 */
#define UC_SIGCONTEXT_SS       0x2
#define UC_STRICT_RESTORE_SS   0x4
#endif

/*
 * In principle, this test can run on Linux emulation layers (e.g.
 * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
 * entries 0-5 for their own internal purposes, so start our LDT
 * allocations above that reservation.  (The tests don't pass on LX
 * branded zones, but at least this lets them run.)
 */
#define LDT_OFFSET 6

/*
 * An aligned stack accessible through some of our segments.  It is
 * exactly 64 KiB, which matches the 0xffff limit used by the data
 * segment descriptors that point at it.
 */
static unsigned char stack16[65536] __attribute__((aligned(4096)));

/*
 * An aligned int3 instruction used as a trampoline.  Some of the tests
 * want to fish out their ss values, so this trampoline copies ss to ecx
 * before the int3.
 *
 * The symbol is page aligned and the remainder of its page is filled
 * with 0xcc bytes, so the extern declaration below can describe it as a
 * 4096-byte object.
 */
asm (".pushsection .text\n\t"
     ".type int3, @function\n\t"
     ".align 4096\n\t"
     "int3:\n\t"
     "mov %ss,%ecx\n\t"
     "int3\n\t"
     ".size int3, . - int3\n\t"
     ".align 4096, 0xcc\n\t"
     ".popsection");
extern char int3[4096];

/*
 * At startup, we prepare:
 *
 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 *   descriptor or out of bounds).
 * - code16_sel: A 16-bit LDT code segment pointing to int3.
 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
116 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. 117 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. 118 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to 119 * stack16. 120 * 121 * For no particularly good reason, xyz_sel is a selector value with the 122 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the 123 * descriptor table. These variables will be zero if their respective 124 * segments could not be allocated. 125 */ 126 static unsigned short ldt_nonexistent_sel; 127 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; 128 129 static unsigned short gdt_data16_idx, gdt_npdata32_idx; 130 131 static unsigned short GDT3(int idx) 132 { 133 return (idx << 3) | 3; 134 } 135 136 static unsigned short LDT3(int idx) 137 { 138 return (idx << 3) | 7; 139 } 140 141 /* Our sigaltstack scratch space. */ 142 static char altstack_data[SIGSTKSZ]; 143 144 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 145 int flags) 146 { 147 struct sigaction sa; 148 memset(&sa, 0, sizeof(sa)); 149 sa.sa_sigaction = handler; 150 sa.sa_flags = SA_SIGINFO | flags; 151 sigemptyset(&sa.sa_mask); 152 if (sigaction(sig, &sa, 0)) 153 err(1, "sigaction"); 154 } 155 156 static void clearhandler(int sig) 157 { 158 struct sigaction sa; 159 memset(&sa, 0, sizeof(sa)); 160 sa.sa_handler = SIG_DFL; 161 sigemptyset(&sa.sa_mask); 162 if (sigaction(sig, &sa, 0)) 163 err(1, "sigaction"); 164 } 165 166 static void add_ldt(const struct user_desc *desc, unsigned short *var, 167 const char *name) 168 { 169 if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { 170 *var = LDT3(desc->entry_number); 171 } else { 172 printf("[NOTE]\tFailed to create %s segment\n", name); 173 *var = 0; 174 } 175 } 176 177 static void setup_ldt(void) 178 { 179 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) 180 errx(1, "stack16 is too high\n"); 181 if ((unsigned long)int3 > (1ULL 
<< 32) - sizeof(int3)) 182 errx(1, "int3 is too high\n"); 183 184 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); 185 186 const struct user_desc code16_desc = { 187 .entry_number = LDT_OFFSET + 0, 188 .base_addr = (unsigned long)int3, 189 .limit = 4095, 190 .seg_32bit = 0, 191 .contents = 2, /* Code, not conforming */ 192 .read_exec_only = 0, 193 .limit_in_pages = 0, 194 .seg_not_present = 0, 195 .useable = 0 196 }; 197 add_ldt(&code16_desc, &code16_sel, "code16"); 198 199 const struct user_desc data16_desc = { 200 .entry_number = LDT_OFFSET + 1, 201 .base_addr = (unsigned long)stack16, 202 .limit = 0xffff, 203 .seg_32bit = 0, 204 .contents = 0, /* Data, grow-up */ 205 .read_exec_only = 0, 206 .limit_in_pages = 0, 207 .seg_not_present = 0, 208 .useable = 0 209 }; 210 add_ldt(&data16_desc, &data16_sel, "data16"); 211 212 const struct user_desc npcode32_desc = { 213 .entry_number = LDT_OFFSET + 3, 214 .base_addr = (unsigned long)int3, 215 .limit = 4095, 216 .seg_32bit = 1, 217 .contents = 2, /* Code, not conforming */ 218 .read_exec_only = 0, 219 .limit_in_pages = 0, 220 .seg_not_present = 1, 221 .useable = 0 222 }; 223 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); 224 225 const struct user_desc npdata32_desc = { 226 .entry_number = LDT_OFFSET + 4, 227 .base_addr = (unsigned long)stack16, 228 .limit = 0xffff, 229 .seg_32bit = 1, 230 .contents = 0, /* Data, grow-up */ 231 .read_exec_only = 0, 232 .limit_in_pages = 0, 233 .seg_not_present = 1, 234 .useable = 0 235 }; 236 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); 237 238 struct user_desc gdt_data16_desc = { 239 .entry_number = -1, 240 .base_addr = (unsigned long)stack16, 241 .limit = 0xffff, 242 .seg_32bit = 0, 243 .contents = 0, /* Data, grow-up */ 244 .read_exec_only = 0, 245 .limit_in_pages = 0, 246 .seg_not_present = 0, 247 .useable = 0 248 }; 249 250 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { 251 /* 252 * This probably indicates vulnerability to CVE-2014-8133. 
253 * Merely getting here isn't definitive, though, and we'll 254 * diagnose the problem for real later on. 255 */ 256 printf("[WARN]\tset_thread_area allocated data16 at index %d\n", 257 gdt_data16_desc.entry_number); 258 gdt_data16_idx = gdt_data16_desc.entry_number; 259 } else { 260 printf("[OK]\tset_thread_area refused 16-bit data\n"); 261 } 262 263 struct user_desc gdt_npdata32_desc = { 264 .entry_number = -1, 265 .base_addr = (unsigned long)stack16, 266 .limit = 0xffff, 267 .seg_32bit = 1, 268 .contents = 0, /* Data, grow-up */ 269 .read_exec_only = 0, 270 .limit_in_pages = 0, 271 .seg_not_present = 1, 272 .useable = 0 273 }; 274 275 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { 276 /* 277 * As a hardening measure, newer kernels don't allow this. 278 */ 279 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", 280 gdt_npdata32_desc.entry_number); 281 gdt_npdata32_idx = gdt_npdata32_desc.entry_number; 282 } else { 283 printf("[OK]\tset_thread_area refused 16-bit data\n"); 284 } 285 } 286 287 /* State used by our signal handlers. */ 288 static gregset_t initial_regs, requested_regs, resulting_regs; 289 290 /* Instructions for the SIGUSR1 handler. */ 291 static volatile unsigned short sig_cs, sig_ss; 292 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; 293 #ifdef __x86_64__ 294 static volatile sig_atomic_t sig_corrupt_final_ss; 295 #endif 296 297 /* Abstractions for some 32-bit vs 64-bit differences. 
*/ 298 #ifdef __x86_64__ 299 # define REG_IP REG_RIP 300 # define REG_SP REG_RSP 301 # define REG_CX REG_RCX 302 303 struct selectors { 304 unsigned short cs, gs, fs, ss; 305 }; 306 307 static unsigned short *ssptr(ucontext_t *ctx) 308 { 309 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 310 return &sels->ss; 311 } 312 313 static unsigned short *csptr(ucontext_t *ctx) 314 { 315 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 316 return &sels->cs; 317 } 318 #else 319 # define REG_IP REG_EIP 320 # define REG_SP REG_ESP 321 # define REG_CX REG_ECX 322 323 static greg_t *ssptr(ucontext_t *ctx) 324 { 325 return &ctx->uc_mcontext.gregs[REG_SS]; 326 } 327 328 static greg_t *csptr(ucontext_t *ctx) 329 { 330 return &ctx->uc_mcontext.gregs[REG_CS]; 331 } 332 #endif 333 334 /* 335 * Checks a given selector for its code bitness or returns -1 if it's not 336 * a usable code segment selector. 337 */ 338 int cs_bitness(unsigned short cs) 339 { 340 uint32_t valid = 0, ar; 341 asm ("lar %[cs], %[ar]\n\t" 342 "jnz 1f\n\t" 343 "mov $1, %[valid]\n\t" 344 "1:" 345 : [ar] "=r" (ar), [valid] "+rm" (valid) 346 : [cs] "r" (cs)); 347 348 if (!valid) 349 return -1; 350 351 bool db = (ar & (1 << 22)); 352 bool l = (ar & (1 << 21)); 353 354 if (!(ar & (1<<11))) 355 return -1; /* Not code. */ 356 357 if (l && !db) 358 return 64; 359 else if (!l && db) 360 return 32; 361 else if (!l && !db) 362 return 16; 363 else 364 return -1; /* Unknown bitness. */ 365 } 366 367 /* 368 * Checks a given selector for its code bitness or returns -1 if it's not 369 * a usable code segment selector. 
370 */ 371 bool is_valid_ss(unsigned short cs) 372 { 373 uint32_t valid = 0, ar; 374 asm ("lar %[cs], %[ar]\n\t" 375 "jnz 1f\n\t" 376 "mov $1, %[valid]\n\t" 377 "1:" 378 : [ar] "=r" (ar), [valid] "+rm" (valid) 379 : [cs] "r" (cs)); 380 381 if (!valid) 382 return false; 383 384 if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && 385 (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) 386 return false; 387 388 return (ar & AR_P); 389 } 390 391 /* Number of errors in the current test case. */ 392 static volatile sig_atomic_t nerrs; 393 394 static void validate_signal_ss(int sig, ucontext_t *ctx) 395 { 396 #ifdef __x86_64__ 397 bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); 398 399 if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { 400 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); 401 nerrs++; 402 403 /* 404 * This happens on Linux 4.1. The rest will fail, too, so 405 * return now to reduce the noise. 406 */ 407 return; 408 } 409 410 /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ 411 if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { 412 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", 413 sig); 414 nerrs++; 415 } 416 417 if (is_valid_ss(*ssptr(ctx))) { 418 /* 419 * DOSEMU was written before 64-bit sigcontext had SS, and 420 * it tries to figure out the signal source SS by looking at 421 * the physical register. Make sure that keeps working. 422 */ 423 unsigned short hw_ss; 424 asm ("mov %%ss, %0" : "=rm" (hw_ss)); 425 if (hw_ss != *ssptr(ctx)) { 426 printf("[FAIL]\tHW SS didn't match saved SS\n"); 427 nerrs++; 428 } 429 } 430 #endif 431 } 432 433 /* 434 * SIGUSR1 handler. Sets CS and SS as requested and points IP to the 435 * int3 trampoline. Sets SP to a large known value so that we can see 436 * whether the value round-trips back to user mode correctly. 
437 */ 438 static void sigusr1(int sig, siginfo_t *info, void *ctx_void) 439 { 440 ucontext_t *ctx = (ucontext_t*)ctx_void; 441 442 validate_signal_ss(sig, ctx); 443 444 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 445 446 *csptr(ctx) = sig_cs; 447 *ssptr(ctx) = sig_ss; 448 449 ctx->uc_mcontext.gregs[REG_IP] = 450 sig_cs == code16_sel ? 0 : (unsigned long)&int3; 451 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; 452 ctx->uc_mcontext.gregs[REG_CX] = 0; 453 454 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 455 requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */ 456 457 return; 458 } 459 460 /* 461 * Called after a successful sigreturn (via int3) or from a failed 462 * sigreturn (directly by kernel). Restores our state so that the 463 * original raise(SIGUSR1) returns. 464 */ 465 static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 466 { 467 ucontext_t *ctx = (ucontext_t*)ctx_void; 468 469 validate_signal_ss(sig, ctx); 470 471 sig_err = ctx->uc_mcontext.gregs[REG_ERR]; 472 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; 473 474 unsigned short ss; 475 asm ("mov %%ss,%0" : "=r" (ss)); 476 477 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; 478 if (asm_ss != sig_ss && sig == SIGTRAP) { 479 /* Sanity check failure. */ 480 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", 481 ss, *ssptr(ctx), (unsigned long long)asm_ss); 482 nerrs++; 483 } 484 485 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 486 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); 487 488 #ifdef __x86_64__ 489 if (sig_corrupt_final_ss) { 490 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { 491 printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); 492 nerrs++; 493 } else { 494 /* 495 * DOSEMU transitions from 32-bit to 64-bit mode by 496 * adjusting sigcontext, and it requires that this work 497 * even if the saved SS is bogus. 
498 */ 499 printf("\tCorrupting SS on return to 64-bit mode\n"); 500 *ssptr(ctx) = 0; 501 } 502 } 503 #endif 504 505 sig_trapped = sig; 506 } 507 508 #ifdef __x86_64__ 509 /* Tests recovery if !UC_STRICT_RESTORE_SS */ 510 static void sigusr2(int sig, siginfo_t *info, void *ctx_void) 511 { 512 ucontext_t *ctx = (ucontext_t*)ctx_void; 513 514 if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { 515 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); 516 nerrs++; 517 return; /* We can't do the rest. */ 518 } 519 520 ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; 521 *ssptr(ctx) = 0; 522 523 /* Return. The kernel should recover without sending another signal. */ 524 } 525 526 static int test_nonstrict_ss(void) 527 { 528 clearhandler(SIGUSR1); 529 clearhandler(SIGTRAP); 530 clearhandler(SIGSEGV); 531 clearhandler(SIGILL); 532 sethandler(SIGUSR2, sigusr2, 0); 533 534 nerrs = 0; 535 536 printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); 537 raise(SIGUSR2); 538 if (!nerrs) 539 printf("[OK]\tIt worked\n"); 540 541 return nerrs; 542 } 543 #endif 544 545 /* Finds a usable code segment of the requested bitness. */ 546 int find_cs(int bitness) 547 { 548 unsigned short my_cs; 549 550 asm ("mov %%cs,%0" : "=r" (my_cs)); 551 552 if (cs_bitness(my_cs) == bitness) 553 return my_cs; 554 if (cs_bitness(my_cs + (2 << 3)) == bitness) 555 return my_cs + (2 << 3); 556 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) 557 return my_cs - (2 << 3); 558 if (cs_bitness(code16_sel) == bitness) 559 return code16_sel; 560 561 printf("[WARN]\tCould not find %d-bit CS\n", bitness); 562 return -1; 563 } 564 565 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) 566 { 567 int cs = find_cs(cs_bits); 568 if (cs == -1) { 569 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", 570 cs_bits, use_16bit_ss ? 
16 : 32); 571 return 0; 572 } 573 574 if (force_ss != -1) { 575 sig_ss = force_ss; 576 } else { 577 if (use_16bit_ss) { 578 if (!data16_sel) { 579 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", 580 cs_bits); 581 return 0; 582 } 583 sig_ss = data16_sel; 584 } else { 585 asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); 586 } 587 } 588 589 sig_cs = cs; 590 591 printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", 592 cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, 593 (sig_ss & 4) ? "" : ", GDT"); 594 595 raise(SIGUSR1); 596 597 nerrs = 0; 598 599 /* 600 * Check that each register had an acceptable value when the 601 * int3 trampoline was invoked. 602 */ 603 for (int i = 0; i < NGREG; i++) { 604 greg_t req = requested_regs[i], res = resulting_regs[i]; 605 606 if (i == REG_TRAPNO || i == REG_IP) 607 continue; /* don't care */ 608 609 if (i == REG_SP) { 610 /* 611 * If we were using a 16-bit stack segment, then 612 * the kernel is a bit stuck: IRET only restores 613 * the low 16 bits of ESP/RSP if SS is 16-bit. 614 * The kernel uses a hack to restore bits 31:16, 615 * but that hack doesn't help with bits 63:32. 616 * On Intel CPUs, bits 63:32 end up zeroed, and, on 617 * AMD CPUs, they leak the high bits of the kernel 618 * espfix64 stack pointer. There's very little that 619 * the kernel can do about it. 620 * 621 * Similarly, if we are returning to a 32-bit context, 622 * the CPU will often lose the high 32 bits of RSP. 
623 */ 624 625 if (res == req) 626 continue; 627 628 if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { 629 printf("[NOTE]\tSP: %llx -> %llx\n", 630 (unsigned long long)req, 631 (unsigned long long)res); 632 continue; 633 } 634 635 printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", 636 (unsigned long long)requested_regs[i], 637 (unsigned long long)resulting_regs[i]); 638 nerrs++; 639 continue; 640 } 641 642 bool ignore_reg = false; 643 #if __i386__ 644 if (i == REG_UESP) 645 ignore_reg = true; 646 #else 647 if (i == REG_CSGSFS) { 648 struct selectors *req_sels = 649 (void *)&requested_regs[REG_CSGSFS]; 650 struct selectors *res_sels = 651 (void *)&resulting_regs[REG_CSGSFS]; 652 if (req_sels->cs != res_sels->cs) { 653 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", 654 req_sels->cs, res_sels->cs); 655 nerrs++; 656 } 657 658 if (req_sels->ss != res_sels->ss) { 659 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", 660 req_sels->ss, res_sels->ss); 661 nerrs++; 662 } 663 664 continue; 665 } 666 #endif 667 668 /* Sanity check on the kernel */ 669 if (i == REG_CX && req != res) { 670 printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", 671 (unsigned long long)req, 672 (unsigned long long)res); 673 nerrs++; 674 continue; 675 } 676 677 if (req != res && !ignore_reg) { 678 printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", 679 i, (unsigned long long)req, 680 (unsigned long long)res); 681 nerrs++; 682 } 683 } 684 685 if (nerrs == 0) 686 printf("[OK]\tall registers okay\n"); 687 688 return nerrs; 689 } 690 691 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) 692 { 693 int cs = force_cs == -1 ? 
find_cs(cs_bits) : force_cs; 694 if (cs == -1) 695 return 0; 696 697 sig_cs = cs; 698 sig_ss = ss; 699 700 printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", 701 cs_bits, sig_cs, sig_ss); 702 703 sig_trapped = 0; 704 raise(SIGUSR1); 705 if (sig_trapped) { 706 char errdesc[32] = ""; 707 if (sig_err) { 708 const char *src = (sig_err & 1) ? " EXT" : ""; 709 const char *table; 710 if ((sig_err & 0x6) == 0x0) 711 table = "GDT"; 712 else if ((sig_err & 0x6) == 0x4) 713 table = "LDT"; 714 else if ((sig_err & 0x6) == 0x2) 715 table = "IDT"; 716 else 717 table = "???"; 718 719 sprintf(errdesc, "%s%s index %d, ", 720 table, src, sig_err >> 3); 721 } 722 723 char trapname[32]; 724 if (sig_trapno == 13) 725 strcpy(trapname, "GP"); 726 else if (sig_trapno == 11) 727 strcpy(trapname, "NP"); 728 else if (sig_trapno == 12) 729 strcpy(trapname, "SS"); 730 else if (sig_trapno == 32) 731 strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ 732 else 733 sprintf(trapname, "%d", sig_trapno); 734 735 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", 736 trapname, (unsigned long)sig_err, 737 errdesc, strsignal(sig_trapped)); 738 return 0; 739 } else { 740 /* 741 * This also implicitly tests UC_STRICT_RESTORE_SS: 742 * We check that these signals set UC_STRICT_RESTORE_SS and, 743 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, 744 * then we won't get SIGSEGV. 745 */ 746 printf("[FAIL]\tDid not get SIGSEGV\n"); 747 return 1; 748 } 749 } 750 751 int main() 752 { 753 int total_nerrs = 0; 754 unsigned short my_cs, my_ss; 755 756 asm volatile ("mov %%cs,%0" : "=r" (my_cs)); 757 asm volatile ("mov %%ss,%0" : "=r" (my_ss)); 758 setup_ldt(); 759 760 stack_t stack = { 761 .ss_sp = altstack_data, 762 .ss_size = SIGSTKSZ, 763 }; 764 if (sigaltstack(&stack, NULL) != 0) 765 err(1, "sigaltstack"); 766 767 sethandler(SIGUSR1, sigusr1, 0); 768 sethandler(SIGTRAP, sigtrap, SA_ONSTACK); 769 770 /* Easy cases: return to a 32-bit SS in each possible CS bitness. 
*/ 771 total_nerrs += test_valid_sigreturn(64, false, -1); 772 total_nerrs += test_valid_sigreturn(32, false, -1); 773 total_nerrs += test_valid_sigreturn(16, false, -1); 774 775 /* 776 * Test easy espfix cases: return to a 16-bit LDT SS in each possible 777 * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. 778 * 779 * This catches the original missing-espfix-on-64-bit-kernels issue 780 * as well as CVE-2014-8134. 781 */ 782 total_nerrs += test_valid_sigreturn(64, true, -1); 783 total_nerrs += test_valid_sigreturn(32, true, -1); 784 total_nerrs += test_valid_sigreturn(16, true, -1); 785 786 if (gdt_data16_idx) { 787 /* 788 * For performance reasons, Linux skips espfix if SS points 789 * to the GDT. If we were able to allocate a 16-bit SS in 790 * the GDT, see if it leaks parts of the kernel stack pointer. 791 * 792 * This tests for CVE-2014-8133. 793 */ 794 total_nerrs += test_valid_sigreturn(64, true, 795 GDT3(gdt_data16_idx)); 796 total_nerrs += test_valid_sigreturn(32, true, 797 GDT3(gdt_data16_idx)); 798 total_nerrs += test_valid_sigreturn(16, true, 799 GDT3(gdt_data16_idx)); 800 } 801 802 #ifdef __x86_64__ 803 /* Nasty ABI case: check SS corruption handling. */ 804 sig_corrupt_final_ss = 1; 805 total_nerrs += test_valid_sigreturn(32, false, -1); 806 total_nerrs += test_valid_sigreturn(32, true, -1); 807 sig_corrupt_final_ss = 0; 808 #endif 809 810 /* 811 * We're done testing valid sigreturn cases. Now we test states 812 * for which sigreturn itself will succeed but the subsequent 813 * entry to user mode will fail. 814 * 815 * Depending on the failure mode and the kernel bitness, these 816 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. 
817 */ 818 clearhandler(SIGTRAP); 819 sethandler(SIGSEGV, sigtrap, SA_ONSTACK); 820 sethandler(SIGBUS, sigtrap, SA_ONSTACK); 821 sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ 822 823 /* Easy failures: invalid SS, resulting in #GP(0) */ 824 test_bad_iret(64, ldt_nonexistent_sel, -1); 825 test_bad_iret(32, ldt_nonexistent_sel, -1); 826 test_bad_iret(16, ldt_nonexistent_sel, -1); 827 828 /* These fail because SS isn't a data segment, resulting in #GP(SS) */ 829 test_bad_iret(64, my_cs, -1); 830 test_bad_iret(32, my_cs, -1); 831 test_bad_iret(16, my_cs, -1); 832 833 /* Try to return to a not-present code segment, triggering #NP(SS). */ 834 test_bad_iret(32, my_ss, npcode32_sel); 835 836 /* 837 * Try to return to a not-present but otherwise valid data segment. 838 * This will cause IRET to fail with #SS on the espfix stack. This 839 * exercises CVE-2014-9322. 840 * 841 * Note that, if espfix is enabled, 64-bit Linux will lose track 842 * of the actual cause of failure and report #GP(0) instead. 843 * This would be very difficult for Linux to avoid, because 844 * espfix64 causes IRET failures to be promoted to #DF, so the 845 * original exception frame is never pushed onto the stack. 846 */ 847 test_bad_iret(32, npdata32_sel, -1); 848 849 /* 850 * Try to return to a not-present but otherwise valid data 851 * segment without invoking espfix. Newer kernels don't allow 852 * this to happen in the first place. On older kernels, though, 853 * this can trigger CVE-2014-9322. 854 */ 855 if (gdt_npdata32_idx) 856 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); 857 858 #ifdef __x86_64__ 859 total_nerrs += test_nonstrict_ss(); 860 #endif 861 862 return total_nerrs ? 1 : 0; 863 } 864