1 /* 2 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace 3 * Copyright (c) 2014-2015 Andrew Lutomirski 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * This is a series of tests that exercises the sigreturn(2) syscall and 15 * the IRET / SYSRET paths in the kernel. 16 * 17 * For now, this focuses on the effects of unusual CS and SS values, 18 * and it has a bunch of tests to make sure that ESP/RSP is restored 19 * properly. 20 * 21 * The basic idea behind these tests is to raise(SIGUSR1) to create a 22 * sigcontext frame, plug in the values to be tested, and then return, 23 * which implicitly invokes sigreturn(2) and programs the user context 24 * as desired. 25 * 26 * For tests for which we expect sigreturn and the subsequent return to 27 * user mode to succeed, we return to a short trampoline that generates 28 * SIGTRAP so that the meat of the tests can be ordinary C code in a 29 * SIGTRAP handler. 30 * 31 * The inner workings of each test is documented below. 32 * 33 * Do not run on outdated, unpatched kernels at risk of nasty crashes. 34 */ 35 36 #define _GNU_SOURCE 37 38 #include <sys/time.h> 39 #include <time.h> 40 #include <stdlib.h> 41 #include <sys/syscall.h> 42 #include <unistd.h> 43 #include <stdio.h> 44 #include <string.h> 45 #include <inttypes.h> 46 #include <sys/mman.h> 47 #include <sys/signal.h> 48 #include <sys/ucontext.h> 49 #include <asm/ldt.h> 50 #include <err.h> 51 #include <setjmp.h> 52 #include <stddef.h> 53 #include <stdbool.h> 54 #include <sys/ptrace.h> 55 #include <sys/user.h> 56 57 /* Pull in AR_xyz defines. */ 58 typedef unsigned int u32; 59 typedef unsigned short u16; 60 #include "../../../../arch/x86/include/asm/desc_defs.h" 61 62 /* 63 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc 64 * headers. 65 */ 66 #ifdef __x86_64__ 67 /* 68 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on 69 * kernels that save SS in the sigcontext. All kernels that set 70 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp 71 * regardless of SS (i.e. they implement espfix). 72 * 73 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS 74 * when delivering a signal that came from 64-bit code. 75 * 76 * Sigreturn restores SS as follows: 77 * 78 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || 79 * saved CS is not 64-bit) 80 * new SS = saved SS (will fail IRET and signal if invalid) 81 * else 82 * new SS = a flat 32-bit data segment 83 */ 84 #define UC_SIGCONTEXT_SS 0x2 85 #define UC_STRICT_RESTORE_SS 0x4 86 #endif 87 88 /* 89 * In principle, this test can run on Linux emulation layers (e.g. 90 * Illumos "LX branded zones"). Solaris-based kernels reserve LDT 91 * entries 0-5 for their own internal purposes, so start our LDT 92 * allocations above that reservation. (The tests don't pass on LX 93 * branded zones, but at least this lets them run.) 94 */ 95 #define LDT_OFFSET 6 96 97 /* An aligned stack accessible through some of our segments. */ 98 static unsigned char stack16[65536] __attribute__((aligned(4096))); 99 100 /* 101 * An aligned int3 instruction used as a trampoline. Some of the tests 102 * want to fish out their ss values, so this trampoline copies ss to eax 103 * before the int3. 104 */ 105 asm (".pushsection .text\n\t" 106 ".type int3, @function\n\t" 107 ".align 4096\n\t" 108 "int3:\n\t" 109 "mov %ss,%eax\n\t" 110 "int3\n\t" 111 ".size int3, . - int3\n\t" 112 ".align 4096, 0xcc\n\t" 113 ".popsection"); 114 extern char int3[4096]; 115 116 /* 117 * At startup, we prepapre: 118 * 119 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero 120 * descriptor or out of bounds). 121 * - code16_sel: A 16-bit LDT code segment pointing to int3. 122 * - data16_sel: A 16-bit LDT data segment pointing to stack16. 123 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. 124 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. 125 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. 126 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to 127 * stack16. 128 * 129 * For no particularly good reason, xyz_sel is a selector value with the 130 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the 131 * descriptor table. These variables will be zero if their respective 132 * segments could not be allocated. 133 */ 134 static unsigned short ldt_nonexistent_sel; 135 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; 136 137 static unsigned short gdt_data16_idx, gdt_npdata32_idx; 138 139 static unsigned short GDT3(int idx) 140 { 141 return (idx << 3) | 3; 142 } 143 144 static unsigned short LDT3(int idx) 145 { 146 return (idx << 3) | 7; 147 } 148 149 /* Our sigaltstack scratch space. */ 150 static char altstack_data[SIGSTKSZ]; 151 152 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 153 int flags) 154 { 155 struct sigaction sa; 156 memset(&sa, 0, sizeof(sa)); 157 sa.sa_sigaction = handler; 158 sa.sa_flags = SA_SIGINFO | flags; 159 sigemptyset(&sa.sa_mask); 160 if (sigaction(sig, &sa, 0)) 161 err(1, "sigaction"); 162 } 163 164 static void clearhandler(int sig) 165 { 166 struct sigaction sa; 167 memset(&sa, 0, sizeof(sa)); 168 sa.sa_handler = SIG_DFL; 169 sigemptyset(&sa.sa_mask); 170 if (sigaction(sig, &sa, 0)) 171 err(1, "sigaction"); 172 } 173 174 static void add_ldt(const struct user_desc *desc, unsigned short *var, 175 const char *name) 176 { 177 if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { 178 *var = LDT3(desc->entry_number); 179 } else { 180 printf("[NOTE]\tFailed to create %s segment\n", name); 181 *var = 0; 182 } 183 } 184 185 static void setup_ldt(void) 186 { 187 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) 188 errx(1, "stack16 is too high\n"); 189 if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) 190 errx(1, "int3 is too high\n"); 191 192 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); 193 194 const struct user_desc code16_desc = { 195 .entry_number = LDT_OFFSET + 0, 196 .base_addr = (unsigned long)int3, 197 .limit = 4095, 198 .seg_32bit = 0, 199 .contents = 2, /* Code, not conforming */ 200 .read_exec_only = 0, 201 .limit_in_pages = 0, 202 .seg_not_present = 0, 203 .useable = 0 204 }; 205 add_ldt(&code16_desc, &code16_sel, "code16"); 206 207 const struct user_desc data16_desc = { 208 .entry_number = LDT_OFFSET + 1, 209 .base_addr = (unsigned long)stack16, 210 .limit = 0xffff, 211 .seg_32bit = 0, 212 .contents = 0, /* Data, grow-up */ 213 .read_exec_only = 0, 214 .limit_in_pages = 0, 215 .seg_not_present = 0, 216 .useable = 0 217 }; 218 add_ldt(&data16_desc, &data16_sel, "data16"); 219 220 const struct user_desc npcode32_desc = { 221 .entry_number = LDT_OFFSET + 3, 222 .base_addr = (unsigned long)int3, 223 .limit = 4095, 224 .seg_32bit = 1, 225 .contents = 2, /* Code, not conforming */ 226 .read_exec_only = 0, 227 .limit_in_pages = 0, 228 .seg_not_present = 1, 229 .useable = 0 230 }; 231 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); 232 233 const struct user_desc npdata32_desc = { 234 .entry_number = LDT_OFFSET + 4, 235 .base_addr = (unsigned long)stack16, 236 .limit = 0xffff, 237 .seg_32bit = 1, 238 .contents = 0, /* Data, grow-up */ 239 .read_exec_only = 0, 240 .limit_in_pages = 0, 241 .seg_not_present = 1, 242 .useable = 0 243 }; 244 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); 245 246 struct user_desc gdt_data16_desc = { 247 .entry_number = -1, 248 .base_addr = (unsigned long)stack16, 249 .limit = 0xffff, 250 .seg_32bit = 0, 251 .contents = 0, /* Data, grow-up */ 252 .read_exec_only = 0, 253 .limit_in_pages = 0, 254 .seg_not_present = 0, 255 .useable = 0 256 }; 257 258 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { 259 /* 260 * This probably indicates vulnerability to CVE-2014-8133. 261 * Merely getting here isn't definitive, though, and we'll 262 * diagnose the problem for real later on. 263 */ 264 printf("[WARN]\tset_thread_area allocated data16 at index %d\n", 265 gdt_data16_desc.entry_number); 266 gdt_data16_idx = gdt_data16_desc.entry_number; 267 } else { 268 printf("[OK]\tset_thread_area refused 16-bit data\n"); 269 } 270 271 struct user_desc gdt_npdata32_desc = { 272 .entry_number = -1, 273 .base_addr = (unsigned long)stack16, 274 .limit = 0xffff, 275 .seg_32bit = 1, 276 .contents = 0, /* Data, grow-up */ 277 .read_exec_only = 0, 278 .limit_in_pages = 0, 279 .seg_not_present = 1, 280 .useable = 0 281 }; 282 283 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { 284 /* 285 * As a hardening measure, newer kernels don't allow this. 286 */ 287 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", 288 gdt_npdata32_desc.entry_number); 289 gdt_npdata32_idx = gdt_npdata32_desc.entry_number; 290 } else { 291 printf("[OK]\tset_thread_area refused 16-bit data\n"); 292 } 293 } 294 295 /* State used by our signal handlers. */ 296 static gregset_t initial_regs, requested_regs, resulting_regs; 297 298 /* Instructions for the SIGUSR1 handler. */ 299 static volatile unsigned short sig_cs, sig_ss; 300 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; 301 #ifdef __x86_64__ 302 static volatile sig_atomic_t sig_corrupt_final_ss; 303 #endif 304 305 /* Abstractions for some 32-bit vs 64-bit differences. */ 306 #ifdef __x86_64__ 307 # define REG_IP REG_RIP 308 # define REG_SP REG_RSP 309 # define REG_AX REG_RAX 310 311 struct selectors { 312 unsigned short cs, gs, fs, ss; 313 }; 314 315 static unsigned short *ssptr(ucontext_t *ctx) 316 { 317 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 318 return &sels->ss; 319 } 320 321 static unsigned short *csptr(ucontext_t *ctx) 322 { 323 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 324 return &sels->cs; 325 } 326 #else 327 # define REG_IP REG_EIP 328 # define REG_SP REG_ESP 329 # define REG_AX REG_EAX 330 331 static greg_t *ssptr(ucontext_t *ctx) 332 { 333 return &ctx->uc_mcontext.gregs[REG_SS]; 334 } 335 336 static greg_t *csptr(ucontext_t *ctx) 337 { 338 return &ctx->uc_mcontext.gregs[REG_CS]; 339 } 340 #endif 341 342 /* 343 * Checks a given selector for its code bitness or returns -1 if it's not 344 * a usable code segment selector. 345 */ 346 int cs_bitness(unsigned short cs) 347 { 348 uint32_t valid = 0, ar; 349 asm ("lar %[cs], %[ar]\n\t" 350 "jnz 1f\n\t" 351 "mov $1, %[valid]\n\t" 352 "1:" 353 : [ar] "=r" (ar), [valid] "+rm" (valid) 354 : [cs] "r" (cs)); 355 356 if (!valid) 357 return -1; 358 359 bool db = (ar & (1 << 22)); 360 bool l = (ar & (1 << 21)); 361 362 if (!(ar & (1<<11))) 363 return -1; /* Not code. */ 364 365 if (l && !db) 366 return 64; 367 else if (!l && db) 368 return 32; 369 else if (!l && !db) 370 return 16; 371 else 372 return -1; /* Unknown bitness. */ 373 } 374 375 /* 376 * Checks a given selector for its code bitness or returns -1 if it's not 377 * a usable code segment selector. 378 */ 379 bool is_valid_ss(unsigned short cs) 380 { 381 uint32_t valid = 0, ar; 382 asm ("lar %[cs], %[ar]\n\t" 383 "jnz 1f\n\t" 384 "mov $1, %[valid]\n\t" 385 "1:" 386 : [ar] "=r" (ar), [valid] "+rm" (valid) 387 : [cs] "r" (cs)); 388 389 if (!valid) 390 return false; 391 392 if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && 393 (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) 394 return false; 395 396 return (ar & AR_P); 397 } 398 399 /* Number of errors in the current test case. */ 400 static volatile sig_atomic_t nerrs; 401 402 static void validate_signal_ss(int sig, ucontext_t *ctx) 403 { 404 #ifdef __x86_64__ 405 bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); 406 407 if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { 408 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); 409 nerrs++; 410 411 /* 412 * This happens on Linux 4.1. The rest will fail, too, so 413 * return now to reduce the noise. 414 */ 415 return; 416 } 417 418 /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ 419 if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { 420 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", 421 sig); 422 nerrs++; 423 } 424 425 if (is_valid_ss(*ssptr(ctx))) { 426 /* 427 * DOSEMU was written before 64-bit sigcontext had SS, and 428 * it tries to figure out the signal source SS by looking at 429 * the physical register. Make sure that keeps working. 430 */ 431 unsigned short hw_ss; 432 asm ("mov %%ss, %0" : "=rm" (hw_ss)); 433 if (hw_ss != *ssptr(ctx)) { 434 printf("[FAIL]\tHW SS didn't match saved SS\n"); 435 nerrs++; 436 } 437 } 438 #endif 439 } 440 441 /* 442 * SIGUSR1 handler. Sets CS and SS as requested and points IP to the 443 * int3 trampoline. Sets SP to a large known value so that we can see 444 * whether the value round-trips back to user mode correctly. 445 */ 446 static void sigusr1(int sig, siginfo_t *info, void *ctx_void) 447 { 448 ucontext_t *ctx = (ucontext_t*)ctx_void; 449 450 validate_signal_ss(sig, ctx); 451 452 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 453 454 *csptr(ctx) = sig_cs; 455 *ssptr(ctx) = sig_ss; 456 457 ctx->uc_mcontext.gregs[REG_IP] = 458 sig_cs == code16_sel ? 0 : (unsigned long)&int3; 459 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; 460 ctx->uc_mcontext.gregs[REG_AX] = 0; 461 462 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 463 requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ 464 465 return; 466 } 467 468 /* 469 * Called after a successful sigreturn (via int3) or from a failed 470 * sigreturn (directly by kernel). Restores our state so that the 471 * original raise(SIGUSR1) returns. 472 */ 473 static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 474 { 475 ucontext_t *ctx = (ucontext_t*)ctx_void; 476 477 validate_signal_ss(sig, ctx); 478 479 sig_err = ctx->uc_mcontext.gregs[REG_ERR]; 480 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; 481 482 unsigned short ss; 483 asm ("mov %%ss,%0" : "=r" (ss)); 484 485 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; 486 if (asm_ss != sig_ss && sig == SIGTRAP) { 487 /* Sanity check failure. */ 488 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", 489 ss, *ssptr(ctx), (unsigned long long)asm_ss); 490 nerrs++; 491 } 492 493 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 494 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); 495 496 #ifdef __x86_64__ 497 if (sig_corrupt_final_ss) { 498 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { 499 printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); 500 nerrs++; 501 } else { 502 /* 503 * DOSEMU transitions from 32-bit to 64-bit mode by 504 * adjusting sigcontext, and it requires that this work 505 * even if the saved SS is bogus. 506 */ 507 printf("\tCorrupting SS on return to 64-bit mode\n"); 508 *ssptr(ctx) = 0; 509 } 510 } 511 #endif 512 513 sig_trapped = sig; 514 } 515 516 #ifdef __x86_64__ 517 /* Tests recovery if !UC_STRICT_RESTORE_SS */ 518 static void sigusr2(int sig, siginfo_t *info, void *ctx_void) 519 { 520 ucontext_t *ctx = (ucontext_t*)ctx_void; 521 522 if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { 523 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); 524 nerrs++; 525 return; /* We can't do the rest. */ 526 } 527 528 ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; 529 *ssptr(ctx) = 0; 530 531 /* Return. The kernel should recover without sending another signal. */ 532 } 533 534 static int test_nonstrict_ss(void) 535 { 536 clearhandler(SIGUSR1); 537 clearhandler(SIGTRAP); 538 clearhandler(SIGSEGV); 539 clearhandler(SIGILL); 540 sethandler(SIGUSR2, sigusr2, 0); 541 542 nerrs = 0; 543 544 printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); 545 raise(SIGUSR2); 546 if (!nerrs) 547 printf("[OK]\tIt worked\n"); 548 549 return nerrs; 550 } 551 #endif 552 553 /* Finds a usable code segment of the requested bitness. */ 554 int find_cs(int bitness) 555 { 556 unsigned short my_cs; 557 558 asm ("mov %%cs,%0" : "=r" (my_cs)); 559 560 if (cs_bitness(my_cs) == bitness) 561 return my_cs; 562 if (cs_bitness(my_cs + (2 << 3)) == bitness) 563 return my_cs + (2 << 3); 564 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) 565 return my_cs - (2 << 3); 566 if (cs_bitness(code16_sel) == bitness) 567 return code16_sel; 568 569 printf("[WARN]\tCould not find %d-bit CS\n", bitness); 570 return -1; 571 } 572 573 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) 574 { 575 int cs = find_cs(cs_bits); 576 if (cs == -1) { 577 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", 578 cs_bits, use_16bit_ss ? 16 : 32); 579 return 0; 580 } 581 582 if (force_ss != -1) { 583 sig_ss = force_ss; 584 } else { 585 if (use_16bit_ss) { 586 if (!data16_sel) { 587 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", 588 cs_bits); 589 return 0; 590 } 591 sig_ss = data16_sel; 592 } else { 593 asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); 594 } 595 } 596 597 sig_cs = cs; 598 599 printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", 600 cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, 601 (sig_ss & 4) ? "" : ", GDT"); 602 603 raise(SIGUSR1); 604 605 nerrs = 0; 606 607 /* 608 * Check that each register had an acceptable value when the 609 * int3 trampoline was invoked. 610 */ 611 for (int i = 0; i < NGREG; i++) { 612 greg_t req = requested_regs[i], res = resulting_regs[i]; 613 if (i == REG_TRAPNO || i == REG_IP) 614 continue; /* don't care */ 615 if (i == REG_SP) { 616 printf("\tSP: %llx -> %llx\n", (unsigned long long)req, 617 (unsigned long long)res); 618 619 /* 620 * In many circumstances, the high 32 bits of rsp 621 * are zeroed. For example, we could be a real 622 * 32-bit program, or we could hit any of a number 623 * of poorly-documented IRET or segmented ESP 624 * oddities. If this happens, it's okay. 625 */ 626 if (res == (req & 0xFFFFFFFF)) 627 continue; /* OK; not expected to work */ 628 } 629 630 bool ignore_reg = false; 631 #if __i386__ 632 if (i == REG_UESP) 633 ignore_reg = true; 634 #else 635 if (i == REG_CSGSFS) { 636 struct selectors *req_sels = 637 (void *)&requested_regs[REG_CSGSFS]; 638 struct selectors *res_sels = 639 (void *)&resulting_regs[REG_CSGSFS]; 640 if (req_sels->cs != res_sels->cs) { 641 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", 642 req_sels->cs, res_sels->cs); 643 nerrs++; 644 } 645 646 if (req_sels->ss != res_sels->ss) { 647 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", 648 req_sels->ss, res_sels->ss); 649 nerrs++; 650 } 651 652 continue; 653 } 654 #endif 655 656 /* Sanity check on the kernel */ 657 if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { 658 printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", 659 (unsigned long long)requested_regs[i], 660 (unsigned long long)resulting_regs[i]); 661 nerrs++; 662 continue; 663 } 664 665 if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { 666 /* 667 * SP is particularly interesting here. The 668 * usual cause of failures is that we hit the 669 * nasty IRET case of returning to a 16-bit SS, 670 * in which case bits 16:31 of the *kernel* 671 * stack pointer persist in ESP. 672 */ 673 printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", 674 i, (unsigned long long)requested_regs[i], 675 (unsigned long long)resulting_regs[i]); 676 nerrs++; 677 } 678 } 679 680 if (nerrs == 0) 681 printf("[OK]\tall registers okay\n"); 682 683 return nerrs; 684 } 685 686 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) 687 { 688 int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; 689 if (cs == -1) 690 return 0; 691 692 sig_cs = cs; 693 sig_ss = ss; 694 695 printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", 696 cs_bits, sig_cs, sig_ss); 697 698 sig_trapped = 0; 699 raise(SIGUSR1); 700 if (sig_trapped) { 701 char errdesc[32] = ""; 702 if (sig_err) { 703 const char *src = (sig_err & 1) ? " EXT" : ""; 704 const char *table; 705 if ((sig_err & 0x6) == 0x0) 706 table = "GDT"; 707 else if ((sig_err & 0x6) == 0x4) 708 table = "LDT"; 709 else if ((sig_err & 0x6) == 0x2) 710 table = "IDT"; 711 else 712 table = "???"; 713 714 sprintf(errdesc, "%s%s index %d, ", 715 table, src, sig_err >> 3); 716 } 717 718 char trapname[32]; 719 if (sig_trapno == 13) 720 strcpy(trapname, "GP"); 721 else if (sig_trapno == 11) 722 strcpy(trapname, "NP"); 723 else if (sig_trapno == 12) 724 strcpy(trapname, "SS"); 725 else if (sig_trapno == 32) 726 strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ 727 else 728 sprintf(trapname, "%d", sig_trapno); 729 730 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", 731 trapname, (unsigned long)sig_err, 732 errdesc, strsignal(sig_trapped)); 733 return 0; 734 } else { 735 /* 736 * This also implicitly tests UC_STRICT_RESTORE_SS: 737 * We check that these signals set UC_STRICT_RESTORE_SS and, 738 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, 739 * then we won't get SIGSEGV. 740 */ 741 printf("[FAIL]\tDid not get SIGSEGV\n"); 742 return 1; 743 } 744 } 745 746 int main() 747 { 748 int total_nerrs = 0; 749 unsigned short my_cs, my_ss; 750 751 asm volatile ("mov %%cs,%0" : "=r" (my_cs)); 752 asm volatile ("mov %%ss,%0" : "=r" (my_ss)); 753 setup_ldt(); 754 755 stack_t stack = { 756 .ss_sp = altstack_data, 757 .ss_size = SIGSTKSZ, 758 }; 759 if (sigaltstack(&stack, NULL) != 0) 760 err(1, "sigaltstack"); 761 762 sethandler(SIGUSR1, sigusr1, 0); 763 sethandler(SIGTRAP, sigtrap, SA_ONSTACK); 764 765 /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ 766 total_nerrs += test_valid_sigreturn(64, false, -1); 767 total_nerrs += test_valid_sigreturn(32, false, -1); 768 total_nerrs += test_valid_sigreturn(16, false, -1); 769 770 /* 771 * Test easy espfix cases: return to a 16-bit LDT SS in each possible 772 * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. 773 * 774 * This catches the original missing-espfix-on-64-bit-kernels issue 775 * as well as CVE-2014-8134. 776 */ 777 total_nerrs += test_valid_sigreturn(64, true, -1); 778 total_nerrs += test_valid_sigreturn(32, true, -1); 779 total_nerrs += test_valid_sigreturn(16, true, -1); 780 781 if (gdt_data16_idx) { 782 /* 783 * For performance reasons, Linux skips espfix if SS points 784 * to the GDT. If we were able to allocate a 16-bit SS in 785 * the GDT, see if it leaks parts of the kernel stack pointer. 786 * 787 * This tests for CVE-2014-8133. 788 */ 789 total_nerrs += test_valid_sigreturn(64, true, 790 GDT3(gdt_data16_idx)); 791 total_nerrs += test_valid_sigreturn(32, true, 792 GDT3(gdt_data16_idx)); 793 total_nerrs += test_valid_sigreturn(16, true, 794 GDT3(gdt_data16_idx)); 795 } 796 797 #ifdef __x86_64__ 798 /* Nasty ABI case: check SS corruption handling. */ 799 sig_corrupt_final_ss = 1; 800 total_nerrs += test_valid_sigreturn(32, false, -1); 801 total_nerrs += test_valid_sigreturn(32, true, -1); 802 sig_corrupt_final_ss = 0; 803 #endif 804 805 /* 806 * We're done testing valid sigreturn cases. Now we test states 807 * for which sigreturn itself will succeed but the subsequent 808 * entry to user mode will fail. 809 * 810 * Depending on the failure mode and the kernel bitness, these 811 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. 812 */ 813 clearhandler(SIGTRAP); 814 sethandler(SIGSEGV, sigtrap, SA_ONSTACK); 815 sethandler(SIGBUS, sigtrap, SA_ONSTACK); 816 sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ 817 818 /* Easy failures: invalid SS, resulting in #GP(0) */ 819 test_bad_iret(64, ldt_nonexistent_sel, -1); 820 test_bad_iret(32, ldt_nonexistent_sel, -1); 821 test_bad_iret(16, ldt_nonexistent_sel, -1); 822 823 /* These fail because SS isn't a data segment, resulting in #GP(SS) */ 824 test_bad_iret(64, my_cs, -1); 825 test_bad_iret(32, my_cs, -1); 826 test_bad_iret(16, my_cs, -1); 827 828 /* Try to return to a not-present code segment, triggering #NP(SS). */ 829 test_bad_iret(32, my_ss, npcode32_sel); 830 831 /* 832 * Try to return to a not-present but otherwise valid data segment. 833 * This will cause IRET to fail with #SS on the espfix stack. This 834 * exercises CVE-2014-9322. 835 * 836 * Note that, if espfix is enabled, 64-bit Linux will lose track 837 * of the actual cause of failure and report #GP(0) instead. 838 * This would be very difficult for Linux to avoid, because 839 * espfix64 causes IRET failures to be promoted to #DF, so the 840 * original exception frame is never pushed onto the stack. 841 */ 842 test_bad_iret(32, npdata32_sel, -1); 843 844 /* 845 * Try to return to a not-present but otherwise valid data 846 * segment without invoking espfix. Newer kernels don't allow 847 * this to happen in the first place. On older kernels, though, 848 * this can trigger CVE-2014-9322. 849 */ 850 if (gdt_npdata32_idx) 851 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); 852 853 #ifdef __x86_64__ 854 total_nerrs += test_nonstrict_ss(); 855 #endif 856 857 return total_nerrs ? 1 : 0; 858 } 859