/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/context_tracking.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <linux/kgdb.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/kexec.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/init.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/io.h>

#ifdef CONFIG_EISA
#include <linux/ioport.h>
#include <linux/eisa.h>
#endif

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <asm/kmemcheck.h>
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
#include <asm/alternative.h>

#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>

/* No need to be aligned, but done to keep all IDTs defined the same way. */
gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>

asmlinkage int system_call(void);
#endif

/* Must be page-aligned because the real IDT is used in a fixmap. */
gate_desc idt_table[NR_VECTORS] __page_aligned_bss;

DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);

static inline void conditional_sti(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
}

static inline void preempt_conditional_sti(struct pt_regs *regs)
{
	preempt_count_inc();
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
}

static inline void conditional_cli(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_disable();
}

static inline void preempt_conditional_cli(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_disable();
	preempt_count_dec();
}
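
/*
 * Illustrative aside (added, not in the original source): these helpers
 * mirror the IRQ state of the interrupted context instead of enabling
 * interrupts unconditionally.  The preempt_* variants are paired around
 * do_trap() in the IST-entered handlers later in this file, in the pattern
 *
 *	debug_stack_usage_inc();
 *	preempt_conditional_sti(regs);
 *	do_trap(...);
 *	preempt_conditional_cli(regs);
 *	debug_stack_usage_dec();
 *
 * so that preemption stays disabled while interrupts are (possibly) on.
 */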

static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
		  struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK) {
		/*
		 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
		 * On nmi (interrupt 2), do_trap should not be called.
		 */
		if (trapnr < X86_TRAP_UD) {
			if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
						error_code, trapnr))
				return 0;
		}
		return -1;
	}
#endif
	if (!user_mode(regs)) {
		if (!fixup_exception(regs)) {
			tsk->thread.error_code = error_code;
			tsk->thread.trap_nr = trapnr;
			die(str, regs, error_code);
		}
		return 0;
	}

	return -1;
}

static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
				 siginfo_t *info)
{
	unsigned long siaddr;
	int sicode;

	switch (trapnr) {
	default:
		return SEND_SIG_PRIV;

	case X86_TRAP_DE:
		sicode = FPE_INTDIV;
		siaddr = uprobe_get_trap_addr(regs);
		break;
	case X86_TRAP_UD:
		sicode = ILL_ILLOPN;
		siaddr = uprobe_get_trap_addr(regs);
		break;
	case X86_TRAP_AC:
		sicode = BUS_ADRALN;
		siaddr = 0;
		break;
	}

	info->si_signo = signr;
	info->si_errno = 0;
	info->si_code = sicode;
	info->si_addr = (void __user *)siaddr;
	return info;
}

static void
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
	long error_code, siginfo_t *info)
{
	struct task_struct *tsk = current;

	if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
		return;
	/*
	 * We want error_code and trap_nr set for userspace faults and
	 * kernelspace faults which result in die(), but not
	 * kernelspace faults which are fixed up.  die() gives the
	 * process no chance to handle the signal and notice the
	 * kernel fault information, so that won't result in polluting
	 * the information about previously queued, but not yet
	 * delivered, faults.  See also do_general_protection below.
	 */
	tsk->thread.error_code = error_code;
	tsk->thread.trap_nr = trapnr;

#ifdef CONFIG_X86_64
	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
	    printk_ratelimit()) {
		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
			tsk->comm, tsk->pid, str,
			regs->ip, regs->sp, error_code);
		print_vma_addr(" in ", regs->ip);
		pr_cont("\n");
	}
#endif

	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
}
NOKPROBE_SYMBOL(do_trap);

static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
			  unsigned long trapnr, int signr)
{
	enum ctx_state prev_state = exception_enter();
	siginfo_t info;

	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
			NOTIFY_STOP) {
		conditional_sti(regs);
		do_trap(trapnr, signr, str, regs, error_code,
			fill_trap_info(regs, signr, trapnr, &info));
	}

	exception_exit(prev_state);
}

#define DO_ERROR(trapnr, signr, str, name)				\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
	do_error_trap(regs, error_code, str, trapnr, signr);		\
}

DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
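
/*
 * Worked example (added for illustration, not in the original source):
 * each DO_ERROR() invocation above stamps out one trivial trap entry
 * point.  The first line, for instance, expands to
 *
 *	dotraplinkage void do_divide_error(struct pt_regs *regs,
 *					   long error_code)
 *	{
 *		do_error_trap(regs, error_code, "divide error",
 *			      X86_TRAP_DE, SIGFPE);
 *	}
 *
 * which backs the divide_error stub installed for vector X86_TRAP_DE in
 * trap_init() below and ends up delivering SIGFPE through do_trap().
 */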

#ifdef CONFIG_X86_64
/* Runs on IST stack */
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
	static const char str[] = "double fault";
	struct task_struct *tsk = current;

#ifdef CONFIG_X86_ESPFIX64
	extern unsigned char native_irq_return_iret[];

	/*
	 * If IRET takes a non-IST fault on the espfix64 stack, then we
	 * end up promoting it to a doublefault.  In that case, modify
	 * the stack to make it look like we just entered the #GP
	 * handler from user space, similar to bad_iret.
	 */
	if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
		regs->cs == __KERNEL_CS &&
		regs->ip == (unsigned long)native_irq_return_iret)
	{
		struct pt_regs *normal_regs = task_pt_regs(current);

		/* Fake a #GP(0) from userspace. */
		memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
		normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
		regs->ip = (unsigned long)general_protection;
		regs->sp = (unsigned long)&normal_regs->orig_ax;
		return;
	}
#endif

	exception_enter();
	/* Return not checked because a double fault cannot be ignored */
	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

	tsk->thread.error_code = error_code;
	tsk->thread.trap_nr = X86_TRAP_DF;

#ifdef CONFIG_DOUBLEFAULT
	df_debug(regs, error_code);
#endif
	/*
	 * This is always a kernel trap and never fixable (and thus must
	 * never return).
	 */
	for (;;)
		die(str, regs, error_code);
}
#endif

dotraplinkage void
do_general_protection(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk;
	enum ctx_state prev_state;

	prev_state = exception_enter();
	conditional_sti(regs);

#ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK) {
		local_irq_enable();
		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
		goto exit;
	}
#endif

	tsk = current;
	if (!user_mode(regs)) {
		if (fixup_exception(regs))
			goto exit;

		tsk->thread.error_code = error_code;
		tsk->thread.trap_nr = X86_TRAP_GP;
		if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
			die("general protection fault", regs, error_code);
		goto exit;
	}

	tsk->thread.error_code = error_code;
	tsk->thread.trap_nr = X86_TRAP_GP;

	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
	    printk_ratelimit()) {
		pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
			tsk->comm, task_pid_nr(tsk),
			regs->ip, regs->sp, error_code);
		print_vma_addr(" in ", regs->ip);
		pr_cont("\n");
	}

	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
exit:
	exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_general_protection);
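
/*
 * Illustrative aside (added, not in the original source): #BP is a trap,
 * so on entry to do_int3() below regs->ip already points at the byte
 * following the 0xcc opcode.  That is why the ftrace and text_poke
 * handlers consulted first can recover simply by rewriting regs->ip and
 * returning, without any signal or notify_die() traffic, roughly:
 *
 *	if (poke_int3_handler(regs))
 *		return;		(regs->ip was redirected, nothing else to do)
 */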

/* May run on IST stack. */
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
	enum ctx_state prev_state;

#ifdef CONFIG_DYNAMIC_FTRACE
	/*
	 * ftrace must be first, everything else may cause a recursive crash.
	 * See note by declaration of modifying_ftrace_code in ftrace.c
	 */
	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
	    ftrace_int3_handler(regs))
		return;
#endif
	if (poke_int3_handler(regs))
		return;

	prev_state = exception_enter();
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
				SIGTRAP) == NOTIFY_STOP)
		goto exit;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */

#ifdef CONFIG_KPROBES
	if (kprobe_int3_handler(regs))
		goto exit;
#endif

	if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
			SIGTRAP) == NOTIFY_STOP)
		goto exit;

	/*
	 * Let others (NMI) know that the debug stack is in use
	 * as we may switch to the interrupt stack.
	 */
	debug_stack_usage_inc();
	preempt_conditional_sti(regs);
	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
	preempt_conditional_cli(regs);
	debug_stack_usage_dec();
exit:
	exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_int3);

#ifdef CONFIG_X86_64
/*
 * Help handler running on IST stack to switch back to user stack
 * for scheduling or signal handling.  The actual stack switch is done in
 * entry.S
 */
asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
{
	struct pt_regs *regs = eregs;
	/* Did already sync */
	if (eregs == (struct pt_regs *)eregs->sp)
		;
	/* Exception from user space */
	else if (user_mode(eregs))
		regs = task_pt_regs(current);
	/*
	 * Exception from kernel and interrupts are enabled.  Move to
	 * kernel process stack.
	 */
	else if (eregs->flags & X86_EFLAGS_IF)
		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
	if (eregs != regs)
		*regs = *eregs;
	return regs;
}
NOKPROBE_SYMBOL(sync_regs);

struct bad_iret_stack {
	void *error_entry_ret;
	struct pt_regs regs;
};

asmlinkage __visible
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
{
	/*
	 * This is called from entry_64.S early in handling a fault
	 * caused by a bad iret to user mode.  To handle the fault
	 * correctly, we want to move our stack frame to task_pt_regs
	 * and we want to pretend that the exception came from the
	 * iret target.
	 */
	struct bad_iret_stack *new_stack =
		container_of(task_pt_regs(current),
			     struct bad_iret_stack, regs);

	/* Copy the IRET target to the new stack. */
	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);

	/* Copy the remainder of the stack from the current stack. */
	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));

	BUG_ON(!user_mode_vm(&new_stack->regs));
	return new_stack;
}
#endif

/*
 * Our handling of the processor debug registers is non-trivial.
 * We do not clear them on entry and exit from the kernel.  Therefore
 * it is possible to get a watchpoint trap here from inside the kernel.
 * However, the code in ./ptrace.c has ensured that the user can
 * only set watchpoints on userspace addresses.  Therefore the in-kernel
 * watchpoint trap can only occur in code which is reading/writing
 * from user space.  Such code must not hold kernel locks (since it
 * can equally take a page fault), therefore it is safe to call
 * force_sig_info even though that claims and releases locks.
 *
 * Code in ./signal.c ensures that the debug control register
 * is restored before we deliver any signal, and therefore that
 * user code runs with the correct debug control register even though
 * we clear it here.
 *
 * Being careful here means that we don't have to be as careful in a
 * lot of more complicated places (task switching can be a bit lazy
 * about restoring all the debug state, and ptrace doesn't have to
 * find every occurrence of the TF bit that could be saved away even
 * by user code)
 *
 * May run on IST stack.
 */
dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk = current;
	enum ctx_state prev_state;
	int user_icebp = 0;
	unsigned long dr6;
	int si_code;

	prev_state = exception_enter();

	get_debugreg(dr6, 6);

	/* Filter out all the reserved bits which are preset to 1 */
	dr6 &= ~DR6_RESERVED;

	/*
	 * If dr6 has no reason to give us about the origin of this trap,
	 * then it's very likely the result of an icebp/int01 trap.
	 * User wants a sigtrap for that.
	 */
	if (!dr6 && user_mode(regs))
		user_icebp = 1;

	/* Catch kmemcheck conditions first of all! */
	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
		goto exit;

	/* DR6 may or may not be cleared by the CPU */
	set_debugreg(0, 6);

	/*
	 * The processor cleared BTF, so don't mark that we need it set.
	 */
	clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);

	/* Store the virtualized DR6 value */
	tsk->thread.debugreg6 = dr6;

#ifdef CONFIG_KPROBES
	if (kprobe_debug_handler(regs))
		goto exit;
#endif

	if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
			SIGTRAP) == NOTIFY_STOP)
		goto exit;

	/*
	 * Let others (NMI) know that the debug stack is in use
	 * as we may switch to the interrupt stack.
	 */
	debug_stack_usage_inc();

	/* It's safe to allow irq's after DR6 has been saved */
	preempt_conditional_sti(regs);

	if (regs->flags & X86_VM_MASK) {
		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
					X86_TRAP_DB);
		preempt_conditional_cli(regs);
		debug_stack_usage_dec();
		goto exit;
	}

	/*
	 * Single-stepping through system calls: ignore any exceptions in
	 * kernel space, but re-enable TF when returning to user mode.
	 *
	 * We already checked v86 mode above, so we can check for kernel mode
	 * by just checking the CPL of CS.
	 */
	if ((dr6 & DR_STEP) && !user_mode(regs)) {
		tsk->thread.debugreg6 &= ~DR_STEP;
		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
		regs->flags &= ~X86_EFLAGS_TF;
	}
	si_code = get_si_code(tsk->thread.debugreg6);
	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
		send_sigtrap(tsk, regs, error_code, si_code);
	preempt_conditional_cli(regs);
	debug_stack_usage_dec();

exit:
	exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_debug);
"fpu exception" : 563 "simd exception"; 564 565 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) 566 return; 567 conditional_sti(regs); 568 569 if (!user_mode_vm(regs)) 570 { 571 if (!fixup_exception(regs)) { 572 task->thread.error_code = error_code; 573 task->thread.trap_nr = trapnr; 574 die(str, regs, error_code); 575 } 576 return; 577 } 578 579 /* 580 * Save the info for the exception handler and clear the error. 581 */ 582 save_init_fpu(task); 583 task->thread.trap_nr = trapnr; 584 task->thread.error_code = error_code; 585 info.si_signo = SIGFPE; 586 info.si_errno = 0; 587 info.si_addr = (void __user *)uprobe_get_trap_addr(regs); 588 if (trapnr == X86_TRAP_MF) { 589 unsigned short cwd, swd; 590 /* 591 * (~cwd & swd) will mask out exceptions that are not set to unmasked 592 * status. 0x3f is the exception bits in these regs, 0x200 is the 593 * C1 reg you need in case of a stack fault, 0x040 is the stack 594 * fault bit. We should only be taking one exception at a time, 595 * so if this combination doesn't produce any single exception, 596 * then we have a bad program that isn't synchronizing its FPU usage 597 * and it will suffer the consequences since we won't be able to 598 * fully reproduce the context of the exception 599 */ 600 cwd = get_fpu_cwd(task); 601 swd = get_fpu_swd(task); 602 603 err = swd & ~cwd; 604 } else { 605 /* 606 * The SIMD FPU exceptions are handled a little differently, as there 607 * is only a single status/control register. Thus, to determine which 608 * unmasked exception was caught we must mask the exception mask bits 609 * at 0x1f80, and then use these to mask the exception bits at 0x3f. 610 */ 611 unsigned short mxcsr = get_fpu_mxcsr(task); 612 err = ~(mxcsr >> 7) & mxcsr; 613 } 614 615 if (err & 0x001) { /* Invalid op */ 616 /* 617 * swd & 0x240 == 0x040: Stack Underflow 618 * swd & 0x240 == 0x240: Stack Overflow 619 * User must clear the SF bit (0x40) if set 620 */ 621 info.si_code = FPE_FLTINV; 622 } else if (err & 0x004) { /* Divide by Zero */ 623 info.si_code = FPE_FLTDIV; 624 } else if (err & 0x008) { /* Overflow */ 625 info.si_code = FPE_FLTOVF; 626 } else if (err & 0x012) { /* Denormal, Underflow */ 627 info.si_code = FPE_FLTUND; 628 } else if (err & 0x020) { /* Precision */ 629 info.si_code = FPE_FLTRES; 630 } else { 631 /* 632 * If we're using IRQ 13, or supposedly even some trap 633 * X86_TRAP_MF implementations, it's possible 634 * we get a spurious trap, which is not an error. 635 */ 636 return; 637 } 638 force_sig_info(SIGFPE, &info, task); 639 } 640 641 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) 642 { 643 enum ctx_state prev_state; 644 645 prev_state = exception_enter(); 646 math_error(regs, error_code, X86_TRAP_MF); 647 exception_exit(prev_state); 648 } 649 650 dotraplinkage void 651 do_simd_coprocessor_error(struct pt_regs *regs, long error_code) 652 { 653 enum ctx_state prev_state; 654 655 prev_state = exception_enter(); 656 math_error(regs, error_code, X86_TRAP_XF); 657 exception_exit(prev_state); 658 } 659 660 dotraplinkage void 661 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) 662 { 663 conditional_sti(regs); 664 #if 0 665 /* No need to warn about this any longer. 

dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
	conditional_sti(regs);
#if 0
	/* No need to warn about this any longer. */
	pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
}

asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
{
}

asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
{
}

/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (e.g. with local
 * interrupts disabled, as in the case of do_device_not_available).
 */
void math_state_restore(void)
{
	struct task_struct *tsk = current;

	if (!tsk_used_math(tsk)) {
		local_irq_enable();
		/*
		 * does a slab alloc which can sleep
		 */
		if (init_fpu(tsk)) {
			/*
			 * ran out of memory!
			 */
			do_group_exit(SIGKILL);
			return;
		}
		local_irq_disable();
	}

	__thread_fpu_begin(tsk);

	/*
	 * Paranoid restore.  Send a SIGSEGV if we fail to restore the state.
	 */
	if (unlikely(restore_fpu_checking(tsk))) {
		drop_init_fpu(tsk);
		force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
		return;
	}

	tsk->thread.fpu_counter++;
}
EXPORT_SYMBOL_GPL(math_state_restore);

dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
	enum ctx_state prev_state;

	prev_state = exception_enter();
	BUG_ON(use_eager_fpu());

#ifdef CONFIG_MATH_EMULATION
	if (read_cr0() & X86_CR0_EM) {
		struct math_emu_info info = { };

		conditional_sti(regs);

		info.regs = regs;
		math_emulate(&info);
		exception_exit(prev_state);
		return;
	}
#endif
	math_state_restore(); /* interrupts still off */
#ifdef CONFIG_X86_32
	conditional_sti(regs);
#endif
	exception_exit(prev_state);
}
NOKPROBE_SYMBOL(do_device_not_available);

#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
	siginfo_t info;
	enum ctx_state prev_state;

	prev_state = exception_enter();
	local_irq_enable();

	info.si_signo = SIGILL;
	info.si_errno = 0;
	info.si_code = ILL_BADSTK;
	info.si_addr = NULL;
	if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
			X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
		do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
			&info);
	}
	exception_exit(prev_state);
}
#endif
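
/*
 * Illustrative summary (added, not in the original source) of the gate
 * setup helpers from <asm/desc.h> as used below; semantics are roughly:
 *
 *	set_intr_gate(n, addr)           interrupt gate, DPL 0
 *	set_system_intr_gate(n, addr)    interrupt gate, DPL 3, so user space
 *	                                 may raise it with "int $n" (e.g. #OF)
 *	set_intr_gate_ist(n, addr, ist)  DPL 0 plus a dedicated IST stack
 *	set_system_intr_gate_ist(...)    DPL 3 plus an IST stack (e.g. #BP)
 *	set_system_trap_gate(n, addr)    trap gate, DPL 3; IF stays set on
 *	                                 entry (the 32-bit syscall vector)
 *	set_task_gate(n, gdt_entry)      32-bit task gate (the #DF TSS)
 *
 * Interrupt gates clear IF on delivery; trap gates leave it alone.
 */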

/* Set of traps needed for early debugging. */
void __init early_trap_init(void)
{
	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
	/* int3 can be called from all */
	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
#ifdef CONFIG_X86_32
	set_intr_gate(X86_TRAP_PF, page_fault);
#endif
	load_idt(&idt_descr);
}

void __init early_trap_pf_init(void)
{
#ifdef CONFIG_X86_64
	set_intr_gate(X86_TRAP_PF, page_fault);
#endif
}

void __init trap_init(void)
{
	int i;

#ifdef CONFIG_EISA
	void __iomem *p = early_ioremap(0x0FFFD9, 4);

	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
		EISA_bus = 1;
	early_iounmap(p, 4);
#endif

	set_intr_gate(X86_TRAP_DE, divide_error);
	set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
	/* int4 can be called from all */
	set_system_intr_gate(X86_TRAP_OF, &overflow);
	set_intr_gate(X86_TRAP_BR, bounds);
	set_intr_gate(X86_TRAP_UD, invalid_op);
	set_intr_gate(X86_TRAP_NM, device_not_available);
#ifdef CONFIG_X86_32
	set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
#else
	set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
#endif
	set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
	set_intr_gate(X86_TRAP_TS, invalid_TSS);
	set_intr_gate(X86_TRAP_NP, segment_not_present);
	set_intr_gate(X86_TRAP_SS, stack_segment);
	set_intr_gate(X86_TRAP_GP, general_protection);
	set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
	set_intr_gate(X86_TRAP_MF, coprocessor_error);
	set_intr_gate(X86_TRAP_AC, alignment_check);
#ifdef CONFIG_X86_MCE
	set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
#endif
	set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);

	/* Reserve all the builtin and the syscall vector: */
	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
		set_bit(i, used_vectors);

#ifdef CONFIG_IA32_EMULATION
	set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif

#ifdef CONFIG_X86_32
	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
	set_bit(SYSCALL_VECTOR, used_vectors);
#endif

	/*
	 * Set the IDT descriptor to a fixed read-only location, so that the
	 * "sidt" instruction will not leak the location of the kernel, and
	 * to defend the IDT against arbitrary memory write vulnerabilities.
	 * It will be reloaded in cpu_init().
	 */
	__set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
	idt_descr.address = fix_to_virt(FIX_RO_IDT);

	/*
	 * Should be a barrier for any external CPU state:
	 */
	cpu_init();

	x86_init.irqs.trap_init();

#ifdef CONFIG_X86_64
	memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
	set_nmi_gate(X86_TRAP_DB, &debug);
	set_nmi_gate(X86_TRAP_BP, &int3);
#endif
}
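
/*
 * Illustrative aside (added, not in the original source): on 64-bit each
 * IDT gate descriptor is 16 bytes, which is why the memcpy() above uses
 * IDT_ENTRIES * 16 to duplicate the whole table into debug_idt_table.
 * The set_nmi_gate() calls then install #DB and #BP entries in that copy
 * without an IST index (an assumption based on the <asm/desc.h> helpers),
 * so that debug traps taken while the DEBUG_STACK IST is already in use
 * do not clobber the same stack.
 */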