/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/context_tracking.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <linux/kgdb.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/kexec.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/init.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/io.h>

#ifdef CONFIG_EISA
#include <linux/ioport.h>
#include <linux/eisa.h>
#endif

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <asm/kmemcheck.h>
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
#include <asm/alternative.h>
#include <asm/fpu/xstate.h>
#include <asm/trace/mpx.h>
#include <asm/mpx.h>
#include <asm/vm86.h>

#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>

/* No need to be aligned, but done to keep all IDTs defined the same way. */
gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
#include <asm/proto.h>
#endif

/* Must be page-aligned because the real IDT is used in a fixmap. */
gate_desc idt_table[NR_VECTORS] __page_aligned_bss;

DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);

static inline void conditional_sti(struct pt_regs *regs)
{
        if (regs->flags & X86_EFLAGS_IF)
                local_irq_enable();
}

static inline void preempt_conditional_sti(struct pt_regs *regs)
{
        preempt_count_inc();
        if (regs->flags & X86_EFLAGS_IF)
                local_irq_enable();
}

static inline void conditional_cli(struct pt_regs *regs)
{
        if (regs->flags & X86_EFLAGS_IF)
                local_irq_disable();
}

static inline void preempt_conditional_cli(struct pt_regs *regs)
{
        if (regs->flags & X86_EFLAGS_IF)
                local_irq_disable();
        preempt_count_dec();
}

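/*
 * IST entry/exit accounting: let RCU know that we may have interrupted
 * an extended quiescent state (idle, or even NMI processing) and raise
 * the preempt count so the handler is treated as atomic.
 */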
void ist_enter(struct pt_regs *regs)
{
        if (user_mode(regs)) {
                RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
        } else {
                /*
                 * We might have interrupted pretty much anything.  In
                 * fact, if we're a machine check, we can even interrupt
                 * NMI processing.  We don't want in_nmi() to return true,
                 * but we need to notify RCU.
                 */
                rcu_nmi_enter();
        }

        /*
         * We are atomic because we're on the IST stack; or we're on
         * x86_32, in which case we still shouldn't schedule; or we're
         * on x86_64 and entered from user mode, in which case we're
         * still atomic unless ist_begin_non_atomic() is called.
         */
        preempt_count_add(HARDIRQ_OFFSET);

        /* This code is a bit fragile.  Test it. */
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
}

void ist_exit(struct pt_regs *regs)
{
        preempt_count_sub(HARDIRQ_OFFSET);

        if (!user_mode(regs))
                rcu_nmi_exit();
}

/**
 * ist_begin_non_atomic() - begin a non-atomic section in an IST exception
 * @regs:	regs passed to the IST exception handler
 *
 * IST exception handlers normally cannot schedule.  As a special
 * exception, if the exception interrupted userspace code (i.e.
 * user_mode(regs) would return true) and the exception was not
 * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
 * begins a non-atomic section within an ist_enter()/ist_exit() region.
 * Callers are responsible for enabling interrupts themselves inside
 * the non-atomic section, and callers must call ist_end_non_atomic()
 * before ist_exit().
 */
void ist_begin_non_atomic(struct pt_regs *regs)
{
        BUG_ON(!user_mode(regs));

        /*
         * Sanity check: we need to be on the normal thread stack.  This
         * will catch asm bugs and any attempt to use ist_preempt_enable
         * from double_fault.
         */
        BUG_ON((unsigned long)(current_top_of_stack() -
                               current_stack_pointer()) >= THREAD_SIZE);

        preempt_count_sub(HARDIRQ_OFFSET);
}

/**
 * ist_end_non_atomic() - end a non-atomic section in an IST exception
 *
 * Ends a non-atomic section started with ist_begin_non_atomic().
 */
void ist_end_non_atomic(void)
{
        preempt_count_add(HARDIRQ_OFFSET);
}

static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                  struct pt_regs *regs, long error_code)
{
        if (v8086_mode(regs)) {
                /*
                 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
                 * On nmi (interrupt 2), do_trap should not be called.
                 */
                if (trapnr < X86_TRAP_UD) {
                        if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
                                                error_code, trapnr))
                                return 0;
                }
                return -1;
        }

        if (!user_mode(regs)) {
                if (!fixup_exception(regs)) {
                        tsk->thread.error_code = error_code;
                        tsk->thread.trap_nr = trapnr;
                        die(str, regs, error_code);
                }
                return 0;
        }

        return -1;
}

static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
                                siginfo_t *info)
{
        unsigned long siaddr;
        int sicode;

        switch (trapnr) {
        default:
                return SEND_SIG_PRIV;

        case X86_TRAP_DE:
                sicode = FPE_INTDIV;
                siaddr = uprobe_get_trap_addr(regs);
                break;
        case X86_TRAP_UD:
                sicode = ILL_ILLOPN;
                siaddr = uprobe_get_trap_addr(regs);
                break;
        case X86_TRAP_AC:
                sicode = BUS_ADRALN;
                siaddr = 0;
                break;
        }

        info->si_signo = signr;
        info->si_errno = 0;
        info->si_code = sicode;
        info->si_addr = (void __user *)siaddr;
        return info;
}

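/*
 * Common trap delivery: let do_trap_no_signal() deal with vm86 and
 * kernel-mode traps (possibly via die()); otherwise record the trap in
 * the task and send the prepared signal.
 */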
static void
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
        long error_code, siginfo_t *info)
{
        struct task_struct *tsk = current;

        if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
                return;
        /*
         * We want error_code and trap_nr set for userspace faults and
         * kernelspace faults which result in die(), but not
         * kernelspace faults which are fixed up.  die() gives the
         * process no chance to handle the signal and notice the
         * kernel fault information, so that won't result in polluting
         * the information about previously queued, but not yet
         * delivered, faults.  See also do_general_protection below.
         */
        tsk->thread.error_code = error_code;
        tsk->thread.trap_nr = trapnr;

#ifdef CONFIG_X86_64
        if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
            printk_ratelimit()) {
                pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
                        tsk->comm, tsk->pid, str,
                        regs->ip, regs->sp, error_code);
                print_vma_addr(" in ", regs->ip);
                pr_cont("\n");
        }
#endif

        force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
}
NOKPROBE_SYMBOL(do_trap);

static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
                          unsigned long trapnr, int signr)
{
        siginfo_t info;

        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");

        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
                        NOTIFY_STOP) {
                conditional_sti(regs);
                do_trap(trapnr, signr, str, regs, error_code,
                        fill_trap_info(regs, signr, trapnr, &info));
        }
}

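/*
 * The simple exceptions below only need to raise a signal; their
 * handlers are generated from this macro.
 */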
#define DO_ERROR(trapnr, signr, str, name)				\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
        do_error_trap(regs, error_code, str, trapnr, signr);		\
}

DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)

#ifdef CONFIG_X86_64
/* Runs on IST stack */
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
        static const char str[] = "double fault";
        struct task_struct *tsk = current;

#ifdef CONFIG_X86_ESPFIX64
        extern unsigned char native_irq_return_iret[];

        /*
         * If IRET takes a non-IST fault on the espfix64 stack, then we
         * end up promoting it to a doublefault.  In that case, modify
         * the stack to make it look like we just entered the #GP
         * handler from user space, similar to bad_iret.
         *
         * No need for ist_enter here because we don't use RCU.
         */
        if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
                regs->cs == __KERNEL_CS &&
                regs->ip == (unsigned long)native_irq_return_iret)
        {
                struct pt_regs *normal_regs = task_pt_regs(current);

                /* Fake a #GP(0) from userspace. */
                memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
                normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
                regs->ip = (unsigned long)general_protection;
                regs->sp = (unsigned long)&normal_regs->orig_ax;

                return;
        }
#endif

        ist_enter(regs);
        notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

        tsk->thread.error_code = error_code;
        tsk->thread.trap_nr = X86_TRAP_DF;

#ifdef CONFIG_DOUBLEFAULT
        df_debug(regs, error_code);
#endif
        /*
         * This is always a kernel trap and never fixable (and thus must
         * never return).
         */
        for (;;)
                die(str, regs, error_code);
}
#endif

dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
{
        const struct mpx_bndcsr *bndcsr;
        siginfo_t *info;

        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
        if (notify_die(DIE_TRAP, "bounds", regs, error_code,
                        X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
                return;
        conditional_sti(regs);

        if (!user_mode(regs))
                die("bounds", regs, error_code);

        if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
                /* The exception is not from Intel MPX */
                goto exit_trap;
        }

        /*
         * We need to look at BNDSTATUS to resolve this exception.
         * A NULL here might mean that it is in its 'init state',
         * which is all zeros which indicates MPX was not
         * responsible for the exception.
         */
        bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
        if (!bndcsr)
                goto exit_trap;

        trace_bounds_exception_mpx(bndcsr);
        /*
         * The error code field of the BNDSTATUS register communicates status
         * information of a bound range exception #BR or operation involving
         * bound directory.
         */
        switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
        case 2:	/* Bound directory has invalid entry. */
                if (mpx_handle_bd_fault())
                        goto exit_trap;
                break; /* Success, it was handled */
        case 1: /* Bound violation. */
                info = mpx_generate_siginfo(regs);
                if (IS_ERR(info)) {
                        /*
                         * We failed to decode the MPX instruction.  Act as if
                         * the exception was not caused by MPX.
                         */
                        goto exit_trap;
                }
                /*
                 * Success, we decoded the instruction and retrieved
                 * an 'info' containing the address being accessed
                 * which caused the exception.  This information
                 * allows an application to possibly handle the
                 * #BR exception itself.
                 */
                do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info);
                kfree(info);
                break;
        case 0: /* No exception caused by Intel MPX operations. */
                goto exit_trap;
        default:
                die("bounds", regs, error_code);
        }

        return;

exit_trap:
        /*
         * This path out is for all the cases where we could not
         * handle the exception in some way (like allocating a
         * table or telling userspace about it).  We will also end
         * up here if the kernel has MPX turned off at compile
         * time.
         */
        do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
}

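/*
 * #GP: vm86 faults are handed to the vm86 emulation, kernel-mode
 * faults are either fixed up or fatal, and user-mode faults raise
 * SIGSEGV.
 */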
dotraplinkage void
do_general_protection(struct pt_regs *regs, long error_code)
{
        struct task_struct *tsk;

        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
        conditional_sti(regs);

        if (v8086_mode(regs)) {
                local_irq_enable();
                handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
                return;
        }

        tsk = current;
        if (!user_mode(regs)) {
                if (fixup_exception(regs))
                        return;

                tsk->thread.error_code = error_code;
                tsk->thread.trap_nr = X86_TRAP_GP;
                if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
                               X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
                        die("general protection fault", regs, error_code);
                return;
        }

        tsk->thread.error_code = error_code;
        tsk->thread.trap_nr = X86_TRAP_GP;

        if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
                        printk_ratelimit()) {
                pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
                        tsk->comm, task_pid_nr(tsk),
                        regs->ip, regs->sp, error_code);
                print_vma_addr(" in ", regs->ip);
                pr_cont("\n");
        }

        force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
}
NOKPROBE_SYMBOL(do_general_protection);

/* May run on IST stack. */
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_DYNAMIC_FTRACE
        /*
         * ftrace must be first, everything else may cause a recursive crash.
         * See note by declaration of modifying_ftrace_code in ftrace.c
         */
        if (unlikely(atomic_read(&modifying_ftrace_code)) &&
            ftrace_int3_handler(regs))
                return;
#endif
        if (poke_int3_handler(regs))
                return;

        ist_enter(regs);
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
        if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                                SIGTRAP) == NOTIFY_STOP)
                goto exit;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */

#ifdef CONFIG_KPROBES
        if (kprobe_int3_handler(regs))
                goto exit;
#endif

        if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                        SIGTRAP) == NOTIFY_STOP)
                goto exit;

        /*
         * Let others (NMI) know that the debug stack is in use
         * as we may switch to the interrupt stack.
         */
        debug_stack_usage_inc();
        preempt_conditional_sti(regs);
        do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
        preempt_conditional_cli(regs);
        debug_stack_usage_dec();
exit:
        ist_exit(regs);
}
NOKPROBE_SYMBOL(do_int3);

#ifdef CONFIG_X86_64
/*
 * Help handler running on IST stack to switch off the IST stack if the
 * interrupted code was in user mode. The actual stack switch is done in
 * entry_64.S
 */
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
{
        struct pt_regs *regs = task_pt_regs(current);
        *regs = *eregs;
        return regs;
}
NOKPROBE_SYMBOL(sync_regs);

struct bad_iret_stack {
        void *error_entry_ret;
        struct pt_regs regs;
};

asmlinkage __visible notrace
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
{
        /*
         * This is called from entry_64.S early in handling a fault
         * caused by a bad iret to user mode.  To handle the fault
         * correctly, we want to move our stack frame to task_pt_regs
         * and we want to pretend that the exception came from the
         * iret target.
         */
        struct bad_iret_stack *new_stack =
                container_of(task_pt_regs(current),
                             struct bad_iret_stack, regs);

        /* Copy the IRET target to the new stack. */
        memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);

        /* Copy the remainder of the stack from the current stack. */
        memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));

        BUG_ON(!user_mode(&new_stack->regs));
        return new_stack;
}
NOKPROBE_SYMBOL(fixup_bad_iret);
#endif

/*
 * Our handling of the processor debug registers is non-trivial.
 * We do not clear them on entry and exit from the kernel. Therefore
 * it is possible to get a watchpoint trap here from inside the kernel.
 * However, the code in ./ptrace.c has ensured that the user can
 * only set watchpoints on userspace addresses. Therefore the in-kernel
 * watchpoint trap can only occur in code which is reading/writing
 * from user space. Such code must not hold kernel locks (since it
 * can equally take a page fault), therefore it is safe to call
 * force_sig_info even though that claims and releases locks.
 *
 * Code in ./signal.c ensures that the debug control register
 * is restored before we deliver any signal, and therefore that
 * user code runs with the correct debug control register even though
 * we clear it here.
 *
 * Being careful here means that we don't have to be as careful in a
 * lot of more complicated places (task switching can be a bit lazy
 * about restoring all the debug state, and ptrace doesn't have to
 * find every occurrence of the TF bit that could be saved away even
 * by user code)
 *
 * May run on IST stack.
 */
dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
{
        struct task_struct *tsk = current;
        int user_icebp = 0;
        unsigned long dr6;
        int si_code;

        ist_enter(regs);

        get_debugreg(dr6, 6);

        /* Filter out all the reserved bits which are preset to 1 */
        dr6 &= ~DR6_RESERVED;

        /*
         * If dr6 has no reason to give us about the origin of this trap,
         * then it's very likely the result of an icebp/int01 trap.
         * User wants a sigtrap for that.
         */
        if (!dr6 && user_mode(regs))
                user_icebp = 1;

        /* Catch kmemcheck conditions first of all! */
        if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
                goto exit;

        /* DR6 may or may not be cleared by the CPU */
        set_debugreg(0, 6);

        /*
         * The processor cleared BTF, so don't mark that we need it set.
         */
        clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);

        /* Store the virtualized DR6 value */
        tsk->thread.debugreg6 = dr6;

#ifdef CONFIG_KPROBES
        if (kprobe_debug_handler(regs))
                goto exit;
#endif

        if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
                        SIGTRAP) == NOTIFY_STOP)
                goto exit;

        /*
         * Let others (NMI) know that the debug stack is in use
         * as we may switch to the interrupt stack.
         */
        debug_stack_usage_inc();

        /* It's safe to allow irq's after DR6 has been saved */
        preempt_conditional_sti(regs);

        if (v8086_mode(regs)) {
                handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
                                        X86_TRAP_DB);
                preempt_conditional_cli(regs);
                debug_stack_usage_dec();
                goto exit;
        }

        /*
         * Single-stepping through system calls: ignore any exceptions in
         * kernel space, but re-enable TF when returning to user mode.
         *
         * We already checked v86 mode above, so we can check for kernel mode
         * by just checking the CPL of CS.
         */
        if ((dr6 & DR_STEP) && !user_mode(regs)) {
                tsk->thread.debugreg6 &= ~DR_STEP;
                set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
                regs->flags &= ~X86_EFLAGS_TF;
        }
        si_code = get_si_code(tsk->thread.debugreg6);
        if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
                send_sigtrap(tsk, regs, error_code, si_code);
        preempt_conditional_cli(regs);
        debug_stack_usage_dec();

exit:
        ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);

/*
 * Note that we play around with the 'TS' bit in an attempt to get
 * the correct behaviour even in the presence of the asynchronous
 * IRQ13 behaviour
 */
static void math_error(struct pt_regs *regs, int error_code, int trapnr)
{
        struct task_struct *task = current;
        struct fpu *fpu = &task->thread.fpu;
        siginfo_t info;
        char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
                                                "simd exception";

        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
                return;
        conditional_sti(regs);

        if (!user_mode(regs)) {
                if (!fixup_exception(regs)) {
                        task->thread.error_code = error_code;
                        task->thread.trap_nr = trapnr;
                        die(str, regs, error_code);
                }
                return;
        }

        /*
         * Save the info for the exception handler and clear the error.
         */
        fpu__save(fpu);

        task->thread.trap_nr = trapnr;
        task->thread.error_code = error_code;
        info.si_signo = SIGFPE;
        info.si_errno = 0;
        info.si_addr = (void __user *)uprobe_get_trap_addr(regs);

        info.si_code = fpu__exception_code(fpu, trapnr);

        /* Retry when we get spurious exceptions: */
        if (!info.si_code)
                return;

        force_sig_info(SIGFPE, &info, task);
}

dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
        math_error(regs, error_code, X86_TRAP_MF);
}

dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
        math_error(regs, error_code, X86_TRAP_XF);
}

dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
        conditional_sti(regs);
}

dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
        BUG_ON(use_eager_fpu());

#ifdef CONFIG_MATH_EMULATION
        if (read_cr0() & X86_CR0_EM) {
                struct math_emu_info info = { };

                conditional_sti(regs);

                info.regs = regs;
                math_emulate(&info);
                return;
        }
#endif
        fpu__restore(&current->thread.fpu); /* interrupts still off */
#ifdef CONFIG_X86_32
        conditional_sti(regs);
#endif
}
NOKPROBE_SYMBOL(do_device_not_available);

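/*
 * Pseudo-trap reached via the 32-bit entry code's exception fixup when
 * the IRET back to user space faults; the task is sent SIGILL with
 * ILL_BADSTK.
 */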
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
{
        siginfo_t info;

        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
        local_irq_enable();

        info.si_signo = SIGILL;
        info.si_errno = 0;
        info.si_code = ILL_BADSTK;
        info.si_addr = NULL;
        if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
                        X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
                do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
                        &info);
        }
}
#endif

/* Set of traps needed for early debugging. */
void __init early_trap_init(void)
{
        /*
         * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
         * is ready in cpu_init() <-- trap_init(). Before trap_init(),
         * CPU runs at ring 0 so it is impossible to hit an invalid
         * stack.  Using the original stack works well enough at this
         * early stage. DEBUG_STACK will be equipped after cpu_init() in
         * trap_init().
         *
         * We don't need to set trace_idt_table like set_intr_gate(),
         * since we don't have trace_debug and it will be reset to
         * 'debug' in trap_init() by set_intr_gate_ist().
         */
        set_intr_gate_notrace(X86_TRAP_DB, debug);
        /* int3 can be called from all */
        set_system_intr_gate(X86_TRAP_BP, &int3);
#ifdef CONFIG_X86_32
        set_intr_gate(X86_TRAP_PF, page_fault);
#endif
        load_idt(&idt_descr);
}

void __init early_trap_pf_init(void)
{
#ifdef CONFIG_X86_64
        set_intr_gate(X86_TRAP_PF, page_fault);
#endif
}

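/*
 * Install the full set of trap gates, reserve the standard and legacy
 * system-call vectors, move the IDT to a read-only fixmap and finish
 * per-CPU setup via cpu_init().
 */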
void __init trap_init(void)
{
        int i;

#ifdef CONFIG_EISA
        void __iomem *p = early_ioremap(0x0FFFD9, 4);

        if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
                EISA_bus = 1;
        early_iounmap(p, 4);
#endif

        set_intr_gate(X86_TRAP_DE, divide_error);
        set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
        /* int4 can be called from all */
        set_system_intr_gate(X86_TRAP_OF, &overflow);
        set_intr_gate(X86_TRAP_BR, bounds);
        set_intr_gate(X86_TRAP_UD, invalid_op);
        set_intr_gate(X86_TRAP_NM, device_not_available);
#ifdef CONFIG_X86_32
        set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
#else
        set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
#endif
        set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
        set_intr_gate(X86_TRAP_TS, invalid_TSS);
        set_intr_gate(X86_TRAP_NP, segment_not_present);
        set_intr_gate(X86_TRAP_SS, stack_segment);
        set_intr_gate(X86_TRAP_GP, general_protection);
        set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
        set_intr_gate(X86_TRAP_MF, coprocessor_error);
        set_intr_gate(X86_TRAP_AC, alignment_check);
#ifdef CONFIG_X86_MCE
        set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
#endif
        set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);

        /* Reserve all the builtin and the syscall vector: */
        for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
                set_bit(i, used_vectors);

#ifdef CONFIG_IA32_EMULATION
        set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
        set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif

#ifdef CONFIG_X86_32
        set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
        set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif

        /*
         * Set the IDT descriptor to a fixed read-only location, so that the
         * "sidt" instruction will not leak the location of the kernel, and
         * to defend the IDT against arbitrary memory write vulnerabilities.
         * It will be reloaded in cpu_init().
         */
        __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
        idt_descr.address = fix_to_virt(FIX_RO_IDT);

        /*
         * Should be a barrier for any external CPU state:
         */
        cpu_init();

        /*
         * X86_TRAP_DB and X86_TRAP_BP have been set
         * in early_trap_init(). However, IST works only after
         * cpu_init() loads TSS. See comments in early_trap_init().
         */
        set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
        /* int3 can be called from all */
        set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);

        x86_init.irqs.trap_init();

#ifdef CONFIG_X86_64
        memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
        set_nmi_gate(X86_TRAP_DB, &debug);
        set_nmi_gate(X86_TRAP_BP, &int3);
#endif
}