/*
 * Copyright (C) 1991,1992 Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout while running C code:
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 */

#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *	INTERRUPT_RETURN (aka. "iret")
 *	GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *	ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off?
	jz	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and the kernel only uses it for the
 * stack canary, which is required to be at %gs:20 by gcc.  Read the
 * comment at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
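 *
 * With CONFIG_X86_32_LAZY_GS (only possible when the stack protector is
 * off), entry code never touches %gs itself: PUSH_GS merely reserves the
 * pt_regs slot and the remaining helpers below expand to nothing.
 * Without it, %gs is saved and reloaded eagerly so that %gs always
 * points at the stack-canary segment while running kernel code.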
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
.macro PUSH_GS
	pushl	$0
.endm
.macro POP_GS pop=0
	addl	$(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl	%gs
.endm

.macro POP_GS pop=0
98:	popl	%gs
  .if \pop <> 0
	add	$\pop, %esp
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, (%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98:	mov	PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, PT_GS(%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
	movl	%gs, \reg
.endm
.macro REG_TO_PTGS reg
	movl	\reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
	movl	$(__KERNEL_STACK_CANARY), \reg
	movl	\reg, %gs
.endm

#endif /* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL pt_regs_ax=%eax
	cld
	PUSH_GS
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	\pt_regs_ax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx
.endm

.macro RESTORE_INT_REGS
	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl	%ds
2:	popl	%es
3:	popl	%fs
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
5:	movl	$0, (%esp)
	jmp	2b
6:	movl	$0, (%esp)
	jmp	3b
.popsection
	_ASM_EXTABLE(1b, 4b)
	_ASM_EXTABLE(2b, 5b)
	_ASM_EXTABLE(3b, 6b)
	POP_GS_EX
.endm

/*
 * %eax: prev task
 * %edx: next task
 */
ENTRY(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in struct inactive_task_frame
	 */
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi

	/* switch stack */
	movl	%esp, TASK_threadsp(%eax)
	movl	TASK_threadsp(%edx), %esp

#ifdef CONFIG_CC_STACKPROTECTOR
	movl	TASK_stack_canary(%edx), %ebx
	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

	/* restore callee-saved registers */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	jmp	__switch_to
END(__switch_to_asm)

/*
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
	pushl	%eax
	call	schedule_tail
	popl	%eax

	testl	%ebx, %ebx
	jnz	1f		/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	movl	%esp, %eax
	call	syscall_return_slowpath
	jmp	restore_all

	/* kernel thread */
1:	movl	%edi, %eax
	call	*%ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
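	 *
	 * The saved PT_EAX slot is cleared below so that the register
	 * frame restored on the way out shows %eax == 0 in the freshly
	 * exec'd user program.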
	 */
	movl	$0, PT_EAX(%esp)
	jmp	2b
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from child spawned by kernel_thread().
	 */
	movl	PT_CS(%esp), %eax
	andl	$SEGMENT_RPL_MASK, %eax
#endif
	cmpl	$USER_RPL, %eax
	jb	resume_kernel			# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	prepare_exit_to_usermode
	jmp	restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
need_resched:
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	restore_all
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	restore_all
	call	preempt_schedule_irq
	jmp	need_resched
END(resume_kernel)
#endif

GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!).  To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
	addl	$5*4, %esp			/* remove xen-provided frame */
	jmp	sysenter_past_esp
#endif

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available.  This is the preferred system call
 * entry on 32-bit systems.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO.  In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
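 *
 * Because the hardware saves nothing, the entry code below rebuilds a
 * complete pt_regs by hand: it loads the task's kernel stack pointer
 * from the TSS, then pushes SS, the user SP (stashed in %ebp by the
 * vDSO), FLAGS, CS and a zero EIP placeholder before SAVE_ALL stores
 * the general-purpose registers.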
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	user stack
 * 0(%ebp) arg6
 */
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
	 * and TF ourselves.  To save a few cycles, we can check whether
	 * either was set instead of doing an unconditional popfl.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * If TF is set, we will single-step all the way to here -- do_debug
	 * will ignore all the traps.  (Yes, this is slow, but so is
	 * single-stepping in general.  This allows us to avoid having
	 * more complicated code to handle the case where a user program
	 * forces us to single-step through the SYSENTER entry code.)
	 *
	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS
	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */
	popl	%eax			/* pt_regs->ax */

	/*
	 * Restore all flags except IF. (We restore IF separately because
	 * STI gives a one-instruction window in which we won't be interrupted,
	 * whereas POPF does not.)
	 */
	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
	btr	$X86_EFLAGS_IF_BIT, (%esp)
	popfl

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
	 */
	sti
	sysexit

.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
	jmp	1b
.popsection
	_ASM_EXTABLE(1b, 2b)
	PTGS_TO_GS_EX

.Lsysenter_fix_flags:
	pushl	$X86_EFLAGS_FIXED
	popfl
	jmp	.Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)

/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction.  INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit program to perform system calls.
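 * (For example, "movl $20, %eax; int $0x80" invokes getpid(): the call
 * number goes in %eax and the result, or a -errno value, comes back in
 * %eax.)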
 * Instances of INT $0x80 can be found inline in various programs and
 * libraries.  It is also used by the vDSO's __kernel_vsyscall
 * fallback for hardware that doesn't support a faster entry method.
 * Restarted 32-bit system calls also fall back to INT $0x80
 * regardless of what instruction was originally used to do the system
 * call.  (64-bit programs can use INT $0x80 as well, but they can
 * only run on 64-bit kernels and therefore land in
 * entry_INT80_compat.)
 *
 * This is considered a slow path.  It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	arg6
 */
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * User mode is traced as though IRQs are on, and the interrupt gate
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:

restore_all:
	TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	ALTERNATIVE	"jmp restore_nocheck", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	je	ldt_ss				# returning to user-space with LDT SS
#endif
restore_nocheck:
	RESTORE_REGS 4				# skip orig_eax/error_code
irq_return:
	INTERRUPT_RETURN
.section .fixup, "ax"
ENTRY(iret_exc	)
	pushl	$0				# no error code
	pushl	$do_iret_error
	jmp	error_code
.previous
	_ASM_EXTABLE(irq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
/*
 * Set up and switch to the ESPFIX stack
 *
 * We're returning to userspace with a 16-bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that makes up for the difference.
 */
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_EAX)
	lss	(%esp), %esp			/* switch to espfix segment */
	jmp	restore_nocheck
#endif
ENDPROC(entry_INT80_32)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack.  This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
	shl	$16, %eax
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
	jne	27f
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
	.align 8
ENTRY(irq_entries_start)
    vector=FIRST_EXTERNAL_VECTOR
    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
    vector=vector+1
	jmp	common_interrupt
	.align	8
    .endr
END(irq_entries_start)

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	do_IRQ
	jmp	ret_from_intr
ENDPROC(common_interrupt)

#define BUILD_INTERRUPT3(name, nr, fn)	\
ENTRY(name)				\
	ASM_CLAC;			\
	pushl	$~(nr);			\
	SAVE_ALL;			\
	TRACE_IRQS_OFF			\
	movl	%esp, %eax;		\
	call	fn;			\
	jmp	ret_from_intr;		\
ENDPROC(name)


#ifdef CONFIG_TRACING
# define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
#else
# define TRACE_BUILD_INTERRUPT(name, nr)
#endif

#define BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(name, nr, smp_##name);	\
	TRACE_BUILD_INTERRUPT(name, nr)

/* The include is where all of the SMP etc. interrupts come from */
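/*
 * Every stub that BUILD_INTERRUPT3() generates pushes the inverted
 * vector number, saves all registers, and calls its handler with a
 * pt_regs pointer in %eax before rejoining ret_from_intr.
 */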
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_error
	jmp	error_code
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	ASM_CLAC
	pushl	$0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
	ALTERNATIVE "pushl	$do_general_protection",	\
		    "pushl	$do_simd_coprocessor_error",	\
		    X86_FEATURE_XMM
#else
	pushl	$do_simd_coprocessor_error
#endif
	jmp	error_code
END(simd_coprocessor_error)

ENTRY(device_not_available)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_device_not_available
	jmp	error_code
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
#endif

ENTRY(overflow)
	ASM_CLAC
	pushl	$0
	pushl	$do_overflow
	jmp	error_code
END(overflow)

ENTRY(bounds)
	ASM_CLAC
	pushl	$0
	pushl	$do_bounds
	jmp	error_code
END(bounds)

ENTRY(invalid_op)
	ASM_CLAC
	pushl	$0
	pushl	$do_invalid_op
	jmp	error_code
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_segment_overrun
	jmp	error_code
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	ASM_CLAC
	pushl	$do_invalid_TSS
	jmp	error_code
END(invalid_TSS)

ENTRY(segment_not_present)
	ASM_CLAC
	pushl	$do_segment_not_present
	jmp	error_code
END(segment_not_present)

ENTRY(stack_segment)
	ASM_CLAC
	pushl	$do_stack_segment
	jmp	error_code
END(stack_segment)

ENTRY(alignment_check)
	ASM_CLAC
	pushl	$do_alignment_check
	jmp	error_code
END(alignment_check)

ENTRY(divide_error)
	ASM_CLAC
	pushl	$0				# no error code
	pushl	$do_divide_error
	jmp	error_code
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	ASM_CLAC
	pushl	$0
	pushl	machine_check_vector
	jmp	error_code
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	ASM_CLAC
	pushl	$0
	pushl	$do_spurious_interrupt_bug
	jmp	error_code
END(spurious_interrupt_bug)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	TRACE_IRQS_OFF

	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events.  This simulates
	 * iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
	movl	PT_EIP(%esp), %eax
	cmpl	$xen_iret_start_crit, %eax
	jb	1f
	cmpl	$xen_iret_end_crit, %eax
	jae	1f

	jmp	xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
	jmp	ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error.  We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
 */
ENTRY(xen_failsafe_callback)
	pushl	%eax
	movl	$1, %eax
1:	mov	4(%esp), %ds
2:	mov	8(%esp), %es
3:	mov	12(%esp), %fs
4:	mov	16(%esp), %gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl	%eax, %eax
	popl	%eax
	lea	16(%esp), %esp
	jz	5f
	jmp	iret_exc
5:	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	jmp	ret_from_exception

.section .fixup, "ax"
6:	xorl	%eax, %eax
	movl	%eax, 4(%esp)
	jmp	1b
7:	xorl	%eax, %eax
	movl	%eax, 8(%esp)
	jmp	2b
8:	xorl	%eax, %eax
	movl	%eax, 12(%esp)
	jmp	3b
9:	xorl	%eax, %eax
	movl	%eax, 16(%esp)
	jmp	4b
.previous
	_ASM_EXTABLE(1b, 6b)
	_ASM_EXTABLE(2b, 7b)
	_ASM_EXTABLE(3b, 8b)
	_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 xen_evtchn_do_upcall)

#endif /* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)

BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)

#endif /* CONFIG_HYPERV */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
	ret
END(mcount)

ENTRY(ftrace_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	$0				/* Pass NULL as regs pointer */
	movl	4*4(%esp), %eax
	movl	0x4(%ebp), %edx
	movl	function_trace_op, %ecx
	subl	$MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call	ftrace_stub

	addl	$4, %esp			/* skip NULL pointer */
	popl	%edx
	popl	%ecx
	popl	%eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp	ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	ret
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
	pushf					/* push flags before compare (in cs location) */

	/*
	 * i386 does not save SS and ESP when coming from kernel.
	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
	 * Unfortunately, that means eflags must be at the same location
	 * as the current return ip is.  We move the return ip into the
	 * ip location, and move flags into the return ip location.
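	 *
	 * The frame built below is laid out like struct pt_regs, so an
	 * ftrace callback registered with FTRACE_OPS_FL_SAVE_REGS can
	 * inspect (and modify) the saved register state.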
	 */
	pushl	4(%esp)				/* save return ip into ip slot */

	pushl	$0				/* Load 0 into orig_ax */
	pushl	%gs
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx

	movl	13*4(%esp), %eax		/* Get the saved flags */
	movl	%eax, 14*4(%esp)		/* Move saved flags into regs->flags location */
						/* clobbering return ip */
	movl	$__KERNEL_CS, 13*4(%esp)

	movl	12*4(%esp), %eax		/* Load ip (1st parameter) */
	subl	$MCOUNT_INSN_SIZE, %eax		/* Adjust ip */
	movl	0x4(%ebp), %edx			/* Load parent ip (2nd parameter) */
	movl	function_trace_op, %ecx		/* Save ftrace_pos in 3rd parameter */
	pushl	%esp				/* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
	call	ftrace_stub

	addl	$4, %esp			/* Skip pt_regs */
	movl	14*4(%esp), %eax		/* Move flags back into cs */
	movl	%eax, 13*4(%esp)		/* Needed to keep addl from modifying flags */
	movl	12*4(%esp), %eax		/* Get return ip from regs->ip */
	movl	%eax, 14*4(%esp)		/* Put return ip back for ret */

	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
	popl	%ds
	popl	%es
	popl	%fs
	popl	%gs
	addl	$8, %esp			/* Skip orig_ax and ip */
	popf					/* Pop flags at end (no addl to corrupt flags) */
	jmp	ftrace_ret

	popf
	jmp	ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

ENTRY(mcount)
	cmpl	$__PAGE_OFFSET, %esp
	jb	ftrace_stub			/* Paging not enabled yet? */

	cmpl	$ftrace_stub, ftrace_trace_function
	jnz	trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl	$ftrace_stub, ftrace_graph_return
	jnz	ftrace_graph_caller

	cmpl	$ftrace_graph_entry_stub, ftrace_graph_entry
	jnz	ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	movl	0x4(%ebp), %edx
	subl	$MCOUNT_INSN_SIZE, %eax

	call	*ftrace_trace_function

	popl	%edx
	popl	%ecx
	popl	%eax
	jmp	ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
EXPORT_SYMBOL(mcount)
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	lea	0x4(%ebp), %edx
	movl	(%ebp), %ecx
	subl	$MCOUNT_INSN_SIZE, %eax
	call	prepare_ftrace_return
	popl	%edx
	popl	%ecx
	popl	%eax
	ret
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
	pushl	%eax
	pushl	%edx
	movl	%ebp, %eax
	call	ftrace_return_to_handler
	movl	%eax, %ecx
	popl	%edx
	popl	%eax
	jmp	*%ecx
#endif

#ifdef CONFIG_TRACING
ENTRY(trace_page_fault)
	ASM_CLAC
	pushl	$trace_do_page_fault
	jmp	error_code
END(trace_page_fault)
#endif

ENTRY(page_fault)
	ASM_CLAC
	pushl	$do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	cld
	movl	$(__KERNEL_PERCPU), %ecx
	movl	%ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
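	# The handler address occupied the pt_regs->gs slot; now that it is
	# in %edi, store the real %gs value there and load the kernel's
	# stack-canary segment (both no-ops under CONFIG_X86_32_LAZY_GS).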
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl	$(__USER_DS), %ecx
	movl	%ecx, %ds
	movl	%ecx, %es
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	call	*%edi
	jmp	ret_from_exception
END(page_fault)

ENTRY(debug)
	/*
	 * #DB can happen at the first instruction of
	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
	 * happens, then we will be running on a very small stack.  We
	 * need to detect this condition and switch to the thread
	 * stack before calling any C code at all.
	 *
	 * If you edit this code, keep in mind that NMIs can happen in here.
	 */
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	xorl	%edx, %edx			# error code 0
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Ldebug_from_sysenter_stack

	TRACE_IRQS_OFF
	call	do_debug
	jmp	ret_from_exception

.Ldebug_from_sysenter_stack:
	/* We're on the SYSENTER stack.  Switch off. */
	movl	%esp, %ebp
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	TRACE_IRQS_OFF
	call	do_debug
	movl	%ebp, %esp
	jmp	ret_from_exception
END(debug)

/*
 * NMI is doubly nasty.  It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks.  We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
	popl	%eax
	je	nmi_espfix_stack
#endif

	pushl	%eax				# pt_regs->orig_ax
	SAVE_ALL
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Lnmi_from_sysenter_stack

	/* Not on SYSENTER stack. */
	call	do_nmi
	jmp	restore_all_notrace

.Lnmi_from_sysenter_stack:
	/*
	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
	 * is using the thread stack right now, so it's safe for us to use it.
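	 *
	 * (32-bit has no IST mechanism, so this stack switch is done by
	 * hand, mirroring the #DB path above.)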
	 */
	movl	%esp, %ebp
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	call	do_nmi
	movl	%ebp, %esp
	jmp	restore_all_notrace

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
	/*
	 * create the pointer to lss back
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_REGS
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	irq_return
#endif
END(nmi)

ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_int3
	jmp	ret_from_exception
END(int3)

ENTRY(general_protection)
	pushl	$do_general_protection
	jmp	error_code
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	error_code
END(async_page_fault)
#endif

ENTRY(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp

	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp

	call	do_exit
1:	jmp	1b
END(rewind_stack_do_exit)