/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  Copyright (C) 1991,1992  Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout while running C code:
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 */

#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>
#include <asm/nospec-branch.h>

#include "calling.h"

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *   INTERRUPT_RETURN (aka. "iret")
 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off?
	jz	1f
	TRACE_IRQS_ON
1:
#endif
.endm

#define PTI_SWITCH_MASK		(1 << PAGE_SHIFT)

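/*
 * Rough illustration of the PTI CR3 switch (the CR3 value below is made
 * up): the kernel and user page-table roots are arranged so that the two
 * CR3 values differ only in bit PAGE_SHIFT (bit 12 with 4K pages), which
 * is exactly PTI_SWITCH_MASK.  The switch macros further down only flip
 * that one bit:
 *
 *	kernel CR3:                  0x01a2a000
 *	orl  $PTI_SWITCH_MASK     -> 0x01a2b000	(user CR3)
 *	andl $(~PTI_SWITCH_MASK)  -> 0x01a2a000	(kernel CR3 again)
 *
 * The same bit is what BUG_IF_WRONG_CR3 and SWITCH_TO_KERNEL_CR3 test to
 * decide which copy of the page-tables is currently live.
 */
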
/*
 * User gs save/restore
 *
 * %gs is used for userland TLS; the kernel only uses it for the stack
 * canary, which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately, push/pop can't be no-ops */
.macro PUSH_GS
	pushl	$0
.endm
.macro POP_GS pop=0
	addl	$(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-ops */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl	%gs
.endm

.macro POP_GS pop=0
98:	popl	%gs
  .if \pop <> 0
	add	$\pop, %esp
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, (%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98:	mov	PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, PT_GS(%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
	movl	%gs, \reg
.endm
.macro REG_TO_PTGS reg
	movl	\reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
	movl	$(__KERNEL_STACK_CANARY), \reg
	movl	\reg, %gs
.endm

#endif /* CONFIG_X86_32_LAZY_GS */

/* Unconditionally switch to user cr3 */
.macro SWITCH_TO_USER_CR3 scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

	movl	%cr3, \scratch_reg
	orl	$PTI_SWITCH_MASK, \scratch_reg
	movl	\scratch_reg, %cr3
.Lend_\@:
.endm

.macro BUG_IF_WRONG_CR3 no_user_check=0
#ifdef CONFIG_DEBUG_ENTRY
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	.if \no_user_check == 0
	/* coming from usermode? */
	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
	jz	.Lend_\@
	.endif
	/* On user-cr3? */
	movl	%cr3, %eax
	testl	$PTI_SWITCH_MASK, %eax
	jnz	.Lend_\@
	/* From userspace with kernel cr3 - BUG */
	ud2
.Lend_\@:
#endif
.endm

/*
 * Switch to kernel cr3 if not already loaded and return current cr3 in
 * \scratch_reg
 */
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	movl	%cr3, \scratch_reg
	/* Test if we are already on kernel CR3 */
	testl	$PTI_SWITCH_MASK, \scratch_reg
	jz	.Lend_\@
	andl	$(~PTI_SWITCH_MASK), \scratch_reg
	movl	\scratch_reg, %cr3
	/* Return original CR3 in \scratch_reg */
	orl	$PTI_SWITCH_MASK, \scratch_reg
.Lend_\@:
.endm

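/*
 * Sketch of how the two switch macros above are typically paired on an
 * entry path further down in this file (e.g. SAVE_ALL_NMI/RESTORE_ALL_NMI
 * and entry_SYSENTER_32); this is an illustration, not a new entry point:
 *
 *	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax	# on kernel entry
 *	...  handle the event on the kernel CR3  ...
 *	SWITCH_TO_USER_CR3 scratch_reg=%eax	# just before returning to user
 *
 * SWITCH_TO_KERNEL_CR3 leaves the CR3 value it found on entry in
 * \scratch_reg, which is how SAVE_ALL_NMI remembers whether a user or a
 * kernel CR3 has to be restored by RESTORE_ALL_NMI on the way out.
 */
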
#define CS_FROM_ENTRY_STACK	(1 << 31)
#define CS_FROM_USER_CR3	(1 << 30)
#define CS_FROM_KERNEL		(1 << 29)

.macro FIXUP_FRAME
	/*
	 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
	 * Clear them in case hardware didn't do this for us.
	 */
	andl	$0x0000ffff, 3*4(%esp)

#ifdef CONFIG_VM86
	testl	$X86_EFLAGS_VM, 4*4(%esp)
	jnz	.Lfrom_usermode_no_fixup_\@
#endif
	testl	$SEGMENT_RPL_MASK, 3*4(%esp)
	jnz	.Lfrom_usermode_no_fixup_\@

	orl	$CS_FROM_KERNEL, 3*4(%esp)

	/*
	 * When we're here from kernel mode, the (exception) stack looks like:
	 *
	 *  5*4(%esp) - <previous context>
	 *  4*4(%esp) - flags
	 *  3*4(%esp) - cs
	 *  2*4(%esp) - ip
	 *  1*4(%esp) - orig_eax
	 *  0*4(%esp) - gs / function
	 *
	 * Let's build a 5 entry IRET frame after that, such that struct pt_regs
	 * is complete and in particular regs->sp is correct. This gives us
	 * the original 5 entries as gap:
	 *
	 * 12*4(%esp) - <previous context>
	 * 11*4(%esp) - gap / flags
	 * 10*4(%esp) - gap / cs
	 *  9*4(%esp) - gap / ip
	 *  8*4(%esp) - gap / orig_eax
	 *  7*4(%esp) - gap / gs / function
	 *  6*4(%esp) - ss
	 *  5*4(%esp) - sp
	 *  4*4(%esp) - flags
	 *  3*4(%esp) - cs
	 *  2*4(%esp) - ip
	 *  1*4(%esp) - orig_eax
	 *  0*4(%esp) - gs / function
	 */

	pushl	%ss		# ss
	pushl	%esp		# sp (points at ss)
	addl	$6*4, (%esp)	# point sp back at the previous context
	pushl	6*4(%esp)	# flags
	pushl	6*4(%esp)	# cs
	pushl	6*4(%esp)	# ip
	pushl	6*4(%esp)	# orig_eax
	pushl	6*4(%esp)	# gs / function
.Lfrom_usermode_no_fixup_\@:
.endm

.macro IRET_FRAME
	testl	$CS_FROM_KERNEL, 1*4(%esp)
	jz	.Lfinished_frame_\@

	/*
	 * Reconstruct the 3 entry IRET frame right after the (modified)
	 * regs->sp without lowering %esp in between, such that an NMI in the
	 * middle doesn't scribble our stack.
	 */
	pushl	%eax
	pushl	%ecx
	movl	5*4(%esp), %eax		# (modified) regs->sp

	movl	4*4(%esp), %ecx		# flags
	movl	%ecx, -4(%eax)

	movl	3*4(%esp), %ecx		# cs
	andl	$0x0000ffff, %ecx
	movl	%ecx, -8(%eax)

	movl	2*4(%esp), %ecx		# ip
	movl	%ecx, -12(%eax)

	movl	1*4(%esp), %ecx		# eax
	movl	%ecx, -16(%eax)

	popl	%ecx
	lea	-16(%eax), %esp
	popl	%eax
.Lfinished_frame_\@:
.endm

.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
	cld
	PUSH_GS
	FIXUP_FRAME
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	\pt_regs_ax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx

	/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
	SWITCH_TO_KERNEL_STACK
.endif

.endm

.macro SAVE_ALL_NMI cr3_reg:req
	SAVE_ALL

	BUG_IF_WRONG_CR3

	/*
	 * Now switch the CR3 when PTI is enabled.
	 *
	 * We can enter with either user or kernel cr3; the code stores
	 * the old cr3 in \cr3_reg and switches to the kernel cr3 if
	 * necessary.
	 */
	SWITCH_TO_KERNEL_CR3 scratch_reg=\cr3_reg

.Lend_\@:
.endm

.macro RESTORE_INT_REGS
	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl	%ds
2:	popl	%es
3:	popl	%fs
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
5:	movl	$0, (%esp)
	jmp	2b
6:	movl	$0, (%esp)
	jmp	3b
.popsection
	_ASM_EXTABLE(1b, 4b)
	_ASM_EXTABLE(2b, 5b)
	_ASM_EXTABLE(3b, 6b)
	POP_GS_EX
.endm

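/*
 * The .fixup/_ASM_EXTABLE pattern used by RESTORE_REGS and POP_GS_EX above,
 * spelled out (a sketch of the existing mechanism, not new code): popping a
 * stale user segment selector can fault, and the exception table redirects
 * the fault to a fixup stub that replaces the bad selector with 0 and
 * retries the pop, which then succeeds with the NULL selector:
 *
 *	1:	popl	%ds			# may fault
 *	.pushsection .fixup, "ax"
 *	4:	movl	$0, (%esp)		# overwrite the saved %ds with 0
 *		jmp	1b			# retry the pop
 *	.popsection
 *		_ASM_EXTABLE(1b, 4b)		# "if 1b faults, resume at 4b"
 *
 * So a corrupt selector left in the user frame is neutralized instead of
 * faulting over and over in the kernel's return path.
 */
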
.macro RESTORE_ALL_NMI cr3_reg:req pop=0
	/*
	 * Now switch the CR3 when PTI is enabled.
	 *
	 * We enter with kernel cr3 and switch the cr3 to the value
	 * stored on \cr3_reg, which is either a user or a kernel cr3.
	 */
	ALTERNATIVE "jmp .Lswitched_\@", "", X86_FEATURE_PTI

	testl	$PTI_SWITCH_MASK, \cr3_reg
	jz	.Lswitched_\@

	/* User cr3 in \cr3_reg - write it to hardware cr3 */
	movl	\cr3_reg, %cr3

.Lswitched_\@:

	BUG_IF_WRONG_CR3

	RESTORE_REGS pop=\pop
.endm

.macro CHECK_AND_APPLY_ESPFIX
#ifdef CONFIG_X86_ESPFIX32
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)

	ALTERNATIVE	"jmp .Lend_\@", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	jne	.Lend_\@	# returning to user-space with LDT SS

	/*
	 * Setup and switch to ESPFIX stack
	 *
	 * We're returning to userspace with a 16 bit stack. The CPU will not
	 * restore the high word of ESP for us on executing iret... This is an
	 * "official" bug of all the x86-compatible CPUs, which we can work
	 * around to make dosemu and wine happy. We do this by preloading the
	 * high word of ESP with the high word of the userspace ESP while
	 * compensating for the offset by changing to the ESPFIX segment with
	 * a base address that matches for the difference.
	 */
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	lss	(%esp), %esp			/* switch to espfix segment */
.Lend_\@:
#endif /* CONFIG_X86_ESPFIX32 */
.endm

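/*
 * Worked example of the ESPFIX arithmetic above, with made-up values:
 *
 *	kernel %esp         = 0xffffd123
 *	user PT_OLDESP      = 0x5555abcd
 *	mov %dx, %ax       -> %eax = 0x5555d123	(new kernel esp)
 *	sub %eax, %edx     -> %edx = 0xaaaa0000	(offset, low word 0)
 *	GDT_ESPFIX_SS base  = 0xaaaa0000
 *	lss (%esp), %esp   -> %ss:%esp = __ESPFIX_SS:0x5555d123
 *
 * Segment base + %esp is still 0xffffd123, i.e. the real kernel stack, but
 * the visible high word of %esp now matches the user's %esp.  So when IRET
 * to a 16 bit stack segment leaves the high word of ESP unrestored, what
 * remains is the user's own high word rather than leaked kernel stack bits.
 */
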
/*
 * Called with pt_regs fully populated and kernel segments loaded,
 * so we can access PER_CPU and use the integer registers.
 *
 * We need to be very careful here with the %esp switch, because an NMI
 * can happen anywhere. If the NMI handler finds itself on the
 * entry-stack, it will overwrite the task-stack and everything we
 * copied there. So allocate the stack-frame on the task-stack and
 * switch to it before we do any copying.
 */

.macro SWITCH_TO_KERNEL_STACK

	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV

	BUG_IF_WRONG_CR3

	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax

	/*
	 * %eax now contains the entry cr3 and we carry it forward in
	 * that register for the time this macro runs
	 */

	/* Are we on the entry stack? Bail out if not! */
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
	subl	%esp, %ecx	/* ecx = (end of entry_stack) - esp */
	cmpl	$SIZEOF_entry_stack, %ecx
	jae	.Lend_\@

	/* Load stack pointer into %esi and %edi */
	movl	%esp, %esi
	movl	%esi, %edi

	/* Move %edi to the top of the entry stack */
	andl	$(MASK_entry_stack), %edi
	addl	$(SIZEOF_entry_stack), %edi

	/* Load top of task-stack into %edi */
	movl	TSS_entry2task_stack(%edi), %edi

	/* Special case - entry from kernel mode via entry stack */
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %ecx		# mix EFLAGS and CS
	movb	PT_CS(%esp), %cl
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %ecx
#else
	movl	PT_CS(%esp), %ecx
	andl	$SEGMENT_RPL_MASK, %ecx
#endif
	cmpl	$USER_RPL, %ecx
	jb	.Lentry_from_kernel_\@

	/* Bytes to copy */
	movl	$PTREGS_SIZE, %ecx

#ifdef CONFIG_VM86
	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esi)
	jz	.Lcopy_pt_regs_\@

	/*
	 * Stack-frame contains 4 additional segment registers when
	 * coming from VM86 mode
	 */
	addl	$(4 * 4), %ecx

#endif
.Lcopy_pt_regs_\@:

	/* Allocate frame on task-stack */
	subl	%ecx, %edi

	/* Switch to task-stack */
	movl	%edi, %esp

	/*
	 * We are now on the task-stack and can safely copy over the
	 * stack-frame
	 */
	shrl	$2, %ecx
	cld
	rep movsl

	jmp	.Lend_\@

.Lentry_from_kernel_\@:

	/*
	 * This handles the case when we enter the kernel from
	 * kernel-mode and %esp points to the entry-stack. When this
	 * happens we need to switch to the task-stack to run C code,
	 * but switch back to the entry-stack again when we approach
	 * iret and return to the interrupted code-path. This usually
	 * happens when we hit an exception while restoring user-space
	 * segment registers on the way back to user-space or when the
	 * sysenter handler runs with eflags.tf set.
	 *
	 * When we switch to the task-stack here, we can't trust the
	 * contents of the entry-stack anymore, as the exception handler
	 * might be scheduled out or moved to another CPU. Therefore we
	 * copy the complete entry-stack to the task-stack and set a
	 * marker in the iret-frame (bit 31 of the CS dword) to detect
	 * what we've done on the iret path.
	 *
	 * On the iret path we copy everything back and switch to the
	 * entry-stack, so that the interrupted kernel code-path
	 * continues on the same stack it was interrupted with.
	 *
	 * Be aware that an NMI can happen anytime in this code.
	 *
	 * %esi: Entry-Stack pointer (same as %esp)
	 * %edi: Top of the task stack
	 * %eax: CR3 on kernel entry
	 */

	/* Calculate number of bytes on the entry stack in %ecx */
	movl	%esi, %ecx

	/* %ecx to the top of entry-stack */
	andl	$(MASK_entry_stack), %ecx
	addl	$(SIZEOF_entry_stack), %ecx

	/* Number of bytes on the entry stack to %ecx */
	sub	%esi, %ecx

	/* Mark stackframe as coming from entry stack */
	orl	$CS_FROM_ENTRY_STACK, PT_CS(%esp)

	/*
	 * Test the cr3 used to enter the kernel and add a marker
	 * so that we can switch back to it before iret.
	 */
	testl	$PTI_SWITCH_MASK, %eax
	jz	.Lcopy_pt_regs_\@
	orl	$CS_FROM_USER_CR3, PT_CS(%esp)

	/*
	 * %esi and %edi are unchanged, %ecx contains the number of
	 * bytes to copy. The code at .Lcopy_pt_regs_\@ will allocate
	 * the stack-frame on task-stack and copy everything over
	 */
	jmp	.Lcopy_pt_regs_\@

.Lend_\@:
.endm

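/*
 * The "are we on the entry stack?" test in the macro above boils down to a
 * single unsigned compare; spelled out, with E standing for the end of the
 * entry stack:
 *
 *	ecx = E - %esp
 *	%esp inside the entry stack:	0 <= ecx < SIZEOF_entry_stack  -> do the switch/copy
 *	%esp below the entry stack:	ecx >= SIZEOF_entry_stack      -> jae .Lend_\@
 *	%esp above the entry stack:	the subtraction wraps, ecx is
 *					a huge unsigned value          -> jae .Lend_\@
 *
 * So a stack pointer anywhere else in the address space, in either
 * direction, takes the early exit and nothing is copied.
 */
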
/*
 * Switch back from the kernel stack to the entry stack.
 *
 * The %esp register must point to pt_regs on the task stack. It will
 * first calculate the size of the stack-frame to copy, depending on
 * whether we return to VM86 mode or not. With that it uses 'rep movsl'
 * to copy the contents of the stack over to the entry stack.
 *
 * We must be very careful here, as we can't trust the contents of the
 * task-stack once we have switched to the entry-stack. When an NMI happens
 * while on the entry-stack, the NMI handler will switch back to the top
 * of the task stack, overwriting our stack-frame we are about to copy.
 * Therefore we switch the stack only after everything is copied over.
 */
.macro SWITCH_TO_ENTRY_STACK

	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV

	/* Bytes to copy */
	movl	$PTREGS_SIZE, %ecx

#ifdef CONFIG_VM86
	testl	$(X86_EFLAGS_VM), PT_EFLAGS(%esp)
	jz	.Lcopy_pt_regs_\@

	/* Additional 4 registers to copy when returning to VM86 mode */
	addl	$(4 * 4), %ecx

.Lcopy_pt_regs_\@:
#endif

	/* Initialize source and destination for movsl */
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi
	subl	%ecx, %edi
	movl	%esp, %esi

	/* Save future stack pointer in %ebx */
	movl	%edi, %ebx

	/* Copy over the stack-frame */
	shrl	$2, %ecx
	cld
	rep movsl

	/*
	 * Switch to entry-stack - needs to happen after everything is
	 * copied because the NMI handler will overwrite the task-stack
	 * when on entry-stack
	 */
	movl	%ebx, %esp

.Lend_\@:
.endm

/*
 * This macro handles the case when we return to kernel-mode on the iret
 * path and have to switch back to the entry stack and/or user-cr3
 *
 * See the comments below the .Lentry_from_kernel_\@ label in the
 * SWITCH_TO_KERNEL_STACK macro for more details.
 */
.macro PARANOID_EXIT_TO_KERNEL_MODE

	/*
	 * Test if we entered the kernel with the entry-stack. Most
	 * likely we did not, because this code only runs on the
	 * return-to-kernel path.
	 */
	testl	$CS_FROM_ENTRY_STACK, PT_CS(%esp)
	jz	.Lend_\@

	/* Unlikely slow-path */

	/* Clear marker from stack-frame */
	andl	$(~CS_FROM_ENTRY_STACK), PT_CS(%esp)

	/* Copy the remaining task-stack contents to entry-stack */
	movl	%esp, %esi
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi

	/* Bytes on the task-stack to ecx */
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx
	subl	%esi, %ecx

	/* Allocate stack-frame on entry-stack */
	subl	%ecx, %edi

	/*
	 * Save future stack-pointer, we must not switch until the
	 * copy is done, otherwise the NMI handler could destroy the
	 * contents of the task-stack we are about to copy.
	 */
	movl	%edi, %ebx

	/* Do the copy */
	shrl	$2, %ecx
	cld
	rep movsl

	/* Safe to switch to entry-stack now */
	movl	%ebx, %esp

	/*
	 * We came from entry-stack and need to check if we also need to
	 * switch back to user cr3.
	 */
	testl	$CS_FROM_USER_CR3, PT_CS(%esp)
	jz	.Lend_\@

	/* Clear marker from stack-frame */
	andl	$(~CS_FROM_USER_CR3), PT_CS(%esp)

	SWITCH_TO_USER_CR3 scratch_reg=%eax

.Lend_\@:
.endm

/*
 * %eax: prev task
 * %edx: next task
 */
ENTRY(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in struct inactive_task_frame
	 */
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushfl

	/* switch stack */
	movl	%esp, TASK_threadsp(%eax)
	movl	TASK_threadsp(%edx), %esp

#ifdef CONFIG_STACKPROTECTOR
	movl	TASK_stack_canary(%edx), %ebx
	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

#ifdef CONFIG_RETPOLINE
	/*
	 * When switching from a shallower to a deeper call stack
	 * the RSB may either underflow or use entries populated
	 * with userspace addresses. On CPUs where those concerns
	 * exist, overwrite the RSB with entries which capture
	 * speculative execution to prevent attack.
	 */
	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif

	/* restore callee-saved registers */
	popfl
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	jmp	__switch_to
END(__switch_to_asm)

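/*
 * For reference, after the five pushes at the top of __switch_to_asm the
 * sleeping task's stack looks like this (lowest address first), which is
 * the layout struct inactive_task_frame in <asm/switch_to.h> is expected
 * to describe:
 *
 *	 0(%esp) - flags	(pushfl)
 *	 4(%esp) - %esi
 *	 8(%esp) - %edi
 *	 C(%esp) - %ebx
 *	10(%esp) - %ebp
 *	14(%esp) - return address into the caller of __switch_to_asm
 *
 * TASK_threadsp(prev) is left pointing at this frame, and the matching
 * pops above unwind it when the task is switched back in.
 */
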
/*
 * The unwinder expects the last frame on the stack to always be at the same
 * offset from the end of the page, which allows it to validate the stack.
 * Calling schedule_tail() directly would break that convention because it's an
 * asmlinkage function so its argument has to be pushed on the stack. This
 * wrapper creates a proper "end of stack" frame header before the call.
 */
ENTRY(schedule_tail_wrapper)
	FRAME_BEGIN

	pushl	%eax
	call	schedule_tail
	popl	%eax

	FRAME_END
	ret
ENDPROC(schedule_tail_wrapper)

/*
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
	call	schedule_tail_wrapper

	testl	%ebx, %ebx
	jnz	1f		/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	movl	%esp, %eax
	call	syscall_return_slowpath
	STACKLEAK_ERASE
	jmp	restore_all

	/* kernel thread */
1:	movl	%edi, %eax
	CALL_NOSPEC %ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl	$0, PT_EAX(%esp)
	jmp	2b
END(ret_from_fork)

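/*
 * Roughly, in C-like pseudo-code (a sketch of the flow above with the
 * register roles spelled out, not literal kernel C):
 *
 *	schedule_tail(prev);			// via schedule_tail_wrapper
 *	if (ebx) {				// kernel thread
 *		ebx(edi);			// fn(arg); may return after do_execve()
 *		regs->ax = 0;			// report execve() success to user mode
 *	}
 *	syscall_return_slowpath(regs);		// trace the syscall return in the child
 *	goto restore_all;			// back to user mode
 *
 * For a forked user child, the 0 return value of fork() was already placed
 * in pt_regs by the fork path, so only the kernel-thread case touches
 * PT_EAX here.
 */
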
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from a child spawned by kernel_thread().
	 */
	movl	PT_CS(%esp), %eax
	andl	$SEGMENT_RPL_MASK, %eax
#endif
	cmpl	$USER_RPL, %eax
	jb	restore_all_kernel		# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	prepare_exit_to_usermode
	jmp	restore_all
END(ret_from_exception)

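/*
 * How the check above separates the three return targets, using the actual
 * constants (X86_EFLAGS_VM is bit 17, SEGMENT_RPL_MASK is 3, USER_RPL is 3);
 * the CS selector values are only examples:
 *
 *	return to kernel:  cs = 0x0010 (RPL 0), VM = 0  ->  eax = 0       < 3  -> restore_all_kernel
 *	return to user:    cs = 0x0073 (RPL 3), VM = 0  ->  eax = 3      == 3  -> resume_userspace
 *	return to vm86:    VM = 1                       ->  eax >= 0x20000     -> resume_userspace
 *
 * So a single unsigned compare against USER_RPL covers both normal user
 * mode and v8086 mode.
 */
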
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!).  To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN_PV
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
	addl	$5*4, %esp			/* remove xen-provided frame */
	jmp	.Lsysenter_past_esp
#endif

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available.  This is the preferred system call
 * entry on 32-bit systems.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO.  In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  user stack
 * 0(%ebp) arg6
 */
ENTRY(entry_SYSENTER_32)
	/*
	 * On entry-stack with all userspace-regs live - save and
	 * restore eflags and %eax to use it as scratch-reg for the cr3
	 * switch.
	 */
	pushfl
	pushl	%eax
	BUG_IF_WRONG_CR3 no_user_check=1
	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
	popl	%eax
	popfl

	/* Stack empty again, switch to task stack */
	movl	TSS_entry2task_stack(%esp), %esp

.Lsysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest, stack already switched */

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
	 * and TF ourselves.  To save a few cycles, we can check whether
	 * either was set instead of doing an unconditional popfl.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * If TF is set, we will single-step all the way to here -- do_debug
	 * will ignore all the traps.  (Yes, this is slow, but so is
	 * single-stepping in general.  This allows us to avoid having
	 * more complicated code to handle the case where a user program
	 * forces us to single-step through the SYSENTER entry code.)
	 *
	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

	STACKLEAK_ERASE

/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */

	/*
	 * Setup entry stack - we keep the pointer in %eax and do the
	 * switch after almost all user-state is restored.
	 */

	/* Load entry stack pointer and allocate frame for eflags/eax */
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %eax
	subl	$(2*4), %eax

	/* Copy eflags and eax to entry stack */
	movl	PT_EFLAGS(%esp), %edi
	movl	PT_EAX(%esp), %esi
	movl	%edi, (%eax)
	movl	%esi, 4(%eax)

	/* Restore user registers and segments */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS

	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */

	/* Switch to entry stack */
	movl	%eax, %esp

	/* Now ready to switch the cr3 */
	SWITCH_TO_USER_CR3 scratch_reg=%eax

	/*
	 * Restore all flags except IF. (We restore IF separately because
	 * STI gives a one-instruction window in which we won't be interrupted,
	 * whereas POPF does not.)
	 */
	btrl	$X86_EFLAGS_IF_BIT, (%esp)
	BUG_IF_WRONG_CR3 no_user_check=1
	popfl
	popl	%eax

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
	 */
	sti
	sysexit

.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
	jmp	1b
.popsection
	_ASM_EXTABLE(1b, 2b)
	PTGS_TO_GS_EX

.Lsysenter_fix_flags:
	pushl	$X86_EFLAGS_FIXED
	popfl
	jmp	.Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)

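/*
 * Note on the opportunistic SYSEXIT above: SYSEXIT takes its return
 * context from registers rather than from the stack -- the new EIP from
 * %edx and the new ESP from %ecx -- which is why the exit path loads
 *
 *	movl	PT_EIP(%esp), %edx
 *	movl	PT_OLDESP(%esp), %ecx
 *
 * and why the user's %ecx/%edx cannot be preserved across a fast system
 * call (the vDSO saves them and pops them again after SYSEXIT, as the
 * comment before the sysexit notes).
 */
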
/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction.  INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit program to perform
 * system calls.  Instances of INT $0x80 can be found inline in
 * various programs and libraries.  It is also used by the vDSO's
 * __kernel_vsyscall fallback for hardware that doesn't support a
 * faster entry method.  Restarted 32-bit system calls also fall back
 * to INT $0x80 regardless of what instruction was originally used to
 * do the system call.  (64-bit programs can use INT $0x80 as well,
 * but they can only run on 64-bit kernels and therefore land in
 * entry_INT80_compat.)
 *
 * This is considered a slow path.  It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  arg6
 */
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax			/* pt_regs->orig_ax */

	SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1	/* save rest */

	/*
	 * User mode is traced as though IRQs are on, and the interrupt gate
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:

	STACKLEAK_ERASE

restore_all:
	TRACE_IRQS_IRET
	SWITCH_TO_ENTRY_STACK
.Lrestore_all_notrace:
	CHECK_AND_APPLY_ESPFIX
.Lrestore_nocheck:
	/* Switch back to user CR3 */
	SWITCH_TO_USER_CR3 scratch_reg=%eax

	BUG_IF_WRONG_CR3

	/* Restore user state */
	RESTORE_REGS pop=4			# skip orig_eax/error_code
.Lirq_return:
	IRET_FRAME
	/*
	 * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
	 * when returning from IPI handler and when returning from
	 * scheduler to user-space.
	 */
	INTERRUPT_RETURN

restore_all_kernel:
#ifdef CONFIG_PREEMPT
	DISABLE_INTERRUPTS(CLBR_ANY)
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	.Lno_preempt
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	.Lno_preempt
	call	preempt_schedule_irq
.Lno_preempt:
#endif
	TRACE_IRQS_IRET
	PARANOID_EXIT_TO_KERNEL_MODE
	BUG_IF_WRONG_CR3
	RESTORE_REGS 4
	jmp	.Lirq_return

.section .fixup, "ax"
ENTRY(iret_exc	)
	pushl	$0				# no error code
	pushl	$do_iret_error

#ifdef CONFIG_DEBUG_ENTRY
	/*
	 * The stack-frame here is the one that iret faulted on, so it's a
	 * return-to-user frame.  We are on kernel-cr3 because we come here from
	 * the fixup code.  This confuses the CR3 checker, so switch to user-cr3
	 * as the checker expects it.
	 */
	pushl	%eax
	SWITCH_TO_USER_CR3 scratch_reg=%eax
	popl	%eax
#endif

	jmp	common_exception
.previous
	_ASM_EXTABLE(.Lirq_return, iret_exc)
ENDPROC(entry_INT80_32)

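/*
 * For illustration, a minimal user-space call following the register
 * convention documented above entry_INT80_32 (syscall number 4 is
 * __NR_write on 32-bit x86; the msg/len symbols are of course made up):
 *
 *	movl	$4, %eax		# __NR_write
 *	movl	$1, %ebx		# arg1: fd = stdout
 *	movl	$msg, %ecx		# arg2: buffer
 *	movl	$len, %edx		# arg3: count
 *	int	$0x80			# enters the kernel at entry_INT80_32
 *					# return value comes back in %eax
 */
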
.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
	shl	$16, %eax
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
	jne	27f
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
	.align 8
ENTRY(irq_entries_start)
    vector=FIRST_EXTERNAL_VECTOR
    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
    vector=vector+1
	jmp	common_interrupt
	.align	8
    .endr
END(irq_entries_start)

#ifdef CONFIG_X86_LOCAL_APIC
	.align 8
ENTRY(spurious_entries_start)
    vector=FIRST_SYSTEM_VECTOR
    .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
    vector=vector+1
	jmp	common_spurious
	.align	8
    .endr
END(spurious_entries_start)

common_spurious:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL switch_stacks=1
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	smp_spurious_interrupt
	jmp	ret_from_intr
ENDPROC(common_spurious)
#endif

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */

	SAVE_ALL switch_stacks=1
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	do_IRQ
	jmp	ret_from_intr
ENDPROC(common_interrupt)

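/*
 * The ~vector+0x80 encoding in the stubs above exists so that each push
 * fits in the short "push imm8" form and the whole stub stays within its
 * 8 byte slot.  A worked example with vector 0x31 (an arbitrary choice):
 *
 *	stub:			pushl $(~0x31 + 0x80)	i.e. pushl $0x4e (imm8)
 *	common_interrupt:	addl  $-0x80, (%esp)	0x4e - 0x80 = -0x32 = ~0x31
 *	the C handler:		vector = ~orig_eax	= 0x31 again
 *
 * For vectors 0x20..0xff the encoded value stays within [-128, 95], hence
 * the "always in signed byte range" notes.
 */
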
#define BUILD_INTERRUPT3(name, nr, fn)			\
ENTRY(name)						\
	ASM_CLAC;					\
	pushl	$~(nr);					\
	SAVE_ALL switch_stacks=1;			\
	ENCODE_FRAME_POINTER;				\
	TRACE_IRQS_OFF					\
	movl	%esp, %eax;				\
	call	fn;					\
	jmp	ret_from_intr;				\
ENDPROC(name)

#define BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(name, nr, smp_##name);	\

/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_error
	jmp	common_exception
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	ASM_CLAC
	pushl	$0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
	ALTERNATIVE "pushl	$do_general_protection",	\
		    "pushl	$do_simd_coprocessor_error",	\
		    X86_FEATURE_XMM
#else
	pushl	$do_simd_coprocessor_error
#endif
	jmp	common_exception
END(simd_coprocessor_error)

ENTRY(device_not_available)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_device_not_available
	jmp	common_exception
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
#endif

ENTRY(overflow)
	ASM_CLAC
	pushl	$0
	pushl	$do_overflow
	jmp	common_exception
END(overflow)

ENTRY(bounds)
	ASM_CLAC
	pushl	$0
	pushl	$do_bounds
	jmp	common_exception
END(bounds)

ENTRY(invalid_op)
	ASM_CLAC
	pushl	$0
	pushl	$do_invalid_op
	jmp	common_exception
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_segment_overrun
	jmp	common_exception
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	ASM_CLAC
	pushl	$do_invalid_TSS
	jmp	common_exception
END(invalid_TSS)

ENTRY(segment_not_present)
	ASM_CLAC
	pushl	$do_segment_not_present
	jmp	common_exception
END(segment_not_present)

ENTRY(stack_segment)
	ASM_CLAC
	pushl	$do_stack_segment
	jmp	common_exception
END(stack_segment)

ENTRY(alignment_check)
	ASM_CLAC
	pushl	$do_alignment_check
	jmp	common_exception
END(alignment_check)

ENTRY(divide_error)
	ASM_CLAC
	pushl	$0				# no error code
	pushl	$do_divide_error
	jmp	common_exception
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	ASM_CLAC
	pushl	$0
	pushl	machine_check_vector
	jmp	common_exception
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	ASM_CLAC
	pushl	$0
	pushl	$do_spurious_interrupt_bug
	jmp	common_exception
END(spurious_interrupt_bug)

#ifdef CONFIG_XEN_PV
ENTRY(xen_hypervisor_callback)
	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF

	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events. This simulates
	 * the iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
	movl	PT_EIP(%esp), %eax
	cmpl	$xen_iret_start_crit, %eax
	jb	1f
	cmpl	$xen_iret_end_crit, %eax
	jae	1f

	jmp	xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
	jmp	ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
 */
ENTRY(xen_failsafe_callback)
	pushl	%eax
	movl	$1, %eax
1:	mov	4(%esp), %ds
2:	mov	8(%esp), %es
3:	mov	12(%esp), %fs
4:	mov	16(%esp), %gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl	%eax, %eax
	popl	%eax
	lea	16(%esp), %esp
	jz	5f
	jmp	iret_exc
5:	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	jmp	ret_from_exception

.section .fixup, "ax"
6:	xorl	%eax, %eax
	movl	%eax, 4(%esp)
	jmp	1b
7:	xorl	%eax, %eax
	movl	%eax, 8(%esp)
	jmp	2b
8:	xorl	%eax, %eax
	movl	%eax, 12(%esp)
	jmp	3b
9:	xorl	%eax, %eax
	movl	%eax, 16(%esp)
	jmp	4b
.previous
	_ASM_EXTABLE(1b, 6b)
	_ASM_EXTABLE(2b, 7b)
	_ASM_EXTABLE(3b, 8b)
	_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */

#ifdef CONFIG_XEN_PVHVM
BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 xen_evtchn_do_upcall)
#endif


#if IS_ENABLED(CONFIG_HYPERV)

BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)

BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
		 hyperv_reenlightenment_intr)

BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
		 hv_stimer0_vector_handler)

#endif /* CONFIG_HYPERV */

ENTRY(page_fault)
	ASM_CLAC
	pushl	$do_page_fault
	ALIGN
	jmp	common_exception
END(page_fault)

common_exception:
	/* the function address is in %gs's slot on the stack */
	FIXUP_FRAME
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	movl	$(__USER_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	$(__KERNEL_PERCPU), %eax
	movl	%eax, %fs
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	SWITCH_TO_KERNEL_STACK
	ENCODE_FRAME_POINTER
	cld
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	CALL_NOSPEC %edi
	jmp	ret_from_exception
END(common_exception)

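/*
 * The "function address is in %gs's slot" convention used by
 * common_exception above, spelled out with one of the stubs from this file
 * (page_fault is just the example, slightly simplified):
 *
 *	ENTRY(page_fault)
 *		ASM_CLAC
 *		pushl	$do_page_fault		# lands where pt_regs->gs will be
 *		jmp	common_exception
 *
 * Exceptions without a CPU-provided error code push a literal $0 (or $-1)
 * first so that the orig_eax slot is always occupied.  common_exception
 * then reads the handler out of PT_GS(%esp) into %edi and replaces that
 * slot with the saved %gs value (GS_TO_REG/REG_TO_PTGS) before calling the
 * handler through CALL_NOSPEC.
 */
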
ENTRY(debug)
	/*
	 * Entry from sysenter is now handled in common_exception
	 */
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_debug
	jmp	common_exception
END(debug)

/*
 * NMI is doubly nasty.  It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks.  We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
	ASM_CLAC

#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
	popl	%eax
	je	.Lnmi_espfix_stack
#endif

	pushl	%eax				# pt_regs->orig_ax
	SAVE_ALL_NMI cr3_reg=%edi
	ENCODE_FRAME_POINTER
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
	cmpl	$SIZEOF_entry_stack, %ecx
	jb	.Lnmi_from_sysenter_stack

	/* Not on SYSENTER stack. */
	call	do_nmi
	jmp	.Lnmi_return

.Lnmi_from_sysenter_stack:
	/*
	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
	 * is using the thread stack right now, so it's safe for us to use it.
	 */
	movl	%esp, %ebx
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	call	do_nmi
	movl	%ebx, %esp

.Lnmi_return:
	CHECK_AND_APPLY_ESPFIX
	RESTORE_ALL_NMI cr3_reg=%edi pop=4
	jmp	.Lirq_return

#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
	/*
	 * create the pointer to lss back
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL_NMI cr3_reg=%edi
	ENCODE_FRAME_POINTER
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_ALL_NMI cr3_reg=%edi
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	.Lirq_return
#endif
END(nmi)

ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int

	SAVE_ALL switch_stacks=1
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_int3
	jmp	ret_from_exception
END(int3)

ENTRY(general_protection)
	pushl	$do_general_protection
	jmp	common_exception
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	common_exception
END(async_page_fault)
#endif

ENTRY(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp

	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp

	call	do_exit
1:	jmp	1b
END(rewind_stack_do_exit)