xref: /openbmc/linux/arch/x86/entry/entry_32.S (revision 8730046c)
1/*
2 *  Copyright (C) 1991,1992  Linus Torvalds
3 *
4 * entry_32.S contains the system-call and low-level fault and trap handling routines.
5 *
6 * Stack layout while running C code:
7 *	ptrace needs to have all registers on the stack.
8 *	If the order here is changed, it needs to be
9 *	updated in fork.c:copy_process(), signal.c:do_signal(),
10 *	ptrace.c and ptrace.h
11 *
12 *	 0(%esp) - %ebx
13 *	 4(%esp) - %ecx
14 *	 8(%esp) - %edx
15 *	 C(%esp) - %esi
16 *	10(%esp) - %edi
17 *	14(%esp) - %ebp
18 *	18(%esp) - %eax
19 *	1C(%esp) - %ds
20 *	20(%esp) - %es
21 *	24(%esp) - %fs
22 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
23 *	2C(%esp) - orig_eax
24 *	30(%esp) - %eip
25 *	34(%esp) - %cs
26 *	38(%esp) - %eflags
27 *	3C(%esp) - %oldesp
28 *	40(%esp) - %oldss
29 */
30
31#include <linux/linkage.h>
32#include <linux/err.h>
33#include <asm/thread_info.h>
34#include <asm/irqflags.h>
35#include <asm/errno.h>
36#include <asm/segment.h>
37#include <asm/smp.h>
38#include <asm/page_types.h>
39#include <asm/percpu.h>
40#include <asm/processor-flags.h>
41#include <asm/ftrace.h>
42#include <asm/irq_vectors.h>
43#include <asm/cpufeatures.h>
44#include <asm/alternative-asm.h>
45#include <asm/asm.h>
46#include <asm/smap.h>
47#include <asm/export.h>
48#include <asm/frame.h>
49
50	.section .entry.text, "ax"
51
52/*
53 * We use macros for low-level operations which need to be overridden
54 * for paravirtualization.  The following will never clobber any registers:
55 *   INTERRUPT_RETURN (aka. "iret")
56 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
57 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
58 *
59 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
60 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
61 * Allowing a register to be clobbered can shrink the paravirt replacement
62 * enough to patch inline, increasing performance.
63 */
64
/*
 * preempt_stop(clobbers): with CONFIG_PREEMPT it disables interrupts and
 * reports that to the irq-flags tracer.  Without CONFIG_PREEMPT it expands
 * to nothing, and resume_kernel becomes an alias for restore_all.
 */
65#ifdef CONFIG_PREEMPT
66# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
67#else
68# define preempt_stop(clobbers)
69# define resume_kernel		restore_all
70#endif
71
/*
 * TRACE_IRQS_IRET - run TRACE_IRQS_ON only when the EFLAGS image saved in
 * the pt_regs frame at %esp has IF set, i.e. the context being returned to
 * has interrupts enabled.  Expands to nothing without CONFIG_TRACE_IRQFLAGS.
 * Uses local label 1.
 */
72.macro TRACE_IRQS_IRET
73#ifdef CONFIG_TRACE_IRQFLAGS
74	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)     # interrupts off?
75	jz	1f
76	TRACE_IRQS_ON
771:
78#endif
79.endm
80
81/*
82 * User gs save/restore
83 *
84 * %gs is used for userland TLS and kernel only uses it for stack
85 * canary which is required to be at %gs:20 by gcc.  Read the comment
86 * at the top of stackprotector.h for more info.
87 *
88 * Local labels 98 and 99 are used.
89 */
90#ifdef CONFIG_X86_32_LAZY_GS
91
/*
 * Lazy variant: %gs is not saved on entry.  The pt_regs slot still has to
 * exist, so PUSH_GS stores a 0 placeholder and POP_GS just drops the slot
 * (4 bytes) plus \pop additional bytes.
 */
92 /* unfortunately push/pop can't be no-op */
93.macro PUSH_GS
94	pushl	$0
95.endm
96.macro POP_GS pop=0
97	addl	$(4 + \pop), %esp
98.endm
99.macro POP_GS_EX
100.endm
101
102 /* all the rest are no-op */
103.macro PTGS_TO_GS
104.endm
105.macro PTGS_TO_GS_EX
106.endm
107.macro GS_TO_REG reg
108.endm
109.macro REG_TO_PTGS reg
110.endm
111.macro SET_KERNEL_GS reg
112.endm
113
114#else	/* CONFIG_X86_32_LAZY_GS */
115
/* Eager variant: %gs is saved in and restored from the pt_regs frame. */
116.macro PUSH_GS
117	pushl	%gs
118.endm
119
/*
 * POP_GS - reload %gs from the top of the stack (local label 98), then
 * discard \pop further bytes if \pop is non-zero.
 */
120.macro POP_GS pop=0
12198:	popl	%gs
122  .if \pop <> 0
123	add	$\pop, %esp
124  .endif
125.endm
/*
 * POP_GS_EX - exception-table entry for the pop at 98 emitted by POP_GS.
 * The fixup (local label 99) overwrites the saved selector at the top of the
 * stack with 0 and jumps back to 98 to retry the pop.
 */
126.macro POP_GS_EX
127.pushsection .fixup, "ax"
12899:	movl	$0, (%esp)
129	jmp	98b
130.popsection
131	_ASM_EXTABLE(98b, 99b)
132.endm
133
/* PTGS_TO_GS - load %gs from the pt_regs gs slot without popping (label 98). */
134.macro PTGS_TO_GS
13598:	mov	PT_GS(%esp), %gs
136.endm
/*
 * PTGS_TO_GS_EX - exception-table entry for the load at 98 emitted by
 * PTGS_TO_GS; the fixup zeroes PT_GS(%esp) and retries the load.
 */
137.macro PTGS_TO_GS_EX
138.pushsection .fixup, "ax"
13999:	movl	$0, PT_GS(%esp)
140	jmp	98b
141.popsection
142	_ASM_EXTABLE(98b, 99b)
143.endm
144
/* GS_TO_REG - copy the current %gs selector into \reg. */
145.macro GS_TO_REG reg
146	movl	%gs, \reg
147.endm
/* REG_TO_PTGS - store \reg into the pt_regs gs slot. */
148.macro REG_TO_PTGS reg
149	movl	\reg, PT_GS(%esp)
150.endm
/* SET_KERNEL_GS - load %gs with __KERNEL_STACK_CANARY; \reg is clobbered. */
151.macro SET_KERNEL_GS reg
152	movl	$(__KERNEL_STACK_CANARY), \reg
153	movl	\reg, %gs
154.endm
155
156#endif /* CONFIG_X86_32_LAZY_GS */
157
/*
 * SAVE_ALL - push the gs..ebx part of the pt_regs frame (layout described
 * at the top of this file) and load the kernel's segment values:
 * %ds/%es = __USER_DS, %fs = __KERNEL_PERCPU, %gs via SET_KERNEL_GS.
 *
 * pt_regs_ax: operand pushed into the %eax slot.  Defaults to %eax; the
 *             system-call entries in this file pass $-ENOSYS instead.
 *
 * %edx is used as scratch once its original value has been pushed.
 */
158.macro SAVE_ALL pt_regs_ax=%eax
159	cld
160	PUSH_GS
161	pushl	%fs
162	pushl	%es
163	pushl	%ds
164	pushl	\pt_regs_ax
165	pushl	%ebp
166	pushl	%edi
167	pushl	%esi
168	pushl	%edx
169	pushl	%ecx
170	pushl	%ebx
171	movl	$(__USER_DS), %edx
172	movl	%edx, %ds
173	movl	%edx, %es
174	movl	$(__KERNEL_PERCPU), %edx
175	movl	%edx, %fs
176	SET_KERNEL_GS %edx
177.endm
178
179/*
180 * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
181 * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
182 * is just setting the LSB, which makes it an invalid stack address and is also
183 * a signal to the unwinder that it's a pt_regs pointer in disguise.
184 *
185 * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
186 * original %ebp.
187 */
188.macro ENCODE_FRAME_POINTER
189#ifdef CONFIG_FRAME_POINTER
190	mov %esp, %ebp		/* %ebp = current %esp (expected to point at pt_regs) */
191	orl $0x1, %ebp		/* set the LSB to tag it as an encoded pt_regs pointer */
192#endif
193.endm
194
/*
 * RESTORE_INT_REGS - pop the seven general-purpose registers in the reverse
 * of the order in which SAVE_ALL pushed them (ebx first, eax last).
 */
195.macro RESTORE_INT_REGS
196	popl	%ebx
197	popl	%ecx
198	popl	%edx
199	popl	%esi
200	popl	%edi
201	popl	%ebp
202	popl	%eax
203.endm
204
/*
 * RESTORE_REGS - undo SAVE_ALL: pop the integer registers, then %ds, %es,
 * %fs and (through POP_GS) %gs, and finally discard \pop extra bytes.
 *
 * Every segment pop has an exception-table entry.  Its fixup overwrites the
 * saved selector at the top of the stack with 0 and retries the pop.
 * Uses local labels 1-6 here; POP_GS/POP_GS_EX may add 98/99.
 */
205.macro RESTORE_REGS pop=0
206	RESTORE_INT_REGS
2071:	popl	%ds
2082:	popl	%es
2093:	popl	%fs
210	POP_GS \pop
211.pushsection .fixup, "ax"
2124:	movl	$0, (%esp)
213	jmp	1b
2145:	movl	$0, (%esp)
215	jmp	2b
2166:	movl	$0, (%esp)
217	jmp	3b
218.popsection
219	_ASM_EXTABLE(1b, 4b)
220	_ASM_EXTABLE(2b, 5b)
221	_ASM_EXTABLE(3b, 6b)
222	POP_GS_EX
223.endm
224
225/*
226 * %eax: prev task
227 * %edx: next task
228 */
229ENTRY(__switch_to_asm)
230	/*
231	 * Save callee-saved registers
232	 * This must match the order in struct inactive_task_frame
233	 */
234	pushl	%ebp
235	pushl	%ebx
236	pushl	%edi
237	pushl	%esi
238
239	/* switch stack */
240	movl	%esp, TASK_threadsp(%eax)	/* remember prev's stack pointer */
241	movl	TASK_threadsp(%edx), %esp	/* continue on next's saved stack */
242
243#ifdef CONFIG_CC_STACKPROTECTOR
	/* install next's stack canary in the per-cpu canary slot */
244	movl	TASK_stack_canary(%edx), %ebx
245	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
246#endif
247
248	/* restore callee-saved registers */
249	popl	%esi
250	popl	%edi
251	popl	%ebx
252	popl	%ebp
253
	/*
	 * Tail-jump: %eax/%edx are untouched above, and __switch_to's return
	 * address is whatever now sits on top of next's stack.
	 */
254	jmp	__switch_to
255END(__switch_to_asm)
256
257/*
258 * A newly forked process directly context switches into this address.
259 *
260 * eax: prev task we switched from
261 * ebx: kernel thread func (NULL for user thread)
262 * edi: kernel thread arg
263 */
264ENTRY(ret_from_fork)
265	FRAME_BEGIN		/* help unwinder find end of stack */
266
267	/*
268	 * schedule_tail() is asmlinkage so we have to put its 'prev' argument
269	 * on the stack.
270	 */
271	pushl	%eax
272	call	schedule_tail
273	popl	%eax
274
	/* %ebx != 0 means this is a kernel thread and %ebx is its function */
275	testl	%ebx, %ebx
276	jnz	1f		/* kernel threads are uncommon */
277
2782:
279	/* When we fork, we trace the syscall return in the child, too. */
280	leal	FRAME_OFFSET(%esp), %eax	/* %eax = address just above the FRAME_BEGIN frame */
281	call    syscall_return_slowpath
282	FRAME_END
283	jmp     restore_all
284
285	/* kernel thread */
2861:	movl	%edi, %eax		/* argument for the thread function */
287	call	*%ebx
288	/*
289	 * A kernel thread is allowed to return here after successfully
290	 * calling do_execve().  Exit to userspace to complete the execve()
291	 * syscall.
292	 */
293	movl	$0, PT_EAX(%esp)
294	jmp	2b
295END(ret_from_fork)
296
297/*
298 * Return to user mode is not as complex as all this looks,
299 * but we want the default path for a system call return to
300 * go as quickly as possible which is why some of this is
301 * less clear than it otherwise should be.
302 */
303
304	# userspace resumption stub bypassing syscall exit tracing
305	ALIGN
/*
 * ret_from_exception / ret_from_intr: common tail of exception and
 * interrupt handlers.  Decides from the saved CS (and, with CONFIG_VM86,
 * the saved EFLAGS.VM bit) whether the interrupted context was the kernel
 * or user/vm86 mode, and branches to resume_kernel or falls through to
 * resume_userspace accordingly.
 */
306ret_from_exception:
307	preempt_stop(CLBR_ANY)
308ret_from_intr:
309#ifdef CONFIG_VM86
310	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
311	movb	PT_CS(%esp), %al
312	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
313#else
314	/*
315	 * We can be coming here from child spawned by kernel_thread().
316	 */
317	movl	PT_CS(%esp), %eax
318	andl	$SEGMENT_RPL_MASK, %eax
319#endif
	/* unsigned compare: anything below USER_RPL is a return to the kernel */
320	cmpl	$USER_RPL, %eax
321	jb	resume_kernel			# not returning to v8086 or userspace
322
323ENTRY(resume_userspace)
324	DISABLE_INTERRUPTS(CLBR_ANY)
325	TRACE_IRQS_OFF
326	movl	%esp, %eax			/* %eax = pt_regs pointer */
327	call	prepare_exit_to_usermode
328	jmp	restore_all
329END(ret_from_exception)
330
331#ifdef CONFIG_PREEMPT
/*
 * resume_kernel - return to interrupted kernel code, offering a preemption
 * point first.  preempt_schedule_irq is called, repeatedly, only while the
 * per-cpu __preempt_count is zero and the interrupted context had
 * interrupts enabled.  Otherwise go straight to restore_all.
 */
332ENTRY(resume_kernel)
333	DISABLE_INTERRUPTS(CLBR_ANY)
334.Lneed_resched:
335	cmpl	$0, PER_CPU_VAR(__preempt_count)
336	jnz	restore_all
337	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
338	jz	restore_all
339	call	preempt_schedule_irq
340	jmp	.Lneed_resched
341END(resume_kernel)
342#endif
343
344GLOBAL(__begin_SYSENTER_singlestep_region)
345/*
346 * All code from here through __end_SYSENTER_singlestep_region is subject
347 * to being single-stepped if a user program sets TF and executes SYSENTER.
348 * There is absolutely nothing that we can do to prevent this from happening
349 * (thanks Intel!).  To keep our handling of this situation as simple as
350 * possible, we handle TF just like AC and NT, except that our #DB handler
351 * will ignore all of the single-step traps generated in this range.
352 */
353
354#ifdef CONFIG_XEN
355/*
356 * Xen doesn't set %esp to be precisely what the normal SYSENTER
357 * entry point expects, so fix it up before using the normal path.
358 */
359ENTRY(xen_sysenter_target)
360	addl	$5*4, %esp			/* remove xen-provided frame */
	/* join the native path after its stack-pointer load */
361	jmp	.Lsysenter_past_esp
362#endif
363
364/*
365 * 32-bit SYSENTER entry.
366 *
367 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
368 * if X86_FEATURE_SEP is available.  This is the preferred system call
369 * entry on 32-bit systems.
370 *
371 * The SYSENTER instruction, in principle, should *only* occur in the
372 * vDSO.  In practice, a small number of Android devices were shipped
373 * with a copy of Bionic that inlined a SYSENTER instruction.  This
374 * never happened in any of Google's Bionic versions -- it only happened
375 * in a narrow range of Intel-provided versions.
376 *
377 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
378 * IF and VM in EFLAGS are cleared (IOW: interrupts are off).
379 * SYSENTER does not save anything on the stack,
380 * and does not save old EIP (!!!), ESP, or EFLAGS.
381 *
382 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
383 * user and/or vm86 state), we explicitly disable the SYSENTER
384 * instruction in vm86 mode by reprogramming the MSRs.
385 *
386 * Arguments:
387 * eax  system call number
388 * ebx  arg1
389 * ecx  arg2
390 * edx  arg3
391 * esi  arg4
392 * edi  arg5
393 * ebp  user stack
394 * 0(%ebp) arg6
395 */
396ENTRY(entry_SYSENTER_32)
	/* replace the SYSENTER-time %esp with the value stored at TSS_sysenter_sp0(%esp) */
397	movl	TSS_sysenter_sp0(%esp), %esp
398.Lsysenter_past_esp:
	/* Build the hardware-style part of pt_regs by hand. */
399	pushl	$__USER_DS		/* pt_regs->ss */
400	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
401	pushfl				/* pt_regs->flags (except IF = 0) */
402	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
403	pushl	$__USER_CS		/* pt_regs->cs */
404	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
405	pushl	%eax			/* pt_regs->orig_ax */
406	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
407
408	/*
409	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
410	 * and TF ourselves.  To save a few cycles, we can check whether
411	 * any of them was set instead of doing an unconditional popfl.
412	 * This needs to happen before enabling interrupts so that
413	 * we don't get preempted with NT set.
414	 *
415	 * If TF is set, we will single-step all the way to here -- do_debug
416	 * will ignore all the traps.  (Yes, this is slow, but so is
417	 * single-stepping in general.  This allows us to avoid having
418	 * a more complicated code to handle the case where a user program
419	 * forces us to single-step through the SYSENTER entry code.)
420	 *
421	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
422	 * out-of-line as an optimization: NT is unlikely to be set in the
423	 * majority of the cases and instead of polluting the I$ unnecessarily,
424	 * we're keeping that code behind a branch which will predict as
425	 * not-taken and therefore its instructions won't be fetched.
426	 */
427	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
428	jnz	.Lsysenter_fix_flags
429.Lsysenter_flags_fixed:
430
431	/*
432	 * User mode is traced as though IRQs are on, and SYSENTER
433	 * turned them off.
434	 */
435	TRACE_IRQS_OFF
436
437	movl	%esp, %eax		/* %eax = pt_regs pointer */
438	call	do_fast_syscall_32
	/*
	 * A zero return value in %eax sends us to the IRET exit at
	 * .Lsyscall_32_done; non-zero falls through to SYSEXIT.
	 */
439	/* XEN PV guests always use IRET path */
440	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
441		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
442
443/* Opportunistic SYSEXIT */
444	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
445	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
446	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
	/* The %fs load has an exception-table entry; see the fixup at 2 below. */
4471:	mov	PT_FS(%esp), %fs
448	PTGS_TO_GS
449	popl	%ebx			/* pt_regs->bx */
450	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
451	popl	%esi			/* pt_regs->si */
452	popl	%edi			/* pt_regs->di */
453	popl	%ebp			/* pt_regs->bp */
454	popl	%eax			/* pt_regs->ax */
455
456	/*
457	 * Restore all flags except IF. (We restore IF separately because
458	 * STI gives a one-instruction window in which we won't be interrupted,
459	 * whereas POPF does not.)
460	 */
461	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
462	btr	$X86_EFLAGS_IF_BIT, (%esp)
463	popfl
464
465	/*
466	 * Return back to the vDSO, which will pop ecx and edx.
467	 * Don't bother with DS and ES (they already contain __USER_DS).
468	 */
469	sti
470	sysexit
471
	/* Fixup for the %fs load at 1: zero the saved selector and retry. */
472.pushsection .fixup, "ax"
4732:	movl	$0, PT_FS(%esp)
474	jmp	1b
475.popsection
476	_ASM_EXTABLE(1b, 2b)
477	PTGS_TO_GS_EX
478
	/* Out-of-line slow path: reload EFLAGS with only X86_EFLAGS_FIXED. */
479.Lsysenter_fix_flags:
480	pushl	$X86_EFLAGS_FIXED
481	popfl
482	jmp	.Lsysenter_flags_fixed
483GLOBAL(__end_SYSENTER_singlestep_region)
484ENDPROC(entry_SYSENTER_32)
485
486/*
487 * 32-bit legacy system call entry.
488 *
489 * 32-bit x86 Linux system calls traditionally used the INT $0x80
490 * instruction.  INT $0x80 lands here.
491 *
492 * This entry point can be used by any 32-bit program to perform system calls.
493 * Instances of INT $0x80 can be found inline in various programs and
494 * libraries.  It is also used by the vDSO's __kernel_vsyscall
495 * fallback for hardware that doesn't support a faster entry method.
496 * Restarted 32-bit system calls also fall back to INT $0x80
497 * regardless of what instruction was originally used to do the system
498 * call.  (64-bit programs can use INT $0x80 as well, but they can
499 * only run on 64-bit kernels and therefore land in
500 * entry_INT80_compat.)
501 *
502 * This is considered a slow path.  It is not used by most libc
503 * implementations on modern hardware except during process startup.
504 *
505 * Arguments:
506 * eax  system call number
507 * ebx  arg1
508 * ecx  arg2
509 * edx  arg3
510 * esi  arg4
511 * edi  arg5
512 * ebp  arg6
513 */
514ENTRY(entry_INT80_32)
515	ASM_CLAC
516	pushl	%eax			/* pt_regs->orig_ax */
517	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
518
519	/*
520	 * User mode is traced as though IRQs are on, and the interrupt gate
521	 * turned them off.
522	 */
523	TRACE_IRQS_OFF
524
525	movl	%esp, %eax		/* %eax = pt_regs pointer */
526	call	do_int80_syscall_32
527.Lsyscall_32_done:
528
	/*
	 * restore_all: IRET-based exit, also reached from the other return
	 * paths in this file (ret_from_fork, resume_userspace, resume_kernel).
	 * do_nmi callers enter at .Lrestore_all_notrace to skip irq tracing.
	 */
529restore_all:
530	TRACE_IRQS_IRET
531.Lrestore_all_notrace:
532#ifdef CONFIG_X86_ESPFIX32
533	ALTERNATIVE	"jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
534
535	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
536	/*
537	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
538	 * are returning to the kernel.
539	 * See comments in process.c:copy_thread() for details.
540	 */
541	movb	PT_OLDSS(%esp), %ah
542	movb	PT_CS(%esp), %al
	/* match: not vm86, SS selector refers to the LDT, CS has USER_RPL */
543	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
544	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
545	je .Lldt_ss				# returning to user-space with LDT SS
546#endif
547.Lrestore_nocheck:
548	RESTORE_REGS 4				# skip orig_eax/error_code
549.Lirq_return:
550	INTERRUPT_RETURN
551
	/*
	 * iret_exc: exception-table target for a faulting INTERRUPT_RETURN.
	 * Pushes a zero error code and do_iret_error as the handler, then
	 * enters common_exception.
	 */
552.section .fixup, "ax"
553ENTRY(iret_exc	)
554	pushl	$0				# no error code
555	pushl	$do_iret_error
556	jmp	common_exception
557.previous
558	_ASM_EXTABLE(.Lirq_return, iret_exc)
559
560#ifdef CONFIG_X86_ESPFIX32
561.Lldt_ss:
562/*
563 * Setup and switch to ESPFIX stack
564 *
565 * We're returning to userspace with a 16 bit stack. The CPU will not
566 * restore the high word of ESP for us on executing iret... This is an
567 * "official" bug of all the x86-compatible CPUs, which we can work
568 * around to make dosemu and wine happy. We do this by preloading the
569 * high word of ESP with the high word of the userspace ESP while
570 * compensating for the offset by changing to the ESPFIX segment with
571 * a base address that matches for the difference.
572 */
573#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
574	mov	%esp, %edx			/* load kernel esp */
575	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
576	mov	%dx, %ax			/* eax: new kernel esp */
577	sub	%eax, %edx			/* offset (low word is 0) */
578	shr	$16, %edx
	/* patch the high base bytes of the ESPFIX_SS descriptor in the GDT */
579	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
580	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	/* build the ss:esp far pointer consumed by the lss below */
581	pushl	$__ESPFIX_SS
582	pushl	%eax				/* new kernel esp */
583	/*
584	 * Disable interrupts, but do not irqtrace this section: we
585	 * will soon execute iret and the tracer was already set to
586	 * the irqstate after the IRET:
587	 */
588	DISABLE_INTERRUPTS(CLBR_EAX)
589	lss	(%esp), %esp			/* switch to espfix segment */
590	jmp	.Lrestore_nocheck
591#endif
592ENDPROC(entry_INT80_32)
593
594.macro FIXUP_ESPFIX_STACK
595/*
596 * Switch back from the ESPFIX stack to the normal zero-based stack
597 *
598 * We can't call C functions using the ESPFIX stack. This code reads
599 * the high word of the segment base from the GDT and switches to the
600 * normal stack and adjusts ESP with the matching offset.
601 *
 * Clobbers %eax; on exit %eax holds the adjusted stack pointer that was
 * pushed for the lss.  Relies on GDT_ESPFIX_SS being #defined earlier in
 * this file.
 */
602#ifdef CONFIG_X86_ESPFIX32
603	/* fixup the stack */
604	mov	GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
605	mov	GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
606	shl	$16, %eax
607	addl	%esp, %eax			/* the adjusted stack pointer */
608	pushl	$__KERNEL_DS
609	pushl	%eax
610	lss	(%esp), %esp			/* switch to the normal stack segment */
611#endif
612.endm
/*
 * UNWIND_ESPFIX_STACK - if %ss currently holds __ESPFIX_SS, load %ds/%es
 * with __KERNEL_DS and switch to the normal stack via FIXUP_ESPFIX_STACK.
 * Otherwise do nothing.  Clobbers %eax; uses local label 27.
 */
613.macro UNWIND_ESPFIX_STACK
614#ifdef CONFIG_X86_ESPFIX32
615	movl	%ss, %eax
616	/* see if on espfix stack */
617	cmpw	$__ESPFIX_SS, %ax
618	jne	27f
619	movl	$__KERNEL_DS, %eax
620	movl	%eax, %ds
621	movl	%eax, %es
622	/* switch to normal stack */
623	FIXUP_ESPFIX_STACK
62427:
625#endif
626.endm
627
628/*
629 * Build the entry stubs with some assembler magic.
630 * We pack 1 stub into every 8-byte block.
631 */
632	.align 8
/*
 * One stub per vector in [FIRST_EXTERNAL_VECTOR, FIRST_SYSTEM_VECTOR).
 * Each stub pushes an encoded vector number (~vector + 0x80) and jumps to
 * common_interrupt, which subtracts the 0x80 bias again.  The .align 8
 * after each stub starts the next one on an 8-byte boundary.
 */
633ENTRY(irq_entries_start)
634    vector=FIRST_EXTERNAL_VECTOR
635    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
636	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
637    vector=vector+1
638	jmp	common_interrupt
639	.align	8
640    .endr
641END(irq_entries_start)
642
643/*
644 * the CPU automatically disables interrupts when executing an IRQ vector,
645 * so IRQ-flags tracing has to follow that:
646 */
647	.p2align CONFIG_X86_L1_CACHE_SHIFT
/*
 * common_interrupt - shared body of the irq_entries_start stubs.
 * On entry the top of the stack holds the biased vector pushed by the stub.
 * Calls do_IRQ with %eax = pt_regs pointer and leaves via ret_from_intr.
 */
648common_interrupt:
649	ASM_CLAC
650	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
651	SAVE_ALL
652	ENCODE_FRAME_POINTER
653	TRACE_IRQS_OFF
654	movl	%esp, %eax
655	call	do_IRQ
656	jmp	ret_from_intr
657ENDPROC(common_interrupt)
658
/*
 * BUILD_INTERRUPT3(name, nr, fn) - emit an interrupt entry point 'name'.
 *
 * The stub pushes ~nr into the orig_eax slot, builds the register frame
 * with SAVE_ALL, tags the frame pointer for the unwinder, calls fn with
 * %eax = pt_regs pointer and leaves through ret_from_intr.
 *
 * The preprocessor pastes the whole body onto one line, so every statement
 * is terminated with an explicit ';'.  That includes TRACE_IRQS_OFF, which
 * previously relied on its expansion providing the separator.  An extra
 * ';' is only an empty statement to the assembler.
 */
659#define BUILD_INTERRUPT3(name, nr, fn)	\
660ENTRY(name)				\
661	ASM_CLAC;			\
662	pushl	$~(nr);			\
663	SAVE_ALL;			\
664	ENCODE_FRAME_POINTER;		\
665	TRACE_IRQS_OFF;			\
666	movl	%esp, %eax;		\
667	call	fn;			\
668	jmp	ret_from_intr;		\
669ENDPROC(name)
670
671
/*
 * BUILD_INTERRUPT(name, nr) emits entry 'name' calling smp_<name>.  With
 * CONFIG_TRACING it also emits 'trace_<name>' calling smp_trace_<name>
 * for the same vector number.
 */
672#ifdef CONFIG_TRACING
673# define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
674#else
675# define TRACE_BUILD_INTERRUPT(name, nr)
676#endif
677
678#define BUILD_INTERRUPT(name, nr)		\
679	BUILD_INTERRUPT3(name, nr, smp_##name);	\
680	TRACE_BUILD_INTERRUPT(name, nr)
681
682/* The include is where all of the SMP etc. interrupts come from */
683#include <asm/entry_arch.h>
684
/*
 * Exception stub: push 0 into the error-code slot and do_coprocessor_error
 * as the handler, then enter common_exception.
 */
685ENTRY(coprocessor_error)
686	ASM_CLAC
687	pushl	$0
688	pushl	$do_coprocessor_error
689	jmp	common_exception
690END(coprocessor_error)
691
/*
 * Exception stub for SIMD floating-point errors: zero error code, handler
 * do_simd_coprocessor_error.  With CONFIG_X86_INVD_BUG the handler is
 * chosen by ALTERNATIVE: do_general_protection unless X86_FEATURE_XMM is
 * present.
 */
692ENTRY(simd_coprocessor_error)
693	ASM_CLAC
694	pushl	$0
695#ifdef CONFIG_X86_INVD_BUG
696	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
697	ALTERNATIVE "pushl	$do_general_protection",	\
698		    "pushl	$do_simd_coprocessor_error",	\
699		    X86_FEATURE_XMM
700#else
701	pushl	$do_simd_coprocessor_error
702#endif
703	jmp	common_exception
704END(simd_coprocessor_error)
705
/*
 * Exception stub: push -1 into the error-code slot and
 * do_device_not_available as the handler, then enter common_exception.
 */
706ENTRY(device_not_available)
707	ASM_CLAC
708	pushl	$-1				# mark this as an int
709	pushl	$do_device_not_available
710	jmp	common_exception
711END(device_not_available)
712
713#ifdef CONFIG_PARAVIRT
/*
 * native_iret - bare IRET.  Its exception-table entry sends a fault on the
 * instruction to iret_exc.
 */
714ENTRY(native_iret)
715	iret
716	_ASM_EXTABLE(native_iret, iret_exc)
717END(native_iret)
718#endif
719
/* Exception stub: zero error code, handler do_overflow, then common_exception. */
720ENTRY(overflow)
721	ASM_CLAC
722	pushl	$0
723	pushl	$do_overflow
724	jmp	common_exception
725END(overflow)
726
/* Exception stub: zero error code, handler do_bounds, then common_exception. */
727ENTRY(bounds)
728	ASM_CLAC
729	pushl	$0
730	pushl	$do_bounds
731	jmp	common_exception
732END(bounds)
733
/* Exception stub: zero error code, handler do_invalid_op, then common_exception. */
734ENTRY(invalid_op)
735	ASM_CLAC
736	pushl	$0
737	pushl	$do_invalid_op
738	jmp	common_exception
739END(invalid_op)
740
/*
 * Exception stub: zero error code, handler do_coprocessor_segment_overrun,
 * then common_exception.
 */
741ENTRY(coprocessor_segment_overrun)
742	ASM_CLAC
743	pushl	$0
744	pushl	$do_coprocessor_segment_overrun
745	jmp	common_exception
746END(coprocessor_segment_overrun)
747
/*
 * Exception stub: pushes only the handler (do_invalid_TSS).  No error-code
 * placeholder is pushed here, so the slot common_exception reads as the
 * error code must already be on the stack - presumably put there by the
 * CPU for this vector.
 */
748ENTRY(invalid_TSS)
749	ASM_CLAC
750	pushl	$do_invalid_TSS
751	jmp	common_exception
752END(invalid_TSS)
753
/*
 * Exception stub: pushes only the handler (do_segment_not_present).  The
 * error-code slot is expected to be on the stack already - presumably
 * supplied by the CPU for this vector.
 */
754ENTRY(segment_not_present)
755	ASM_CLAC
756	pushl	$do_segment_not_present
757	jmp	common_exception
758END(segment_not_present)
759
/*
 * Exception stub: pushes only the handler (do_stack_segment).  The
 * error-code slot is expected to be on the stack already - presumably
 * supplied by the CPU for this vector.
 */
760ENTRY(stack_segment)
761	ASM_CLAC
762	pushl	$do_stack_segment
763	jmp	common_exception
764END(stack_segment)
765
/*
 * Exception stub: pushes only the handler (do_alignment_check).  The
 * error-code slot is expected to be on the stack already - presumably
 * supplied by the CPU for this vector.
 */
766ENTRY(alignment_check)
767	ASM_CLAC
768	pushl	$do_alignment_check
769	jmp	common_exception
770END(alignment_check)
771
/* Exception stub: zero error code, handler do_divide_error, then common_exception. */
772ENTRY(divide_error)
773	ASM_CLAC
774	pushl	$0				# no error code
775	pushl	$do_divide_error
776	jmp	common_exception
777END(divide_error)
778
779#ifdef CONFIG_X86_MCE
/*
 * Exception stub: zero error code.  Unlike the other stubs, the handler is
 * not an immediate: it is read from memory at machine_check_vector when the
 * exception is taken.
 */
780ENTRY(machine_check)
781	ASM_CLAC
782	pushl	$0
783	pushl	machine_check_vector
784	jmp	common_exception
785END(machine_check)
786#endif
787
/*
 * Exception stub: zero error code, handler do_spurious_interrupt_bug, then
 * common_exception.
 */
788ENTRY(spurious_interrupt_bug)
789	ASM_CLAC
790	pushl	$0
791	pushl	$do_spurious_interrupt_bug
792	jmp	common_exception
793END(spurious_interrupt_bug)
794
795#ifdef CONFIG_XEN
/*
 * xen_hypervisor_callback - Xen event upcall entry.  Builds a pt_regs frame
 * and calls xen_evtchn_do_upcall(%eax = pt_regs).  If the saved EIP lies in
 * [xen_iret_start_crit, xen_iret_end_crit), control goes to
 * xen_iret_crit_fixup first.
 */
796ENTRY(xen_hypervisor_callback)
797	pushl	$-1				/* orig_ax = -1 => not a system call */
798	SAVE_ALL
799	ENCODE_FRAME_POINTER
800	TRACE_IRQS_OFF
801
802	/*
803	 * Check to see if we got the event in the critical
804	 * region in xen_iret_direct, after we've reenabled
805	 * events and checked for pending events.  This simulates
806	 * iret instruction's behaviour where it delivers a
807	 * pending interrupt when enabling interrupts:
808	 */
809	movl	PT_EIP(%esp), %eax
810	cmpl	$xen_iret_start_crit, %eax
811	jb	1f
812	cmpl	$xen_iret_end_crit, %eax
813	jae	1f
814
815	jmp	xen_iret_crit_fixup
816
817ENTRY(xen_do_upcall)
8181:	mov	%esp, %eax
819	call	xen_evtchn_do_upcall
820#ifndef CONFIG_PREEMPT
821	call	xen_maybe_preempt_hcall
822#endif
823	jmp	ret_from_intr
824ENDPROC(xen_hypervisor_callback)
825
826/*
827 * Hypervisor uses this for application faults while it executes.
828 * We get here for two reasons:
829 *  1. Fault while reloading DS, ES, FS or GS
830 *  2. Fault while executing IRET
831 * Category 1 we fix up by reattempting the load, and zeroing the segment
832 * register if the load fails.
833 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
834 * normal Linux return path in this case because if we use the IRET hypercall
835 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
836 * We distinguish between categories by maintaining a status value in EAX.
837 */
838ENTRY(xen_failsafe_callback)
839	pushl	%eax
	/* %eax is the category flag; any segment-load fixup below clears it */
840	movl	$1, %eax
	/* reload the four selectors found just above the saved %eax */
8411:	mov	4(%esp), %ds
8422:	mov	8(%esp), %es
8433:	mov	12(%esp), %fs
8444:	mov	16(%esp), %gs
845	/* EAX == 0 => Category 1 (Bad segment)
846	   EAX != 0 => Category 2 (Bad IRET) */
847	testl	%eax, %eax
848	popl	%eax
	/* lea drops the four selector words without disturbing the flags from testl */
849	lea	16(%esp), %esp
850	jz	5f
851	jmp	iret_exc
8525:	pushl	$-1				/* orig_ax = -1 => not a system call */
853	SAVE_ALL
854	ENCODE_FRAME_POINTER
855	jmp	ret_from_exception
856
	/*
	 * Fixups for the loads at 1-4: clear %eax (marking category 1), zero
	 * the offending selector on the stack and retry the load.
	 */
857.section .fixup, "ax"
8586:	xorl	%eax, %eax
859	movl	%eax, 4(%esp)
860	jmp	1b
8617:	xorl	%eax, %eax
862	movl	%eax, 8(%esp)
863	jmp	2b
8648:	xorl	%eax, %eax
865	movl	%eax, 12(%esp)
866	jmp	3b
8679:	xorl	%eax, %eax
868	movl	%eax, 16(%esp)
869	jmp	4b
870.previous
871	_ASM_EXTABLE(1b, 6b)
872	_ASM_EXTABLE(2b, 7b)
873	_ASM_EXTABLE(3b, 8b)
874	_ASM_EXTABLE(4b, 9b)
875ENDPROC(xen_failsafe_callback)
876
/*
 * Entry stub xen_hvm_callback_vector for HYPERVISOR_CALLBACK_VECTOR;
 * calls xen_evtchn_do_upcall.
 */
877BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
878		xen_evtchn_do_upcall)
879
880#endif /* CONFIG_XEN */
881
882#if IS_ENABLED(CONFIG_HYPERV)
883
/*
 * Entry stub hyperv_callback_vector for HYPERVISOR_CALLBACK_VECTOR;
 * calls hyperv_vector_handler.
 */
884BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
885	hyperv_vector_handler)
886
887#endif /* CONFIG_HYPERV */
888
889#ifdef CONFIG_FUNCTION_TRACER
890#ifdef CONFIG_DYNAMIC_FTRACE
891
/* With CONFIG_DYNAMIC_FTRACE, mcount itself does nothing but return. */
892ENTRY(mcount)
893	ret
894END(mcount)
895
/*
 * ftrace_caller - tracing trampoline that passes no pt_regs.
 * Saves %eax/%ecx/%edx, loads the tracer arguments and calls through the
 * ftrace_call site, which targets ftrace_stub as assembled.
 */
896ENTRY(ftrace_caller)
897	pushl	%eax
898	pushl	%ecx
899	pushl	%edx
900	pushl	$0				/* Pass NULL as regs pointer */
901	movl	4*4(%esp), %eax			/* word just above the four pushes: our return address */
902	movl	0x4(%ebp), %edx			/* word at 4(%ebp) */
903	movl	function_trace_op, %ecx
904	subl	$MCOUNT_INSN_SIZE, %eax
905
906.globl ftrace_call
907ftrace_call:
908	call	ftrace_stub
909
910	addl	$4, %esp			/* skip NULL pointer */
911	popl	%edx
912	popl	%ecx
913	popl	%eax
914.Lftrace_ret:
915#ifdef CONFIG_FUNCTION_GRAPH_TRACER
916.globl ftrace_graph_call
917ftrace_graph_call:
918	jmp	ftrace_stub
919#endif
920
921/* This is weak to keep gas from relaxing the jumps */
922WEAK(ftrace_stub)
923	ret
924END(ftrace_caller)
925
/*
 * ftrace_regs_caller - like ftrace_caller, but builds a pt_regs-shaped frame
 * on the stack and passes its address to the tracer as the fourth parameter.
 *
 * The return address and the saved flags are shuffled between frame slots
 * while the tracer runs (see the comments below) and moved back before the
 * registers are popped.  Exits through .Lftrace_ret in ftrace_caller.
 *
 * The instructions that used to follow the final unconditional jmp had no
 * label and could never execute, so they have been removed.
 */
926ENTRY(ftrace_regs_caller)
927	pushf	/* push flags before compare (in cs location) */
928
929	/*
930	 * i386 does not save SS and ESP when coming from kernel.
931	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
932	 * Unfortunately, that means eflags must be at the same location
933	 * as the current return ip is. We move the return ip into the
934	 * ip location, and move flags into the return ip location.
935	 */
936	pushl	4(%esp)				/* save return ip into ip slot */
937
938	pushl	$0				/* Load 0 into orig_ax */
939	pushl	%gs
940	pushl	%fs
941	pushl	%es
942	pushl	%ds
943	pushl	%eax
944	pushl	%ebp
945	pushl	%edi
946	pushl	%esi
947	pushl	%edx
948	pushl	%ecx
949	pushl	%ebx
950
951	movl	13*4(%esp), %eax		/* Get the saved flags */
952	movl	%eax, 14*4(%esp)		/* Move saved flags into regs->flags location */
953						/* clobbering return ip */
954	movl	$__KERNEL_CS, 13*4(%esp)
955
956	movl	12*4(%esp), %eax		/* Load ip (1st parameter) */
957	subl	$MCOUNT_INSN_SIZE, %eax		/* Adjust ip */
958	movl	0x4(%ebp), %edx			/* Load parent ip (2nd parameter) */
959	movl	function_trace_op, %ecx		/* Save ftrace_pos in 3rd parameter */
960	pushl	%esp				/* Save pt_regs as 4th parameter */
961
962GLOBAL(ftrace_regs_call)
963	call	ftrace_stub
964
965	addl	$4, %esp			/* Skip pt_regs */
966	movl	14*4(%esp), %eax		/* Move flags back into cs */
967	movl	%eax, 13*4(%esp)		/* Needed to keep addl	from modifying flags */
968	movl	12*4(%esp), %eax		/* Get return ip from regs->ip */
969	movl	%eax, 14*4(%esp)		/* Put return ip back for ret */
970
971	popl	%ebx
972	popl	%ecx
973	popl	%edx
974	popl	%esi
975	popl	%edi
976	popl	%ebp
977	popl	%eax
978	popl	%ds
979	popl	%es
980	popl	%fs
981	popl	%gs
982	addl	$8, %esp			/* Skip orig_ax and ip */
983	popf					/* Pop flags at end (no addl to corrupt flags) */
984	jmp	.Lftrace_ret
988#else /* ! CONFIG_DYNAMIC_FTRACE */
989
/*
 * mcount (static ftrace) - called from each traced function's prologue.
 * Returns immediately unless a tracer other than ftrace_stub is installed.
 * Otherwise calls *ftrace_trace_function, or jumps to ftrace_graph_caller
 * when a graph-tracer hook is installed.
 */
990ENTRY(mcount)
991	cmpl	$__PAGE_OFFSET, %esp
992	jb	ftrace_stub			/* Paging not enabled yet? */
993
994	cmpl	$ftrace_stub, ftrace_trace_function
995	jnz	.Ltrace
996#ifdef CONFIG_FUNCTION_GRAPH_TRACER
997	cmpl	$ftrace_stub, ftrace_graph_return
998	jnz	ftrace_graph_caller
999
1000	cmpl	$ftrace_graph_entry_stub, ftrace_graph_entry
1001	jnz	ftrace_graph_caller
1002#endif
1003.globl ftrace_stub
1004ftrace_stub:
1005	ret
1006
1007	/* taken from glibc */
1008.Ltrace:
1009	pushl	%eax
1010	pushl	%ecx
1011	pushl	%edx
1012	movl	0xc(%esp), %eax			/* word above the three pushes: mcount's return address */
1013	movl	0x4(%ebp), %edx			/* word at 4(%ebp) */
1014	subl	$MCOUNT_INSN_SIZE, %eax
1015
1016	call	*ftrace_trace_function
1017
1018	popl	%edx
1019	popl	%ecx
1020	popl	%eax
1021	jmp	ftrace_stub
1022END(mcount)
1023#endif /* CONFIG_DYNAMIC_FTRACE */
1024EXPORT_SYMBOL(mcount)
1025#endif /* CONFIG_FUNCTION_TRACER */
1026
1027#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller - preserve %eax/%ecx/%edx around a call to
 * prepare_ftrace_return.  It is passed: %eax = our return address minus
 * MCOUNT_INSN_SIZE, %edx = the address 4(%ebp) (an address, not the value
 * stored there), %ecx = the word at (%ebp).
 */
1028ENTRY(ftrace_graph_caller)
1029	pushl	%eax
1030	pushl	%ecx
1031	pushl	%edx
1032	movl	0xc(%esp), %eax
1033	lea	0x4(%ebp), %edx
1034	movl	(%ebp), %ecx
1035	subl	$MCOUNT_INSN_SIZE, %eax
1036	call	prepare_ftrace_return
1037	popl	%edx
1038	popl	%ecx
1039	popl	%eax
1040	ret
1041END(ftrace_graph_caller)
1042
/*
 * return_to_handler - preserves %eax/%edx around a call to
 * ftrace_return_to_handler(%eax = %ebp), then jumps to the address that
 * call returned.  %ecx is clobbered.
 */
1043.globl return_to_handler
1044return_to_handler:
1045	pushl	%eax
1046	pushl	%edx
1047	movl	%ebp, %eax
1048	call	ftrace_return_to_handler
1049	movl	%eax, %ecx
1050	popl	%edx
1051	popl	%eax
1052	jmp	*%ecx
1053#endif
1053#endif
1054
1055#ifdef CONFIG_TRACING
/*
 * Exception stub: same shape as page_fault, but the handler pushed is
 * trace_do_page_fault.  No error-code placeholder is pushed here.
 */
1056ENTRY(trace_page_fault)
1057	ASM_CLAC
1058	pushl	$trace_do_page_fault
1059	jmp	common_exception
1060END(trace_page_fault)
1061#endif
1062
/*
 * Exception stub: pushes only the handler (do_page_fault).  The error-code
 * slot is expected to be on the stack already - presumably supplied by the
 * CPU for this vector.
 */
1063ENTRY(page_fault)
1064	ASM_CLAC
1065	pushl	$do_page_fault
	/* NOTE(review): ALIGN sits between the push and the jmp - confirm the padding is intended */
1066	ALIGN
1067	jmp common_exception
1068END(page_fault)
1069
/*
 * common_exception - shared body of the exception stubs.
 *
 * On entry the two topmost stack words are the handler address (in what
 * becomes the pt_regs gs slot) and, above it, the error code (in the
 * orig_eax slot).  Completes the pt_regs frame, then calls the handler with
 * %eax = pt_regs pointer and %edx = error code, and leaves through
 * ret_from_exception.
 */
1070common_exception:
1071	/* the function address is in %gs's slot on the stack */
1072	pushl	%fs
1073	pushl	%es
1074	pushl	%ds
1075	pushl	%eax
1076	pushl	%ebp
1077	pushl	%edi
1078	pushl	%esi
1079	pushl	%edx
1080	pushl	%ecx
1081	pushl	%ebx
1082	ENCODE_FRAME_POINTER
1083	cld
1084	movl	$(__KERNEL_PERCPU), %ecx
1085	movl	%ecx, %fs
1086	UNWIND_ESPFIX_STACK
	/* swap the handler address out of the gs slot and the real %gs in */
1087	GS_TO_REG %ecx
1088	movl	PT_GS(%esp), %edi		# get the function address
1089	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
1090	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
1091	REG_TO_PTGS %ecx
1092	SET_KERNEL_GS %ecx
1093	movl	$(__USER_DS), %ecx
1094	movl	%ecx, %ds
1095	movl	%ecx, %es
1096	TRACE_IRQS_OFF
1097	movl	%esp, %eax			# pt_regs pointer
1098	call	*%edi
1099	jmp	ret_from_exception
1100END(common_exception)
1101
1102ENTRY(debug)
1103	/*
1104	 * #DB can happen at the first instruction of
1105	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
1106	 * happens, then we will be running on a very small stack.  We
1107	 * need to detect this condition and switch to the thread
1108	 * stack before calling any C code at all.
1109	 *
1110	 * If you edit this code, keep in mind that NMIs can happen in here.
1111	 */
1112	ASM_CLAC
1113	pushl	$-1				# mark this as an int
1114	SAVE_ALL
1115	ENCODE_FRAME_POINTER
1116	xorl	%edx, %edx			# error code 0
1117	movl	%esp, %eax			# pt_regs pointer
1118
1119	/* Are we currently on the SYSENTER stack? */
1120	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
1121	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	/* unsigned compare: below SIZEOF_SYSENTER_stack only if %esp is inside that stack */
1122	cmpl	$SIZEOF_SYSENTER_stack, %ecx
1123	jb	.Ldebug_from_sysenter_stack
1124
1125	TRACE_IRQS_OFF
1126	call	do_debug
1127	jmp	ret_from_exception
1128
1129.Ldebug_from_sysenter_stack:
1130	/* We're on the SYSENTER stack.  Switch off. */
1131	movl	%esp, %ebx			/* keep the old %esp in callee-saved %ebx */
1132	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
1133	TRACE_IRQS_OFF
1134	call	do_debug
1135	movl	%ebx, %esp			/* back to the stack holding pt_regs */
1136	jmp	ret_from_exception
1137END(debug)
1138
1139/*
1140 * NMI is doubly nasty.  It can happen on the first instruction of
1141 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
1142 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
1143 * switched stacks.  We handle both conditions by simply checking whether we
1144 * interrupted kernel code running on the SYSENTER stack.
1145 */
1146ENTRY(nmi)
1147	ASM_CLAC
1148#ifdef CONFIG_X86_ESPFIX32
	/* Check %ss for the espfix selector without losing %eax. */
1149	pushl	%eax
1150	movl	%ss, %eax
1151	cmpw	$__ESPFIX_SS, %ax
1152	popl	%eax				/* popl leaves the flags from cmpw intact */
1153	je	.Lnmi_espfix_stack
1154#endif
1155
1156	pushl	%eax				# pt_regs->orig_ax
1157	SAVE_ALL
1158	ENCODE_FRAME_POINTER
1159	xorl	%edx, %edx			# zero error code
1160	movl	%esp, %eax			# pt_regs pointer
1161
1162	/* Are we currently on the SYSENTER stack? */
1163	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
1164	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	/* unsigned compare: below SIZEOF_SYSENTER_stack only if %esp is inside that stack */
1165	cmpl	$SIZEOF_SYSENTER_stack, %ecx
1166	jb	.Lnmi_from_sysenter_stack
1167
1168	/* Not on SYSENTER stack. */
1169	call	do_nmi
1170	jmp	.Lrestore_all_notrace
1171
1172.Lnmi_from_sysenter_stack:
1173	/*
1174	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
1175	 * is using the thread stack right now, so it's safe for us to use it.
1176	 */
1177	movl	%esp, %ebx			/* keep the old %esp in callee-saved %ebx */
1178	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
1179	call	do_nmi
1180	movl	%ebx, %esp			/* back to the stack holding pt_regs */
1181	jmp	.Lrestore_all_notrace
1182
1183#ifdef CONFIG_X86_ESPFIX32
1184.Lnmi_espfix_stack:
1185	/*
1186	 * create the pointer to lss back
1187	 */
1188	pushl	%ss
1189	pushl	%esp
1190	addl	$4, (%esp)
1191	/* copy the iret frame of 12 bytes */
1192	.rept 3
1193	pushl	16(%esp)
1194	.endr
1195	pushl	%eax
1196	SAVE_ALL
1197	ENCODE_FRAME_POINTER
1198	FIXUP_ESPFIX_STACK			# %eax == %esp
1199	xorl	%edx, %edx			# zero error code
1200	call	do_nmi
1201	RESTORE_REGS
	/* reload %ss:%esp from the far pointer built at the top of this path */
1202	lss	12+4(%esp), %esp		# back to espfix stack
1203	jmp	.Lirq_return
1204#endif
1205END(nmi)
1206
/*
 * int3 - breakpoint entry.  Builds a pt_regs frame with orig_eax = -1 and
 * calls do_int3 with %eax = pt_regs pointer and %edx = 0 (error code), then
 * leaves through ret_from_exception.
 */
1207ENTRY(int3)
1208	ASM_CLAC
1209	pushl	$-1				# mark this as an int
1210	SAVE_ALL
1211	ENCODE_FRAME_POINTER
1212	TRACE_IRQS_OFF
1213	xorl	%edx, %edx			# zero error code
1214	movl	%esp, %eax			# pt_regs pointer
1215	call	do_int3
1216	jmp	ret_from_exception
1217END(int3)
1218
/*
 * Exception stub for general protection faults: pushes only the handler
 * (do_general_protection).  The error-code slot is expected to be on the
 * stack already - presumably supplied by the CPU for this vector.
 *
 * ASM_CLAC comes first, as in every other exception entry in this file, so
 * that the handler does not run with an EFLAGS.AC value inherited from the
 * interrupted context.
 */
1219ENTRY(general_protection)
	ASM_CLAC
1220	pushl	$do_general_protection
1221	jmp	common_exception
1222END(general_protection)
1223
1224#ifdef CONFIG_KVM_GUEST
/*
 * Exception stub (CONFIG_KVM_GUEST): pushes only the handler
 * (do_async_page_fault).  No error-code placeholder is pushed here.
 */
1225ENTRY(async_page_fault)
1226	ASM_CLAC
1227	pushl	$do_async_page_fault
1228	jmp	common_exception
1229END(async_page_fault)
1230#endif
1231
/*
 * rewind_stack_do_exit - reset %esp to
 * cpu_current_top_of_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE
 * and call do_exit.  Registers other than %ebp, %esi and %esp are left
 * untouched before the call.
 */
1232ENTRY(rewind_stack_do_exit)
1233	/* Prevent any naive code from trying to unwind to our caller. */
1234	xorl	%ebp, %ebp
1235
1236	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
1237	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
1238
1239	call	do_exit
	/* spin here if do_exit ever returns */
12401:	jmp 1b
1241END(rewind_stack_do_exit)
1242