xref: /openbmc/linux/arch/powerpc/kernel/syscall.c (revision 1547db7d1f4481c1f3ec731f3edc724ef3026ede)
1*1547db7dSXiu Jianfeng // SPDX-License-Identifier: GPL-2.0-or-later
2*1547db7dSXiu Jianfeng 
3*1547db7dSXiu Jianfeng #include <linux/compat.h>
4*1547db7dSXiu Jianfeng #include <linux/context_tracking.h>
5*1547db7dSXiu Jianfeng 
6*1547db7dSXiu Jianfeng #include <asm/interrupt.h>
7*1547db7dSXiu Jianfeng #include <asm/kup.h>
8*1547db7dSXiu Jianfeng #include <asm/syscall.h>
9*1547db7dSXiu Jianfeng #include <asm/time.h>
10*1547db7dSXiu Jianfeng #include <asm/tm.h>
11*1547db7dSXiu Jianfeng #include <asm/unistd.h>
12*1547db7dSXiu Jianfeng 
13*1547db7dSXiu Jianfeng 
/*
 * Common signature through which every syscall table entry is invoked in
 * this file: six long arguments in, one long result out.
 */
14*1547db7dSXiu Jianfeng typedef long (*syscall_fn)(long, long, long, long, long, long);
15*1547db7dSXiu Jianfeng 
/*
 * system_call_exception() - C-level entry point that services a system call.
 *
 * @r3..@r8: the six syscall arguments, forwarded to the selected handler
 *           (reloaded from @regs if the syscall trace hook ran).
 * @r0:      syscall number; used as the index into sys_call_table or
 *           compat_sys_call_table.
 * @regs:    saved register frame of the caller; updated here (orig_gpr3,
 *           amr/iamr, soft-mask state) and consulted for msr, nip and gpr[].
 *
 * Returns the handler's return value on a normal dispatch. Returns -ENOSYS
 * when the syscall number is out of range or when the call was made inside
 * an active transaction (which is doomed instead of serviced). Returns
 * regs->gpr[3] when the trace hook rejected the syscall or when an
 * unsupported scv vector was turned into a SIGILL.
 *
 * The statement order in the entry sequence is significant; do not reorder
 * without checking each helper.
 */
16*1547db7dSXiu Jianfeng /* Has to run notrace because it is entered not completely "reconciled" */
17*1547db7dSXiu Jianfeng notrace long system_call_exception(long r3, long r4, long r5,
18*1547db7dSXiu Jianfeng 				   long r6, long r7, long r8,
19*1547db7dSXiu Jianfeng 				   unsigned long r0, struct pt_regs *regs)
20*1547db7dSXiu Jianfeng {
21*1547db7dSXiu Jianfeng 	syscall_fn f;
22*1547db7dSXiu Jianfeng 
23*1547db7dSXiu Jianfeng 	kuap_lock();
24*1547db7dSXiu Jianfeng 
	/* Keep a copy of the incoming first argument in the register frame. */
25*1547db7dSXiu Jianfeng 	regs->orig_gpr3 = r3;
26*1547db7dSXiu Jianfeng 
	/* Debug-only sanity check: we must arrive with all IRQs soft-masked. */
27*1547db7dSXiu Jianfeng 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
28*1547db7dSXiu Jianfeng 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
29*1547db7dSXiu Jianfeng 
30*1547db7dSXiu Jianfeng 	trace_hardirqs_off(); /* finish reconciling */
31*1547db7dSXiu Jianfeng 
	/* Context tracking must not already consider us to be in the kernel. */
32*1547db7dSXiu Jianfeng 	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
33*1547db7dSXiu Jianfeng 	user_exit_irqoff();
34*1547db7dSXiu Jianfeng 
	/*
	 * Sanity checks on the interrupted context: it must be recoverable,
	 * have MSR_PR set (NOTE(review): i.e. presumably user mode - confirm),
	 * and must not have had interrupts disabled.
	 */
35*1547db7dSXiu Jianfeng 	BUG_ON(regs_is_unrecoverable(regs));
36*1547db7dSXiu Jianfeng 	BUG_ON(!(regs->msr & MSR_PR));
37*1547db7dSXiu Jianfeng 	BUG_ON(arch_irq_disabled_regs(regs));
38*1547db7dSXiu Jianfeng 
	/*
	 * Beware: the "else" below straddles the #endif. With CONFIG_PPC_PKEY
	 * built in, kuap_assert_locked() runs only when MMU_FTR_PKEY is
	 * absent; without CONFIG_PPC_PKEY it runs unconditionally.
	 */
39*1547db7dSXiu Jianfeng #ifdef CONFIG_PPC_PKEY
40*1547db7dSXiu Jianfeng 	if (mmu_has_feature(MMU_FTR_PKEY)) {
41*1547db7dSXiu Jianfeng 		unsigned long amr, iamr;
42*1547db7dSXiu Jianfeng 		bool flush_needed = false;
43*1547db7dSXiu Jianfeng 		/*
44*1547db7dSXiu Jianfeng 		 * When entering from userspace we mostly have the AMR/IAMR
45*1547db7dSXiu Jianfeng 		 * different from kernel default values. Hence don't compare.
46*1547db7dSXiu Jianfeng 		 */
47*1547db7dSXiu Jianfeng 		amr = mfspr(SPRN_AMR);
48*1547db7dSXiu Jianfeng 		iamr = mfspr(SPRN_IAMR);
		/* Stash the values found on entry in the register frame. */
49*1547db7dSXiu Jianfeng 		regs->amr  = amr;
50*1547db7dSXiu Jianfeng 		regs->iamr = iamr;
		/* Switch each SPR to its "blocked" value where the feature exists. */
51*1547db7dSXiu Jianfeng 		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
52*1547db7dSXiu Jianfeng 			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
53*1547db7dSXiu Jianfeng 			flush_needed = true;
54*1547db7dSXiu Jianfeng 		}
55*1547db7dSXiu Jianfeng 		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
56*1547db7dSXiu Jianfeng 			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
57*1547db7dSXiu Jianfeng 			flush_needed = true;
58*1547db7dSXiu Jianfeng 		}
		/* One isync() covers whichever of the SPR writes above happened. */
59*1547db7dSXiu Jianfeng 		if (flush_needed)
60*1547db7dSXiu Jianfeng 			isync();
61*1547db7dSXiu Jianfeng 	} else
62*1547db7dSXiu Jianfeng #endif
63*1547db7dSXiu Jianfeng 		kuap_assert_locked();
64*1547db7dSXiu Jianfeng 
65*1547db7dSXiu Jianfeng 	booke_restore_dbcr0();
66*1547db7dSXiu Jianfeng 
67*1547db7dSXiu Jianfeng 	account_cpu_user_entry();
68*1547db7dSXiu Jianfeng 
69*1547db7dSXiu Jianfeng 	account_stolen_time();
70*1547db7dSXiu Jianfeng 
71*1547db7dSXiu Jianfeng 	/*
72*1547db7dSXiu Jianfeng 	 * This is not required for the syscall exit path, but makes the
73*1547db7dSXiu Jianfeng 	 * stack frame look nicer. If this was initialised in the first stack
74*1547db7dSXiu Jianfeng 	 * frame, or if the unwinder was taught the first stack frame always
75*1547db7dSXiu Jianfeng 	 * returns to user with IRQS_ENABLED, this store could be avoided!
76*1547db7dSXiu Jianfeng 	 */
77*1547db7dSXiu Jianfeng 	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
78*1547db7dSXiu Jianfeng 
79*1547db7dSXiu Jianfeng 	/*
80*1547db7dSXiu Jianfeng 	 * If system call is called with TM active, set _TIF_RESTOREALL to
81*1547db7dSXiu Jianfeng 	 * prevent RFSCV being used to return to userspace, because POWER9
82*1547db7dSXiu Jianfeng 	 * TM implementation has problems with this instruction returning to
83*1547db7dSXiu Jianfeng 	 * transactional state. Final register values are not relevant because
84*1547db7dSXiu Jianfeng 	 * the transaction will be aborted upon return anyway. Or in the case
85*1547db7dSXiu Jianfeng 	 * of unsupported_scv SIGILL fault, the return state does not much
86*1547db7dSXiu Jianfeng 	 * matter because it's an edge case.
87*1547db7dSXiu Jianfeng 	 */
88*1547db7dSXiu Jianfeng 	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
89*1547db7dSXiu Jianfeng 			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
90*1547db7dSXiu Jianfeng 		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
91*1547db7dSXiu Jianfeng 
92*1547db7dSXiu Jianfeng 	/*
93*1547db7dSXiu Jianfeng 	 * If the system call was made with a transaction active, doom it and
94*1547db7dSXiu Jianfeng 	 * return without performing the system call. Unless it was an
95*1547db7dSXiu Jianfeng 	 * unsupported scv vector, in which case it's treated like an illegal
96*1547db7dSXiu Jianfeng 	 * instruction.
97*1547db7dSXiu Jianfeng 	 */
98*1547db7dSXiu Jianfeng #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
99*1547db7dSXiu Jianfeng 	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
100*1547db7dSXiu Jianfeng 	    !trap_is_unsupported_scv(regs)) {
101*1547db7dSXiu Jianfeng 		/* Enable TM in the kernel, and disable EE (for scv) */
102*1547db7dSXiu Jianfeng 		hard_irq_disable();
103*1547db7dSXiu Jianfeng 		mtmsr(mfmsr() | MSR_TM);
104*1547db7dSXiu Jianfeng 
105*1547db7dSXiu Jianfeng 		/* tabort, this dooms the transaction, nothing else */
106*1547db7dSXiu Jianfeng 		asm volatile(".long 0x7c00071d | ((%0) << 16)"
107*1547db7dSXiu Jianfeng 				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
108*1547db7dSXiu Jianfeng 
109*1547db7dSXiu Jianfeng 		/*
110*1547db7dSXiu Jianfeng 		 * Userspace will never see the return value. Execution will
111*1547db7dSXiu Jianfeng 		 * resume after the tbegin. of the aborted transaction with the
112*1547db7dSXiu Jianfeng 		 * checkpointed register state. A context switch could occur
113*1547db7dSXiu Jianfeng 		 * or signal delivered to the process before resuming the
114*1547db7dSXiu Jianfeng 		 * doomed transaction context, but that should all be handled
115*1547db7dSXiu Jianfeng 		 * as expected.
116*1547db7dSXiu Jianfeng 		 */
117*1547db7dSXiu Jianfeng 		return -ENOSYS;
118*1547db7dSXiu Jianfeng 	}
119*1547db7dSXiu Jianfeng #endif // CONFIG_PPC_TRANSACTIONAL_MEM
120*1547db7dSXiu Jianfeng 
	/* Entry bookkeeping is finished: enable interrupts before tracing/dispatch. */
121*1547db7dSXiu Jianfeng 	local_irq_enable();
122*1547db7dSXiu Jianfeng 
123*1547db7dSXiu Jianfeng 	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
124*1547db7dSXiu Jianfeng 		if (unlikely(trap_is_unsupported_scv(regs))) {
125*1547db7dSXiu Jianfeng 			/* Unsupported scv vector */
126*1547db7dSXiu Jianfeng 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
127*1547db7dSXiu Jianfeng 			return regs->gpr[3];
128*1547db7dSXiu Jianfeng 		}
129*1547db7dSXiu Jianfeng 		/*
130*1547db7dSXiu Jianfeng 		 * We use the return value of do_syscall_trace_enter() as the
131*1547db7dSXiu Jianfeng 		 * syscall number. If the syscall was rejected for any reason
132*1547db7dSXiu Jianfeng 		 * do_syscall_trace_enter() returns an invalid syscall number
133*1547db7dSXiu Jianfeng 		 * and the test against NR_syscalls will fail and the return
134*1547db7dSXiu Jianfeng 		 * value to be used is in regs->gpr[3].
135*1547db7dSXiu Jianfeng 		 */
136*1547db7dSXiu Jianfeng 		r0 = do_syscall_trace_enter(regs);
137*1547db7dSXiu Jianfeng 		if (unlikely(r0 >= NR_syscalls))
138*1547db7dSXiu Jianfeng 			return regs->gpr[3];
		/*
		 * Reload all six arguments from the register frame after the
		 * trace hook. NOTE(review): presumably because a tracer may
		 * have rewritten them - confirm in do_syscall_trace_enter().
		 */
139*1547db7dSXiu Jianfeng 		r3 = regs->gpr[3];
140*1547db7dSXiu Jianfeng 		r4 = regs->gpr[4];
141*1547db7dSXiu Jianfeng 		r5 = regs->gpr[5];
142*1547db7dSXiu Jianfeng 		r6 = regs->gpr[6];
143*1547db7dSXiu Jianfeng 		r7 = regs->gpr[7];
144*1547db7dSXiu Jianfeng 		r8 = regs->gpr[8];
145*1547db7dSXiu Jianfeng 
	/* Not tracing: an out-of-range syscall number is rejected right here. */
146*1547db7dSXiu Jianfeng 	} else if (unlikely(r0 >= NR_syscalls)) {
147*1547db7dSXiu Jianfeng 		if (unlikely(trap_is_unsupported_scv(regs))) {
148*1547db7dSXiu Jianfeng 			/* Unsupported scv vector */
149*1547db7dSXiu Jianfeng 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
150*1547db7dSXiu Jianfeng 			return regs->gpr[3];
151*1547db7dSXiu Jianfeng 		}
152*1547db7dSXiu Jianfeng 		return -ENOSYS;
153*1547db7dSXiu Jianfeng 	}
154*1547db7dSXiu Jianfeng 
	/* From here on r0 < NR_syscalls; the barrier precedes the table lookup. */
155*1547db7dSXiu Jianfeng 	/* May be faster to do array_index_nospec? */
156*1547db7dSXiu Jianfeng 	barrier_nospec();
157*1547db7dSXiu Jianfeng 
	/*
	 * Compat tasks dispatch through their own table, and only the low
	 * 32 bits of each argument are kept.
	 */
158*1547db7dSXiu Jianfeng 	if (unlikely(is_compat_task())) {
159*1547db7dSXiu Jianfeng 		f = (void *)compat_sys_call_table[r0];
160*1547db7dSXiu Jianfeng 
161*1547db7dSXiu Jianfeng 		r3 &= 0x00000000ffffffffULL;
162*1547db7dSXiu Jianfeng 		r4 &= 0x00000000ffffffffULL;
163*1547db7dSXiu Jianfeng 		r5 &= 0x00000000ffffffffULL;
164*1547db7dSXiu Jianfeng 		r6 &= 0x00000000ffffffffULL;
165*1547db7dSXiu Jianfeng 		r7 &= 0x00000000ffffffffULL;
166*1547db7dSXiu Jianfeng 		r8 &= 0x00000000ffffffffULL;
167*1547db7dSXiu Jianfeng 
168*1547db7dSXiu Jianfeng 	} else {
169*1547db7dSXiu Jianfeng 		f = (void *)sys_call_table[r0];
170*1547db7dSXiu Jianfeng 	}
171*1547db7dSXiu Jianfeng 
	/* Invoke the selected handler; its result is the syscall's return value. */
172*1547db7dSXiu Jianfeng 	return f(r3, r4, r5, r6, r7, r8);
173*1547db7dSXiu Jianfeng }
174