xref: /openbmc/linux/arch/powerpc/kernel/syscall.c (revision 7cc39531)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 
3 #include <linux/compat.h>
4 #include <linux/context_tracking.h>
5 #include <linux/randomize_kstack.h>
6 
7 #include <asm/interrupt.h>
8 #include <asm/kup.h>
9 #include <asm/syscall.h>
10 #include <asm/time.h>
11 #include <asm/tm.h>
12 #include <asm/unistd.h>
13 
14 
/*
 * Signature used by system_call_exception() when calling a handler fetched
 * from sys_call_table[] or compat_sys_call_table[]: six long arguments in,
 * one long result out.
 */
typedef long (*syscall_fn)(long, long, long, long, long, long);
16 
17 /* Has to run notrace because it is entered not completely "reconciled" */
18 notrace long system_call_exception(long r3, long r4, long r5,
19 				   long r6, long r7, long r8,
20 				   unsigned long r0, struct pt_regs *regs)
21 {
22 	long ret;
23 	syscall_fn f;
24 
25 	kuap_lock();
26 
27 	add_random_kstack_offset();
28 	regs->orig_gpr3 = r3;
29 
30 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
31 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
32 
33 	trace_hardirqs_off(); /* finish reconciling */
34 
35 	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
36 	user_exit_irqoff();
37 
38 	BUG_ON(regs_is_unrecoverable(regs));
39 	BUG_ON(!(regs->msr & MSR_PR));
40 	BUG_ON(arch_irq_disabled_regs(regs));
41 
42 #ifdef CONFIG_PPC_PKEY
43 	if (mmu_has_feature(MMU_FTR_PKEY)) {
44 		unsigned long amr, iamr;
45 		bool flush_needed = false;
46 		/*
47 		 * When entering from userspace we mostly have the AMR/IAMR
48 		 * different from kernel default values. Hence don't compare.
49 		 */
50 		amr = mfspr(SPRN_AMR);
51 		iamr = mfspr(SPRN_IAMR);
52 		regs->amr  = amr;
53 		regs->iamr = iamr;
54 		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
55 			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
56 			flush_needed = true;
57 		}
58 		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
59 			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
60 			flush_needed = true;
61 		}
62 		if (flush_needed)
63 			isync();
64 	} else
65 #endif
66 		kuap_assert_locked();
67 
68 	booke_restore_dbcr0();
69 
70 	account_cpu_user_entry();
71 
72 	account_stolen_time();
73 
74 	/*
75 	 * This is not required for the syscall exit path, but makes the
76 	 * stack frame look nicer. If this was initialised in the first stack
77 	 * frame, or if the unwinder was taught the first stack frame always
78 	 * returns to user with IRQS_ENABLED, this store could be avoided!
79 	 */
80 	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
81 
82 	/*
83 	 * If system call is called with TM active, set _TIF_RESTOREALL to
84 	 * prevent RFSCV being used to return to userspace, because POWER9
85 	 * TM implementation has problems with this instruction returning to
86 	 * transactional state. Final register values are not relevant because
87 	 * the transaction will be aborted upon return anyway. Or in the case
88 	 * of unsupported_scv SIGILL fault, the return state does not much
89 	 * matter because it's an edge case.
90 	 */
91 	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
92 			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
93 		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
94 
95 	/*
96 	 * If the system call was made with a transaction active, doom it and
97 	 * return without performing the system call. Unless it was an
98 	 * unsupported scv vector, in which case it's treated like an illegal
99 	 * instruction.
100 	 */
101 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
102 	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
103 	    !trap_is_unsupported_scv(regs)) {
104 		/* Enable TM in the kernel, and disable EE (for scv) */
105 		hard_irq_disable();
106 		mtmsr(mfmsr() | MSR_TM);
107 
108 		/* tabort, this dooms the transaction, nothing else */
109 		asm volatile(".long 0x7c00071d | ((%0) << 16)"
110 				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
111 
112 		/*
113 		 * Userspace will never see the return value. Execution will
114 		 * resume after the tbegin. of the aborted transaction with the
115 		 * checkpointed register state. A context switch could occur
116 		 * or signal delivered to the process before resuming the
117 		 * doomed transaction context, but that should all be handled
118 		 * as expected.
119 		 */
120 		return -ENOSYS;
121 	}
122 #endif // CONFIG_PPC_TRANSACTIONAL_MEM
123 
124 	local_irq_enable();
125 
126 	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
127 		if (unlikely(trap_is_unsupported_scv(regs))) {
128 			/* Unsupported scv vector */
129 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
130 			return regs->gpr[3];
131 		}
132 		/*
133 		 * We use the return value of do_syscall_trace_enter() as the
134 		 * syscall number. If the syscall was rejected for any reason
135 		 * do_syscall_trace_enter() returns an invalid syscall number
136 		 * and the test against NR_syscalls will fail and the return
137 		 * value to be used is in regs->gpr[3].
138 		 */
139 		r0 = do_syscall_trace_enter(regs);
140 		if (unlikely(r0 >= NR_syscalls))
141 			return regs->gpr[3];
142 		r3 = regs->gpr[3];
143 		r4 = regs->gpr[4];
144 		r5 = regs->gpr[5];
145 		r6 = regs->gpr[6];
146 		r7 = regs->gpr[7];
147 		r8 = regs->gpr[8];
148 
149 	} else if (unlikely(r0 >= NR_syscalls)) {
150 		if (unlikely(trap_is_unsupported_scv(regs))) {
151 			/* Unsupported scv vector */
152 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
153 			return regs->gpr[3];
154 		}
155 		return -ENOSYS;
156 	}
157 
158 	/* May be faster to do array_index_nospec? */
159 	barrier_nospec();
160 
161 	if (unlikely(is_compat_task())) {
162 		f = (void *)compat_sys_call_table[r0];
163 
164 		r3 &= 0x00000000ffffffffULL;
165 		r4 &= 0x00000000ffffffffULL;
166 		r5 &= 0x00000000ffffffffULL;
167 		r6 &= 0x00000000ffffffffULL;
168 		r7 &= 0x00000000ffffffffULL;
169 		r8 &= 0x00000000ffffffffULL;
170 
171 	} else {
172 		f = (void *)sys_call_table[r0];
173 	}
174 
175 	ret = f(r3, r4, r5, r6, r7, r8);
176 
177 	/*
178 	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
179 	 * so the maximum stack offset is 1k bytes (10 bits).
180 	 *
181 	 * The actual entropy will be further reduced by the compiler when
182 	 * applying stack alignment constraints: the powerpc architecture
183 	 * may have two kinds of stack alignment (16-bytes and 8-bytes).
184 	 *
185 	 * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
186 	 */
187 	choose_random_kstack_offset(mftb());
188 
189 	return ret;
190 }
191