xref: /openbmc/linux/arch/loongarch/kernel/fpu.S (revision b1c3d2be)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Author: Lu Zeng <zenglu@loongson.cn>
4 *         Pei Huang <huangpei@loongson.cn>
5 *         Huacai Chen <chenhuacai@loongson.cn>
6 *
7 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
8 */
9#include <asm/asm.h>
10#include <asm/asmmacro.h>
11#include <asm/asm-extable.h>
12#include <asm/asm-offsets.h>
13#include <asm/errno.h>
14#include <asm/export.h>
15#include <asm/fpregdef.h>
16#include <asm/loongarch.h>
17#include <asm/regdef.h>
18
/*
 * Size in bytes of one saved register slot.  FPU_REG_WIDTH is the stride
 * used by the sc_save_fp / sc_restore_fp macros in this file; the LSX and
 * LASX widths are not referenced by the code visible in this part of the
 * file.
 */
#define FPU_REG_WIDTH		8	/* one 64-bit scalar FP register */
#define LSX_REG_WIDTH		16
#define LASX_REG_WIDTH		32
22
23	.macro	EX insn, reg, src, offs
24.ex\@:	\insn	\reg, \src, \offs
25	_asm_extable .ex\@, fault
26	.endm
27
28	.macro sc_save_fp base
29	EX	fst.d	$f0,  \base, (0 * FPU_REG_WIDTH)
30	EX	fst.d	$f1,  \base, (1 * FPU_REG_WIDTH)
31	EX	fst.d	$f2,  \base, (2 * FPU_REG_WIDTH)
32	EX	fst.d	$f3,  \base, (3 * FPU_REG_WIDTH)
33	EX	fst.d	$f4,  \base, (4 * FPU_REG_WIDTH)
34	EX	fst.d	$f5,  \base, (5 * FPU_REG_WIDTH)
35	EX	fst.d	$f6,  \base, (6 * FPU_REG_WIDTH)
36	EX	fst.d	$f7,  \base, (7 * FPU_REG_WIDTH)
37	EX	fst.d	$f8,  \base, (8 * FPU_REG_WIDTH)
38	EX	fst.d	$f9,  \base, (9 * FPU_REG_WIDTH)
39	EX	fst.d	$f10, \base, (10 * FPU_REG_WIDTH)
40	EX	fst.d	$f11, \base, (11 * FPU_REG_WIDTH)
41	EX	fst.d	$f12, \base, (12 * FPU_REG_WIDTH)
42	EX	fst.d	$f13, \base, (13 * FPU_REG_WIDTH)
43	EX	fst.d	$f14, \base, (14 * FPU_REG_WIDTH)
44	EX	fst.d	$f15, \base, (15 * FPU_REG_WIDTH)
45	EX	fst.d	$f16, \base, (16 * FPU_REG_WIDTH)
46	EX	fst.d	$f17, \base, (17 * FPU_REG_WIDTH)
47	EX	fst.d	$f18, \base, (18 * FPU_REG_WIDTH)
48	EX	fst.d	$f19, \base, (19 * FPU_REG_WIDTH)
49	EX	fst.d	$f20, \base, (20 * FPU_REG_WIDTH)
50	EX	fst.d	$f21, \base, (21 * FPU_REG_WIDTH)
51	EX	fst.d	$f22, \base, (22 * FPU_REG_WIDTH)
52	EX	fst.d	$f23, \base, (23 * FPU_REG_WIDTH)
53	EX	fst.d	$f24, \base, (24 * FPU_REG_WIDTH)
54	EX	fst.d	$f25, \base, (25 * FPU_REG_WIDTH)
55	EX	fst.d	$f26, \base, (26 * FPU_REG_WIDTH)
56	EX	fst.d	$f27, \base, (27 * FPU_REG_WIDTH)
57	EX	fst.d	$f28, \base, (28 * FPU_REG_WIDTH)
58	EX	fst.d	$f29, \base, (29 * FPU_REG_WIDTH)
59	EX	fst.d	$f30, \base, (30 * FPU_REG_WIDTH)
60	EX	fst.d	$f31, \base, (31 * FPU_REG_WIDTH)
61	.endm
62
63	.macro sc_restore_fp base
64	EX	fld.d	$f0,  \base, (0 * FPU_REG_WIDTH)
65	EX	fld.d	$f1,  \base, (1 * FPU_REG_WIDTH)
66	EX	fld.d	$f2,  \base, (2 * FPU_REG_WIDTH)
67	EX	fld.d	$f3,  \base, (3 * FPU_REG_WIDTH)
68	EX	fld.d	$f4,  \base, (4 * FPU_REG_WIDTH)
69	EX	fld.d	$f5,  \base, (5 * FPU_REG_WIDTH)
70	EX	fld.d	$f6,  \base, (6 * FPU_REG_WIDTH)
71	EX	fld.d	$f7,  \base, (7 * FPU_REG_WIDTH)
72	EX	fld.d	$f8,  \base, (8 * FPU_REG_WIDTH)
73	EX	fld.d	$f9,  \base, (9 * FPU_REG_WIDTH)
74	EX	fld.d	$f10, \base, (10 * FPU_REG_WIDTH)
75	EX	fld.d	$f11, \base, (11 * FPU_REG_WIDTH)
76	EX	fld.d	$f12, \base, (12 * FPU_REG_WIDTH)
77	EX	fld.d	$f13, \base, (13 * FPU_REG_WIDTH)
78	EX	fld.d	$f14, \base, (14 * FPU_REG_WIDTH)
79	EX	fld.d	$f15, \base, (15 * FPU_REG_WIDTH)
80	EX	fld.d	$f16, \base, (16 * FPU_REG_WIDTH)
81	EX	fld.d	$f17, \base, (17 * FPU_REG_WIDTH)
82	EX	fld.d	$f18, \base, (18 * FPU_REG_WIDTH)
83	EX	fld.d	$f19, \base, (19 * FPU_REG_WIDTH)
84	EX	fld.d	$f20, \base, (20 * FPU_REG_WIDTH)
85	EX	fld.d	$f21, \base, (21 * FPU_REG_WIDTH)
86	EX	fld.d	$f22, \base, (22 * FPU_REG_WIDTH)
87	EX	fld.d	$f23, \base, (23 * FPU_REG_WIDTH)
88	EX	fld.d	$f24, \base, (24 * FPU_REG_WIDTH)
89	EX	fld.d	$f25, \base, (25 * FPU_REG_WIDTH)
90	EX	fld.d	$f26, \base, (26 * FPU_REG_WIDTH)
91	EX	fld.d	$f27, \base, (27 * FPU_REG_WIDTH)
92	EX	fld.d	$f28, \base, (28 * FPU_REG_WIDTH)
93	EX	fld.d	$f29, \base, (29 * FPU_REG_WIDTH)
94	EX	fld.d	$f30, \base, (30 * FPU_REG_WIDTH)
95	EX	fld.d	$f31, \base, (31 * FPU_REG_WIDTH)
96	.endm
97
98	.macro sc_save_fcc base, tmp0, tmp1
99	movcf2gr	\tmp0, $fcc0
100	move		\tmp1, \tmp0
101	movcf2gr	\tmp0, $fcc1
102	bstrins.d	\tmp1, \tmp0, 15, 8
103	movcf2gr	\tmp0, $fcc2
104	bstrins.d	\tmp1, \tmp0, 23, 16
105	movcf2gr	\tmp0, $fcc3
106	bstrins.d	\tmp1, \tmp0, 31, 24
107	movcf2gr	\tmp0, $fcc4
108	bstrins.d	\tmp1, \tmp0, 39, 32
109	movcf2gr	\tmp0, $fcc5
110	bstrins.d	\tmp1, \tmp0, 47, 40
111	movcf2gr	\tmp0, $fcc6
112	bstrins.d	\tmp1, \tmp0, 55, 48
113	movcf2gr	\tmp0, $fcc7
114	bstrins.d	\tmp1, \tmp0, 63, 56
115	EX	st.d	\tmp1, \base, 0
116	.endm
117
118	.macro sc_restore_fcc base, tmp0, tmp1
119	EX	ld.d	\tmp0, \base, 0
120	bstrpick.d	\tmp1, \tmp0, 7, 0
121	movgr2cf	$fcc0, \tmp1
122	bstrpick.d	\tmp1, \tmp0, 15, 8
123	movgr2cf	$fcc1, \tmp1
124	bstrpick.d	\tmp1, \tmp0, 23, 16
125	movgr2cf	$fcc2, \tmp1
126	bstrpick.d	\tmp1, \tmp0, 31, 24
127	movgr2cf	$fcc3, \tmp1
128	bstrpick.d	\tmp1, \tmp0, 39, 32
129	movgr2cf	$fcc4, \tmp1
130	bstrpick.d	\tmp1, \tmp0, 47, 40
131	movgr2cf	$fcc5, \tmp1
132	bstrpick.d	\tmp1, \tmp0, 55, 48
133	movgr2cf	$fcc6, \tmp1
134	bstrpick.d	\tmp1, \tmp0, 63, 56
135	movgr2cf	$fcc7, \tmp1
136	.endm
137
138	.macro sc_save_fcsr base, tmp0
139	movfcsr2gr	\tmp0, fcsr0
140	EX	st.w	\tmp0, \base, 0
141	.endm
142
143	.macro sc_restore_fcsr base, tmp0
144	EX	ld.w	\tmp0, \base, 0
145	movgr2fcsr	fcsr0, \tmp0
146	.endm
147
/*
 * Save a thread's fp context.
 *
 * a0 is passed as the first argument to each fpu_save_* helper macro
 * (defined outside this file).  The control/status register is saved
 * first, then the double-precision registers, then the condition codes.
 * t1 and t2 are used as scratch registers.
 */
SYM_FUNC_START(_save_fp)
	fpu_save_csr	a0, t1
	fpu_save_double	a0, t1			# clobbers t1
	fpu_save_cc	a0, t1, t2		# clobbers t1, t2
	jr		ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)
158
/*
 * Restore a thread's fp context.
 *
 * a0 is passed as the first argument to each fpu_restore_* helper macro
 * (defined outside this file).  The double-precision registers are
 * restored first, then the control/status register, then the condition
 * codes.  t1 and t2 are used as scratch registers.
 */
SYM_FUNC_START(_restore_fp)
	fpu_restore_double	a0, t1		# clobbers t1
	fpu_restore_csr		a0, t1
	fpu_restore_cc		a0, t1, t2	# clobbers t1, t2
	jr			ra
SYM_FUNC_END(_restore_fp)
168
/*
 * Load the FPU with signalling NANS.  This bit pattern we're using has
 * the property that no matter whether considered as single or as double
 * precision represents signaling NANS.
 *
 * The value to initialize fcsr0 to comes in $a0.
 *
 * Steps: set the CSR_EUEN_FPEN bit in LOONGARCH_CSR_EUEN, write a0 to
 * fcsr0, then fill $f0..$f31 with the all-ones pattern.  t1 is used as
 * scratch.
 *
 * NOTE(review): the all-ones pattern has the most significant mantissa
 * bit set; confirm that "signalling" is the right description for it on
 * this architecture.
 */

SYM_FUNC_START(_init_fpu)
	li.w	t1, CSR_EUEN_FPEN
	csrxchg	t1, t1, LOONGARCH_CSR_EUEN	# value == mask: set FPEN only

	movgr2fcsr	fcsr0, a0

	li.w	t1, -1				# SNaN

	.irp	num, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
	movgr2fr.d	$f\num, t1
	.endr

	jr	ra
SYM_FUNC_END(_init_fpu)
220
/*
 * Store the live FP state to three separate memory areas.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * Returns 0 in a0 when every store completed.  All memory accesses are
 * emitted through EX, whose exception table entries name "fault"; that
 * routine returns -EFAULT in a0 instead.  t1 and t2 are clobbered.
 */
SYM_FUNC_START(_save_fp_context)
	sc_save_fcc	a1, t1, t2
	sc_save_fcsr	a2, t1
	sc_save_fp	a0
	li.w		a0, 0				# success
	jr		ra
SYM_FUNC_END(_save_fp_context)
233
/*
 * Load the FP state from three separate memory areas.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * Returns 0 in a0 when every load completed.  All memory accesses are
 * emitted through EX, whose exception table entries name "fault"; that
 * routine returns -EFAULT in a0 instead.  t1 and t2 are clobbered.
 */
SYM_FUNC_START(_restore_fp_context)
	sc_restore_fp	a0
	sc_restore_fcc	a1, t1, t2
	sc_restore_fcsr	a2, t1
	li.w		a0, 0				# success
	jr		ra
SYM_FUNC_END(_restore_fp_context)
246
/*
 * Target named by every exception table entry that the EX macro emits.
 * Loads -EFAULT into a0 and returns through ra, so the caller of the
 * routine that contained the EX access sees a failure return value.
 */
SYM_FUNC_START(fault)
	li.w	a0, -EFAULT				# failure
	jr	ra
SYM_FUNC_END(fault)
251