xref: /openbmc/linux/arch/loongarch/kernel/fpu.S (revision 2dd6532e)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Author: Lu Zeng <zenglu@loongson.cn>
4 *         Pei Huang <huangpei@loongson.cn>
5 *         Huacai Chen <chenhuacai@loongson.cn>
6 *
7 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
8 */
9#include <asm/asm.h>
10#include <asm/asmmacro.h>
11#include <asm/asm-offsets.h>
12#include <asm/errno.h>
13#include <asm/export.h>
14#include <asm/fpregdef.h>
15#include <asm/loongarch.h>
16#include <asm/regdef.h>
17
/*
 * Byte widths used to compute per-register offsets in the save/restore
 * macros. Only FPU_REG_WIDTH is referenced in the visible part of this
 * file; the LSX/LASX widths are presumably for 128-bit and 256-bit vector
 * register images -- TODO confirm against their users.
 */
#define FPU_REG_WIDTH		8	/* stride of one $fN slot (fst.d/fld.d) */
#define LSX_REG_WIDTH		16
#define LASX_REG_WIDTH		32
21
/*
 * EX insn, reg, src, offs
 *
 * Emit "insn reg, src, offs" under a label that is unique per expansion and
 * record the pair (label, fault) in the __ex_table section, which associates
 * the access with the fault routine of this file.
 *
 * How __ex_table entries are consumed is not visible here; presumably the
 * exception fixup code resumes at fault when the access traps -- confirm.
 */
	.macro	EX insn, reg, src, offs
.ex\@:
	\insn	\reg, \src, \offs
	.pushsection __ex_table, "a"
	PTR	.ex\@, fault
	.popsection
	.endm
28
/*
 * sc_save_fp base
 *
 * Store $f0..$f31 with fst.d into 32 consecutive FPU_REG_WIDTH-byte slots
 * starting at \base, lowest register first. Every store is wrapped in EX,
 * so each one gets its own __ex_table entry.
 */
	.macro sc_save_fp base
	.irp	idx, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	EX	fst.d $f\idx, \base, (\idx * FPU_REG_WIDTH)
	.endr
	.endm
63
/*
 * sc_restore_fp base
 *
 * Load $f0..$f31 with fld.d from 32 consecutive FPU_REG_WIDTH-byte slots
 * starting at \base, lowest register first. Every load is wrapped in EX,
 * so each one gets its own __ex_table entry.
 */
	.macro sc_restore_fp base
	.irp	idx, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	EX	fld.d $f\idx, \base, (\idx * FPU_REG_WIDTH)
	.endr
	.endm
98
/*
 * sc_save_fcc base, tmp0, tmp1
 *
 * Pack the eight condition flags $fcc0..$fcc7 into one doubleword, with
 * $fccN placed in byte N (bits 8N+7..8N), and store it with st.d at \base.
 * The store is wrapped in EX.
 *
 * Clobbers \tmp0 and \tmp1; the two must be different registers.
 */
	.macro sc_save_fcc base, tmp0, tmp1
	/* byte 0: start the packed value from $fcc0 */
	movcf2gr	\tmp0, $fcc0
	move	\tmp1, \tmp0

	/* bytes 1..7: insert the low 8 bits of each following flag */
	movcf2gr	\tmp0, $fcc1
	bstrins.d	\tmp1, \tmp0, 15, 8

	movcf2gr	\tmp0, $fcc2
	bstrins.d	\tmp1, \tmp0, 23, 16

	movcf2gr	\tmp0, $fcc3
	bstrins.d	\tmp1, \tmp0, 31, 24

	movcf2gr	\tmp0, $fcc4
	bstrins.d	\tmp1, \tmp0, 39, 32

	movcf2gr	\tmp0, $fcc5
	bstrins.d	\tmp1, \tmp0, 47, 40

	movcf2gr	\tmp0, $fcc6
	bstrins.d	\tmp1, \tmp0, 55, 48

	movcf2gr	\tmp0, $fcc7
	bstrins.d	\tmp1, \tmp0, 63, 56

	EX	st.d \tmp1, \base, 0
	.endm
118
/*
 * sc_restore_fcc base, tmp0, tmp1
 *
 * Inverse of the packing used when saving: load one doubleword from \base
 * (through EX), then extract byte N (bits 8N+7..8N) and move it into $fccN
 * for N = 0..7.
 *
 * Clobbers \tmp0 (the loaded doubleword) and \tmp1 (the extracted byte);
 * the two must be different registers.
 */
	.macro sc_restore_fcc base, tmp0, tmp1
	EX	ld.d \tmp0, \base, 0

	bstrpick.d	\tmp1, \tmp0, 7, 0
	movgr2cf	$fcc0, \tmp1

	bstrpick.d	\tmp1, \tmp0, 15, 8
	movgr2cf	$fcc1, \tmp1

	bstrpick.d	\tmp1, \tmp0, 23, 16
	movgr2cf	$fcc2, \tmp1

	bstrpick.d	\tmp1, \tmp0, 31, 24
	movgr2cf	$fcc3, \tmp1

	bstrpick.d	\tmp1, \tmp0, 39, 32
	movgr2cf	$fcc4, \tmp1

	bstrpick.d	\tmp1, \tmp0, 47, 40
	movgr2cf	$fcc5, \tmp1

	bstrpick.d	\tmp1, \tmp0, 55, 48
	movgr2cf	$fcc6, \tmp1

	bstrpick.d	\tmp1, \tmp0, 63, 56
	movgr2cf	$fcc7, \tmp1
	.endm
138
/*
 * sc_save_fcsr base, tmp0
 *
 * Read fcsr0 into \tmp0 and store it as a 32-bit word at \base. The store
 * is wrapped in EX. Clobbers \tmp0.
 */
	.macro sc_save_fcsr base, tmp0
	movfcsr2gr	\tmp0, fcsr0
	EX	st.w \tmp0, \base, 0
	.endm
143
/*
 * sc_restore_fcsr base, tmp0
 *
 * Load a 32-bit word from \base into \tmp0 (the load is wrapped in EX) and
 * write it to fcsr0. Clobbers \tmp0.
 */
	.macro sc_restore_fcsr base, tmp0
	EX	ld.w \tmp0, \base, 0
	movgr2fcsr	fcsr0, \tmp0
	.endm
148
/*
 * sc_save_vcsr base, tmp0
 *
 * Read vcsr16 into \tmp0 and store it as a 32-bit word at \base. The store
 * is wrapped in EX. Clobbers \tmp0.
 *
 * vcsr16 is defined outside this file; presumably it names the vector
 * control/status register -- confirm against its definition.
 */
	.macro sc_save_vcsr base, tmp0
	movfcsr2gr	\tmp0, vcsr16
	EX	st.w \tmp0, \base, 0
	.endm
153
/*
 * sc_restore_vcsr base, tmp0
 *
 * Load a 32-bit word from \base into \tmp0 (the load is wrapped in EX) and
 * write it to vcsr16. Clobbers \tmp0.
 *
 * vcsr16 is defined outside this file; presumably it names the vector
 * control/status register -- confirm against its definition.
 */
	.macro sc_restore_vcsr base, tmp0
	EX	ld.w \tmp0, \base, 0
	movgr2fcsr	vcsr16, \tmp0
	.endm
158
/*
 * _save_fp - save a thread's FP context.
 *
 * a0 is handed as the base operand to the fpu_save_* macros, which are
 * defined outside this file (presumably it points at the thread's FP save
 * area -- confirm against those macros). t1 and t2 serve as scratch
 * registers. The symbol is exported with EXPORT_SYMBOL.
 */
SYM_FUNC_START(_save_fp)
	fpu_save_csr	a0 t1
	fpu_save_double	a0 t1			/* clobbers t1 */
	fpu_save_cc	a0 t1 t2		/* clobbers t1, t2 */
	jr	ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)
169
/*
 * _restore_fp - restore a thread's FP context.
 *
 * a0 is handed as the base operand to the fpu_restore_* macros, which are
 * defined outside this file (presumably it points at the thread's FP save
 * area -- confirm against those macros). t1 and t2 serve as scratch
 * registers.
 */
SYM_FUNC_START(_restore_fp)
	fpu_restore_double	a0 t1		/* clobbers t1 */
	fpu_restore_csr	a0 t1
	fpu_restore_cc	a0 t1 t2		/* clobbers t1, t2 */
	jr	ra
SYM_FUNC_END(_restore_fp)
179
/*
 * _init_fpu - enable the FPU and put its registers into a known state.
 *
 * a0: value written to fcsr0.
 *
 * Sets CSR_EUEN_FPEN in LOONGARCH_CSR_EUEN, loads fcsr0 from a0, then fills
 * $f0..$f31 with the bit pattern produced by "li.w t1, -1". That pattern
 * reads as a NaN whether it is interpreted as single or as double precision.
 * Clobbers t1.
 *
 * NOTE(review): this routine has historically been described as loading
 * signalling NaNs. Under the IEEE 754-2008 quiet-bit convention an all-ones
 * pattern is a quiet NaN, so that wording looks inherited -- confirm which
 * kind of NaN is actually intended.
 */

SYM_FUNC_START(_init_fpu)
	/* csrxchg with value == mask == FPEN turns the enable bit on */
	li.w	t1, CSR_EUEN_FPEN
	csrxchg	t1, t1, LOONGARCH_CSR_EUEN

	movgr2fcsr	fcsr0, a0

	li.w	t1, -1				/* NaN fill pattern */

	.irp	idx, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	movgr2fr.d	$f\idx, t1
	.endr

	jr	ra
SYM_FUNC_END(_init_fpu)
231
/*
 * _save_fp_context - write the live FP state to three separate buffers.
 *
 * a0: fpregs - receives $f0..$f31, one FPU_REG_WIDTH-byte slot each
 * a1: fcc    - receives the packed $fcc0..$fcc7 doubleword
 * a2: fcsr   - receives the 32-bit fcsr0 value
 *
 * Returns 0 in a0 when the straight-line path completes. Every memory
 * access is wrapped in EX, which pairs it with the fault routine; that
 * routine returns -EFAULT in a0 instead.
 * Clobbers t1 and t2.
 */
SYM_FUNC_START(_save_fp_context)
	sc_save_fcc	a1 t1 t2
	sc_save_fcsr	a2 t1
	sc_save_fp	a0
	li.w	a0, 0				/* success; a0 is no longer needed as base */
	jr	ra
SYM_FUNC_END(_save_fp_context)
244
/*
 * _restore_fp_context - reload the FP state from three separate buffers.
 *
 * a0: fpregs - source of $f0..$f31, one FPU_REG_WIDTH-byte slot each
 * a1: fcc    - source of the packed $fcc0..$fcc7 doubleword
 * a2: fcsr   - source of the 32-bit fcsr0 value
 *
 * Returns 0 in a0 when the straight-line path completes. Every memory
 * access is wrapped in EX, which pairs it with the fault routine; that
 * routine returns -EFAULT in a0 instead.
 * Clobbers t1 and t2.
 */
SYM_FUNC_START(_restore_fp_context)
	sc_restore_fp	a0
	sc_restore_fcc	a1 t1 t2
	sc_restore_fcsr	a2 t1
	li.w	a0, 0				/* success; a0 is no longer needed as base */
	jr	ra
SYM_FUNC_END(_restore_fp_context)
257
/*
 * fault - routine named in every __ex_table entry emitted by EX.
 *
 * Loads -EFAULT into a0 and returns through ra. It does not touch ra itself,
 * so when entered from one of the EX-wrapped accesses it returns to the
 * caller of the routine that contained the access (this presumes the fixup
 * path leaves ra intact -- confirm).
 */
SYM_FUNC_START(fault)
	li.w	a0, -EFAULT			/* failure */
	jr	ra
SYM_FUNC_END(fault)
262