xref: /openbmc/linux/arch/loongarch/lib/copy_user.S (revision 046b212a)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
5
#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/asm-extable.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
13
/*
 * unsigned long __copy_user(void *to, const void *from, size_t n)
 *
 * a0: to
 * a1: from
 * a2: n
 *
 * Entry point shared by both implementations.  It consists of a single
 * plain branch (no link), so a0-a2 and ra reach the selected routine
 * unchanged and that routine returns straight to the original caller.
 */
SYM_FUNC_START(__copy_user)
	/*
	 * Some CPUs support hardware unaligned access: the default
	 * instruction branches to the byte-wise routine, and the
	 * alternative for CPU_FEATURE_UAL branches to the fast routine.
	 */
	ALTERNATIVE	"b __copy_user_generic",	\
			"b __copy_user_fast", CPU_FEATURE_UAL
SYM_FUNC_END(__copy_user)

EXPORT_SYMBOL(__copy_user)
23
/*
 * unsigned long __copy_user_generic(void *to, const void *from, size_t n)
 *
 * a0: to
 * a1: from
 * a2: n
 *
 * Copies one byte per iteration, so it never issues an access wider
 * than a byte.
 *
 * Returns in a0 the number of bytes that were NOT copied: 0 when the
 * whole range was copied, otherwise the count still outstanding when a
 * load or store faulted.
 *
 * Clobbers: t0.  a0, a1 and a2 are advanced/consumed by the loop.
 */
SYM_FUNC_START(__copy_user_generic)
	beqz	a2, 3f			/* n == 0: nothing to do, return 0 */

1:	ld.b	t0, a1, 0
2:	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1		/* a2 = bytes still to copy */
	bgtz	a2, 1b

3:	move	a0, a2			/* return the bytes not copied */
	jr	ra

	/*
	 * A fault on the load (1) or the store (2) resumes at 3 with a2
	 * not yet decremented for the failing byte, so the byte that
	 * faulted is counted as not copied.
	 */
	_asm_extable 1b, 3b
	_asm_extable 2b, 3b
SYM_FUNC_END(__copy_user_generic)
47
/*
 * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n)
 *
 * a0: to
 * a1: from
 * a2: n
 *
 * Variant reached from __copy_user on CPUs with hardware unaligned
 * access (CPU_FEATURE_UAL); it freely issues unaligned 2/4/8-byte
 * loads and stores.
 *
 * Returns in a0 the number of bytes that were NOT copied (0 when the
 * whole range was copied).
 *
 * Layout:
 *   n <= 8 : computed jump into a table of 32-byte slots, one per size.
 *   n >= 9 : copy the first 8 bytes, round the destination up to an
 *            8-byte boundary, copy 64/32/16/8-byte chunks, then copy
 *            the last 8 bytes of the range (possibly overlapping bytes
 *            that were already copied).
 *
 * Register roles on the n >= 9 path, once set up:
 *   a0 = current destination   a2 = end of destination (to + n)
 *   a1 = current source        a3 = end of source      (from + n)
 *   a4 = limit for the chunk size currently being tested
 *
 * Clobbers: t0-t7, a3, a4 (a0-a2 are consumed).
 */
SYM_FUNC_START(__copy_user_fast)
	sltui	t0, a2, 9		/* t0 = (n < 9) */
	bnez	t0, .Lsmall

	/*
	 * Copy the first 8 bytes before a0/a1/a2 are modified, so a fault
	 * here can be retried byte-wise with the original arguments.
	 */
0:	ld.d	t0, a1, 0
1:	st.d	t0, a0, 0
	add.d	a3, a1, a2		/* a3 = end of source */
	add.d	a2, a0, a2		/* a2 = end of destination */

	/*
	 * align up destination address: advance both pointers by
	 * 8 - (to & 7), i.e. 1..8 bytes, all of which were covered by the
	 * 8-byte copy above.
	 */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8
	add.d	a1, a1, t0
	add.d	a0, a0, t0

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
2:	ld.d	t0, a1, 0
3:	ld.d	t1, a1, 8
4:	ld.d	t2, a1, 16
5:	ld.d	t3, a1, 24
6:	ld.d	t4, a1, 32
7:	ld.d	t5, a1, 40
8:	ld.d	t6, a1, 48
9:	ld.d	t7, a1, 56
10:	st.d	t0, a0, 0
11:	st.d	t1, a0, 8
12:	st.d	t2, a0, 16
13:	st.d	t3, a0, 24
14:	st.d	t4, a0, 32
15:	st.d	t5, a0, 40
16:	st.d	t6, a0, 48
17:	st.d	t7, a0, 56
	addi.d	a1, a1, 64
	addi.d	a0, a0, 64
	bltu	a1, a4, .Lloop64

	/*
	 * copy the remaining bytes: each stage below runs at most once
	 * and only when more than that many source bytes are still left.
	 */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
18:	ld.d	t0, a1, 0
19:	ld.d	t1, a1, 8
20:	ld.d	t2, a1, 16
21:	ld.d	t3, a1, 24
22:	st.d	t0, a0, 0
23:	st.d	t1, a0, 8
24:	st.d	t2, a0, 16
25:	st.d	t3, a0, 24
	addi.d	a1, a1, 32
	addi.d	a0, a0, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
26:	ld.d	t0, a1, 0
27:	ld.d	t1, a1, 8
28:	st.d	t0, a0, 0
29:	st.d	t1, a0, 8
	addi.d	a1, a1, 16
	addi.d	a0, a0, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
30:	ld.d	t0, a1, 0
31:	st.d	t0, a0, 0
	addi.d	a1, a1, 8
	addi.d	a0, a0, 8

	/*
	 * Finish with the 8 bytes that end exactly at the end of the
	 * range, addressed from the end pointers rather than from a0/a1.
	 */
.Llt8:
32:	ld.d	t0, a3, -8
33:	st.d	t0, a2, -8

	/* return */
	move	a0, zero
	jr	ra

	/*
	 * n <= 8: jump table.  .Lsmall and every slot after it are aligned
	 * with ".align 5", and the slot for size n is located at
	 * (address of pcaddi) + 32 + (n << 5).
	 */
	.align	5
.Lsmall:
	pcaddi	t0, 8			/* t0 = pc + (8 << 2): the n == 0 slot */
	slli.d	a3, a2, 5		/* a3 = n * 32 */
	add.d	t0, t0, a3
	jr	t0

	/* n == 0 */
	.align	5
	move	a0, zero
	jr	ra

	/* n == 1 */
	.align	5
34:	ld.b	t0, a1, 0
35:	st.b	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 2 */
	.align	5
36:	ld.h	t0, a1, 0
37:	st.h	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 3 */
	.align	5
38:	ld.h	t0, a1, 0
39:	ld.b	t1, a1, 2
40:	st.h	t0, a0, 0
41:	st.b	t1, a0, 2
	move	a0, zero
	jr	ra

	/* n == 4 */
	.align	5
42:	ld.w	t0, a1, 0
43:	st.w	t0, a0, 0
	move	a0, zero
	jr	ra

	/* n == 5 */
	.align	5
44:	ld.w	t0, a1, 0
45:	ld.b	t1, a1, 4
46:	st.w	t0, a0, 0
47:	st.b	t1, a0, 4
	move	a0, zero
	jr	ra

	/* n == 6 */
	.align	5
48:	ld.w	t0, a1, 0
49:	ld.h	t1, a1, 4
50:	st.w	t0, a0, 0
51:	st.h	t1, a0, 4
	move	a0, zero
	jr	ra

	/* n == 7: two 4-byte accesses that overlap in byte 3 */
	.align	5
52:	ld.w	t0, a1, 0
53:	ld.w	t1, a1, 3
54:	st.w	t0, a0, 0
55:	st.w	t1, a0, 3
	move	a0, zero
	jr	ra

	/* n == 8 */
	.align	5
56:	ld.d	t0, a1, 0
57:	st.d	t0, a0, 0
	move	a0, zero
	jr	ra

	/*
	 * fixup and ex_table
	 *
	 * .Llarge_fixup: entered from a fault after the n >= 9 path has
	 * turned a2 into the destination end pointer; convert it back into
	 * a byte count measured from the current destination a0.
	 *
	 * .Lsmall_fixup: entered with a2 holding a byte count and a0/a1
	 * pointing at the first byte to retry.  The range is re-copied one
	 * byte at a time so that a2 ends up as the exact number of bytes
	 * not copied; a fault inside this loop leaves through .Lexit.
	 */
.Llarge_fixup:
	sub.d	a2, a2, a0

.Lsmall_fixup:
58:	ld.b	t0, a1, 0
59:	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 58b

.Lexit:
	move	a0, a2			/* return the bytes not copied */
	jr	ra

	/* 0-1: first 8 bytes, a0/a1/a2 still hold the original arguments */
	_asm_extable 0b, .Lsmall_fixup
	_asm_extable 1b, .Lsmall_fixup
	/* 2-33: chunked copy and tail, a2 is the destination end pointer */
	_asm_extable 2b, .Llarge_fixup
	_asm_extable 3b, .Llarge_fixup
	_asm_extable 4b, .Llarge_fixup
	_asm_extable 5b, .Llarge_fixup
	_asm_extable 6b, .Llarge_fixup
	_asm_extable 7b, .Llarge_fixup
	_asm_extable 8b, .Llarge_fixup
	_asm_extable 9b, .Llarge_fixup
	_asm_extable 10b, .Llarge_fixup
	_asm_extable 11b, .Llarge_fixup
	_asm_extable 12b, .Llarge_fixup
	_asm_extable 13b, .Llarge_fixup
	_asm_extable 14b, .Llarge_fixup
	_asm_extable 15b, .Llarge_fixup
	_asm_extable 16b, .Llarge_fixup
	_asm_extable 17b, .Llarge_fixup
	_asm_extable 18b, .Llarge_fixup
	_asm_extable 19b, .Llarge_fixup
	_asm_extable 20b, .Llarge_fixup
	_asm_extable 21b, .Llarge_fixup
	_asm_extable 22b, .Llarge_fixup
	_asm_extable 23b, .Llarge_fixup
	_asm_extable 24b, .Llarge_fixup
	_asm_extable 25b, .Llarge_fixup
	_asm_extable 26b, .Llarge_fixup
	_asm_extable 27b, .Llarge_fixup
	_asm_extable 28b, .Llarge_fixup
	_asm_extable 29b, .Llarge_fixup
	_asm_extable 30b, .Llarge_fixup
	_asm_extable 31b, .Llarge_fixup
	_asm_extable 32b, .Llarge_fixup
	_asm_extable 33b, .Llarge_fixup
	/* 34-35: single-byte case, a fault means a2 (== 1) bytes not copied */
	_asm_extable 34b, .Lexit
	_asm_extable 35b, .Lexit
	/* 36-57: sizes 2..8, a2 is still the byte count n */
	_asm_extable 36b, .Lsmall_fixup
	_asm_extable 37b, .Lsmall_fixup
	_asm_extable 38b, .Lsmall_fixup
	_asm_extable 39b, .Lsmall_fixup
	_asm_extable 40b, .Lsmall_fixup
	_asm_extable 41b, .Lsmall_fixup
	_asm_extable 42b, .Lsmall_fixup
	_asm_extable 43b, .Lsmall_fixup
	_asm_extable 44b, .Lsmall_fixup
	_asm_extable 45b, .Lsmall_fixup
	_asm_extable 46b, .Lsmall_fixup
	_asm_extable 47b, .Lsmall_fixup
	_asm_extable 48b, .Lsmall_fixup
	_asm_extable 49b, .Lsmall_fixup
	_asm_extable 50b, .Lsmall_fixup
	_asm_extable 51b, .Lsmall_fixup
	_asm_extable 52b, .Lsmall_fixup
	_asm_extable 53b, .Lsmall_fixup
	_asm_extable 54b, .Lsmall_fixup
	_asm_extable 55b, .Lsmall_fixup
	_asm_extable 56b, .Lsmall_fixup
	_asm_extable 57b, .Lsmall_fixup
	/* 58-59: the byte-wise retry itself */
	_asm_extable 58b, .Lexit
	_asm_extable 59b, .Lexit
SYM_FUNC_END(__copy_user_fast)
281