xref: /openbmc/linux/arch/loongarch/lib/memmove.S (revision 248ed9e227e6cf59acb1aaf3aa30d530a0232c1a)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4 */
5
6#include <asm/alternative-asm.h>
7#include <asm/asm.h>
8#include <asm/asmmacro.h>
9#include <asm/cpu.h>
10#include <asm/export.h>
11#include <asm/regdef.h>
12
/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Chooses the copy direction from the relative order of dst and src and
 * tail-branches to the matching copy routine. a0-a2 are left untouched
 * for the callee; a3 and t0 are used as scratch.
 */
SYM_FUNC_START(memmove)
	/* addresses are unsigned quantities: order them with bltu, not blt */
	bltu	a0, a1, .Lmemmove_fwd	/* dst < src, memcpy */
	bltu	a1, a0, .Lmemmove_bwd	/* src < dst, rmemcpy */
	jr	ra			/* dst == src, return */

	/* if (src - dst) < 64, copy 1 byte at a time */
.Lmemmove_fwd:
	ori	a3, zero, 64
	sub.d	t0, a1, a0		/* t0 = src - dst, non-zero here */
	bltu	t0, a3, .Lmemmove_fwd_bytes
	b	memcpy
.Lmemmove_fwd_bytes:
	b	__memcpy_generic

	/* if (dst - src) < 64, copy 1 byte at a time */
.Lmemmove_bwd:
	ori	a3, zero, 64
	sub.d	t0, a0, a1		/* t0 = dst - src, non-zero here */
	bltu	t0, a3, .Lmemmove_bwd_bytes
	b	rmemcpy
.Lmemmove_bwd_bytes:
	b	__rmemcpy_generic
SYM_FUNC_END(memmove)
_ASM_NOKPROBE(memmove)

EXPORT_SYMBOL(memmove)
35
/*
 * rmemcpy: entry stub for the backward copy.
 *
 * Branches to __rmemcpy_generic by default; the alternative instruction,
 * keyed on CPU_FEATURE_UAL (hardware unaligned access), branches to
 * __rmemcpy_fast instead. Arguments in a0-a2 are passed through unchanged.
 */
SYM_FUNC_START(rmemcpy)
	ALTERNATIVE	"b __rmemcpy_generic", "b __rmemcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(rmemcpy)
_ASM_NOKPROBE(rmemcpy)
44
/*
 * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Byte-wise backward copy: the last byte is moved first and both cursors
 * walk down toward the start of their buffers. The original dst is handed
 * back in a0. Scratch registers: a3, t0.
 */
SYM_FUNC_START(__rmemcpy_generic)
	move	a3, a0				/* stash dst for the return value */
	beqz	a2, .Lrgeneric_out		/* n == 0: nothing to copy */

	/* advance both cursors to one past the end of their buffers */
	add.d	a1, a1, a2
	add.d	a0, a0, a2

.Lrgeneric_byte:
	addi.d	a1, a1, -1			/* step down first ... */
	addi.d	a0, a0, -1
	ld.b	t0, a1, 0			/* ... then move the byte under the cursors */
	st.b	t0, a0, 0
	addi.d	a2, a2, -1
	blt	zero, a2, .Lrgeneric_byte	/* loop while remaining count > 0 (signed) */

.Lrgeneric_out:
	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_generic)
_ASM_NOKPROBE(__rmemcpy_generic)
70
/*
 * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Backward copy: starts at the end of each buffer and walks toward the
 * start. While at least 64 bytes remain, a block is moved with eight
 * ld.d followed by eight st.d; no alignment adjustment is made before
 * these accesses. Whatever is left (< 64 bytes) is moved one byte at a
 * time. The original dst is handed back in a0.
 * Scratch registers: a3, a4, t0-t7.
 */
SYM_FUNC_START(__rmemcpy_fast)
	move	a3, a0				/* stash dst for the return value */
	beqz	a2, .Lrfast_out			/* n == 0: nothing to copy */

	/* advance both cursors to one past the end of their buffers */
	add.d	a1, a1, a2
	add.d	a0, a0, a2

	ori	a4, zero, 64			/* a4 = block size in bytes */
	blt	a2, a4, .Lrfast_byte		/* fewer than 64 bytes: byte loop only */

	/*
	 * One 64-byte block per iteration, highest doubleword first.
	 * All eight loads are issued before the first store.
	 */
.Lrfast_block:
	addi.d	a1, a1, -64
	addi.d	a0, a0, -64
	ld.d	t0, a1, 56
	ld.d	t1, a1, 48
	ld.d	t2, a1, 40
	ld.d	t3, a1, 32
	ld.d	t4, a1, 24
	ld.d	t5, a1, 16
	ld.d	t6, a1, 8
	ld.d	t7, a1, 0
	st.d	t0, a0, 56
	st.d	t1, a0, 48
	st.d	t2, a0, 40
	st.d	t3, a0, 32
	st.d	t4, a0, 24
	st.d	t5, a0, 16
	st.d	t6, a0, 8
	st.d	t7, a0, 0

	addi.d	a2, a2, -64
	bge	a2, a4, .Lrfast_block		/* another full block left? */

	beqz	a2, .Lrfast_out			/* length was a multiple of 64 */

	/* byte-at-a-time tail, still walking downward */
.Lrfast_byte:
	addi.d	a1, a1, -1
	addi.d	a0, a0, -1
	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a2, a2, -1
	blt	zero, a2, .Lrfast_byte		/* loop while remaining count > 0 (signed) */

.Lrfast_out:
	move	a0, a3
	jr	ra
SYM_FUNC_END(__rmemcpy_fast)
_ASM_NOKPROBE(__rmemcpy_fast)
126