xref: /openbmc/linux/arch/loongarch/lib/memset.S (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
5a275a82dSHuacai Chen
655b46ff9SMasahiro Yamada#include <linux/export.h>
7a275a82dSHuacai Chen#include <asm/alternative-asm.h>
8a275a82dSHuacai Chen#include <asm/asm.h>
9a275a82dSHuacai Chen#include <asm/asmmacro.h>
10a275a82dSHuacai Chen#include <asm/cpu.h>
11a275a82dSHuacai Chen#include <asm/regdef.h>
12a275a82dSHuacai Chen
/*
 * fill_to_64 - replicate the low byte of a register into all eight of
 * its bytes.
 *
 * Each bstrins.d copies the low-order bits of the register into the
 * next-higher field of the same register, so the replicated width
 * doubles on every step: 8 -> 16 -> 32 -> 64 bits.
 *
 * The single argument is both source and destination; no other register
 * is touched.
 */
.macro fill_to_64 val
	bstrins.d	\val, \val, 15, 8	/* bits  7:0 -> bits 15:8  */
	bstrins.d	\val, \val, 31, 16	/* bits 15:0 -> bits 31:16 */
	bstrins.d	\val, \val, 63, 32	/* bits 31:0 -> bits 63:32 */
.endm
18a275a82dSHuacai Chen
.section .noinstr.text, "ax"

/*
 * void *memset(void *s, int c, size_t n)
 *
 * Dispatch stub: the body is a single branch chosen through ALTERNATIVE.
 * The default instruction branches to __memset_generic; the replacement
 * instruction, keyed on CPU_FEATURE_UAL (hardware unaligned access),
 * branches to __memset_fast.  Nothing is done to a0-a2 here, so the
 * selected implementation sees the caller's arguments unchanged.
 *
 * __memset is declared as an alias of memset; both names are exported
 * and both are marked with _ASM_NOKPROBE.
 */
SYM_FUNC_START(memset)
	ALTERNATIVE	"b __memset_generic", "b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)
35a275a82dSHuacai Chen
/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * Byte-at-a-time fill.  Only st.b is issued, so no store is ever wider
 * than one byte.
 *
 * In:       a0 = s
 *           a1 = c (st.b stores its low byte)
 *           a2 = n
 * Out:      a0 = s
 * Clobbers: a2, a3
 */
SYM_FUNC_START(__memset_generic)
	beqz	a2, .Lgeneric_out		/* n == 0: nothing to store */
	move	a3, a0				/* a3 = write cursor, a0 kept as return value */

.Lgeneric_byte:
	st.b	a1, a3, 0
	addi.d	a2, a2, -1
	addi.d	a3, a3, 1
	bgt	a2, zero, .Lgeneric_byte	/* signed test on the remaining count */

.Lgeneric_out:
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)
56a275a82dSHuacai Chen
/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * Variant that memset branches to when CPU_FEATURE_UAL is set.  It issues
 * stores that are not naturally aligned: the head and tail st.d of the
 * large path and several of the small-size cases.
 *
 * In:       a0 = s
 *           a1 = c
 *           a2 = n
 * Out:      a0 = s (a0 is never written)
 * Clobbers: a1 (becomes the 64-bit fill pattern), a2, a3, a4, t0
 */
SYM_FUNC_START(__memset_fast)
	/* replicate the low byte of c across all 64 bits of a1 */
	fill_to_64 a1

	/* n < 9 is served by the per-size table behind .Lsmall */
	sltui	t0, a2, 9
	bnez	t0, .Lsmall

	/*
	 * Head: one doubleword at s, whatever its alignment.
	 * From here on a2 is the end pointer, s + n.
	 */
	st.d	a1, a0, 0
	add.d	a2, a0, a2

	/* a3 = first 8-byte boundary strictly above s */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	/* enter the bulk loop only while a3 < end - 64 */
	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* bulk: eight doublewords (64 bytes) per iteration, a3 stays 8-byte aligned */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/* remainder: one optional 32-, 16- and 8-byte step, each guarded by a3 < end - size */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

	/* tail: the doubleword that ends exactly at s + n; it may overlap bytes already written */
.Llt8:
	st.d	a1, a2, -8

	/* a0 still holds s */
	jr	ra

	/*
	 * Sizes 0..8: computed jump into a table with one 16-byte slot per
	 * size.  pcaddi t0, 4 yields the address four instructions past
	 * .Lsmall, which is slot 0 only while this stub is exactly four
	 * instructions long and every slot starts on a 16-byte boundary
	 * (the .align 4 directives are relied on for that).  Slot n is then
	 * at t0 + (n << 4).
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	/* n == 0 */
	.align	4
.Lsmall_0:
	jr	ra

	/* n == 1 */
	.align	4
.Lsmall_1:
	st.b	a1, a0, 0
	jr	ra

	/* n == 2 */
	.align	4
.Lsmall_2:
	st.h	a1, a0, 0
	jr	ra

	/* n == 3: halfword + byte */
	.align	4
.Lsmall_3:
	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	/* n == 4 */
	.align	4
.Lsmall_4:
	st.w	a1, a0, 0
	jr	ra

	/* n == 5: word + byte */
	.align	4
.Lsmall_5:
	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	/* n == 6: word + halfword */
	.align	4
.Lsmall_6:
	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	/* n == 7: two words overlapping at byte 3 (bytes 0-3 and 3-6) */
	.align	4
.Lsmall_7:
	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	/* n == 8 */
	.align	4
.Lsmall_8:
	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)
169