/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

/*
 * fill_to_64 r0
 *
 * Replicate the low byte of register r0 across all eight bytes of r0.
 * Each bstrins.d copies the low bits of r0 into the next higher bit
 * field, doubling the replicated width: 8 -> 16 -> 32 -> 64 bits.
 */
.macro fill_to_64 r0
	bstrins.d	\r0, \r0, 15, 8
	bstrins.d	\r0, \r0, 31, 16
	bstrins.d	\r0, \r0, 63, 32
.endm

.section .noinstr.text, "ax"

/*
 * void *memset(void *s, int c, size_t n)
 *
 * Entry point, also available under the alias __memset. It consists of
 * a single branch: the default instruction goes to __memset_generic and
 * the alternative, keyed on CPU_FEATURE_UAL, goes to __memset_fast.
 * Arguments are left untouched in a0/a1/a2 for the chosen implementation.
 */
SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * Byte-at-a-time fill; uses only single-byte stores.
 *
 * a0: s
 * a1: c
 * a2: n
 *
 * Returns s in a0. Clobbers a2 and a3.
 */
SYM_FUNC_START(__memset_generic)
	/* keep the original destination for the return value */
	move	a3, a0
	/* nothing to store when n == 0 */
	beqz	a2, 2f

	/* store one byte, advance the pointer, count down (signed compare) */
1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * Fill using stores that may be unaligned and may overlap each other.
 *
 * a0: s
 * a1: c
 * a2: n
 *
 * a0 is never written, so s is returned in a0.
 * Clobbers a1 (widened fill pattern), a2, a3, a4 and t0.
 */
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

	/* n < 9 (unsigned): handled by the jump table at .Lsmall */
	sltui	t0, a2, 9
	bnez	t0, .Lsmall

	/* a2 = s + n (end of buffer); store the first 8 bytes at s */
	add.d	a2, a0, a2
	st.d	a1, a0, 0

	/* align up address */
	/* a3 = (s + 8) with the low three bits cleared */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	/* skip the 64-byte loop unless a3 < end - 64 */
	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/* set the remaining bytes */
	/* one 32-byte step if a3 < end - 32 */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

	/* one 16-byte step if a3 < end - 16 */
.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

	/* one 8-byte step if a3 < end - 8 */
.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

	/*
	 * Final 8 bytes, stored at end - 8. This store may overlap bytes
	 * already written above and may be unaligned.
	 */
.Llt8:
	st.d	a1, a2, -8

	/* return */
	jr	ra

	/*
	 * Small sizes (n = 0..8): computed jump into a table of handlers.
	 * pcaddi yields the address 16 bytes past itself, which is entry 0
	 * because .Lsmall is 16-byte aligned and the dispatch sequence is
	 * four instructions long. Entry n lives at entry 0 + n * 16, so
	 * every handler below must stay within its own 16-byte-aligned
	 * slot (at most four instructions, each preceded by .align 4).
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	/* n == 0: nothing to store */
	.align	4
0:	jr	ra

	/* n == 1 */
	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	/* n == 2 */
	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	/* n == 3: 2 + 1 bytes */
	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	/* n == 4 */
	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	/* n == 5: 4 + 1 bytes */
	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	/* n == 6: 4 + 2 bytes */
	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	/* n == 7: two 4-byte stores overlapping by one byte (offsets 0 and 3) */
	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3
	jr	ra

	/* n == 8 */
	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)