/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>

.section .noinstr.text, "ax"

SYM_FUNC_START(memcpy)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memcpy_generic", \
			"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)

EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)

/*
 * void *__memcpy_generic(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 *
 * Byte-by-byte copy, safe on CPUs without hardware unaligned access.
 */
SYM_FUNC_START(__memcpy_generic)
	move	a3, a0
	beqz	a2, 2f

1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	addi.d	a0, a0, 1
	addi.d	a1, a1, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)

/*
 * Copy 0 to 8 bytes via a computed jump: each size-n stub below is
 * padded to 32 bytes (.align 5), so the jump target is the address of
 * the size-0 stub (pcaddi t0, 8 yields PC + 32) plus n * 32.
 */
	.align	5
SYM_FUNC_START_NOALIGN(__memcpy_small)
	pcaddi	t0, 8
	slli.d	a2, a2, 5
	add.d	t0, t0, a2
	jr	t0

	.align	5
0:	jr	ra

	.align	5
1:	ld.b	t0, a1, 0
	st.b	t0, a0, 0
	jr	ra

	.align	5
2:	ld.h	t0, a1, 0
	st.h	t0, a0, 0
	jr	ra

	.align	5
3:	ld.h	t0, a1, 0
	ld.b	t1, a1, 2
	st.h	t0, a0, 0
	st.b	t1, a0, 2
	jr	ra

	.align	5
4:	ld.w	t0, a1, 0
	st.w	t0, a0, 0
	jr	ra

	.align	5
5:	ld.w	t0, a1, 0
	ld.b	t1, a1, 4
	st.w	t0, a0, 0
	st.b	t1, a0, 4
	jr	ra

	.align	5
6:	ld.w	t0, a1, 0
	ld.h	t1, a1, 4
	st.w	t0, a0, 0
	st.h	t1, a0, 4
	jr	ra

	.align	5
7:	ld.w	t0, a1, 0
	ld.w	t1, a1, 3	/* overlaps the first word by one byte */
	st.w	t0, a0, 0
	st.w	t1, a0, 3
	jr	ra

	.align	5
8:	ld.d	t0, a1, 0
	st.d	t0, a0, 0
	jr	ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)

/*
 * void *__memcpy_fast(void *dst, const void *src, size_t n)
 *
 * a0: dst
 * a1: src
 * a2: n
 */
SYM_FUNC_START(__memcpy_fast)
	sltui	t0, a2, 9
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2	/* a3 = src end */
	add.d	a2, a0, a2	/* a2 = dst end */
	/*
	 * Save the first and last 8 bytes of src; they are stored at
	 * .Llt8 and cover any unaligned head and tail.
	 */
	ld.d	a6, a1, 0
	ld.d	a7, a3, -8

	/* align up destination address */
	andi	t1, a0, 7
	sub.d	t0, zero, t1
	addi.d	t0, t0, 8	/* t0 = 8 - (dst & 7) */
	add.d	a1, a1, t0
	add.d	a5, a0, t0

	addi.d	a4, a3, -64
	bgeu	a1, a4, .Llt64

	/* copy 64 bytes at a time */
.Lloop64:
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	ld.d	t4, a1, 32
	ld.d	t5, a1, 40
	ld.d	t6, a1, 48
	ld.d	t7, a1, 56
	addi.d	a1, a1, 64
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	st.d	t4, a5, 32
	st.d	t5, a5, 40
	st.d	t6, a5, 48
	st.d	t7, a5, 56
	addi.d	a5, a5, 64
	bltu	a1, a4, .Lloop64

	/* copy the remaining bytes */
.Llt64:
	addi.d	a4, a3, -32
	bgeu	a1, a4, .Llt32
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	ld.d	t2, a1, 16
	ld.d	t3, a1, 24
	addi.d	a1, a1, 32
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	st.d	t2, a5, 16
	st.d	t3, a5, 24
	addi.d	a5, a5, 32

.Llt32:
	addi.d	a4, a3, -16
	bgeu	a1, a4, .Llt16
	ld.d	t0, a1, 0
	ld.d	t1, a1, 8
	addi.d	a1, a1, 16
	st.d	t0, a5, 0
	st.d	t1, a5, 8
	addi.d	a5, a5, 16

.Llt16:
	addi.d	a4, a3, -8
	bgeu	a1, a4, .Llt8
	ld.d	t0, a1, 0
	st.d	t0, a5, 0

.Llt8:
	/*
	 * Store the saved head and tail; these stores may overlap bytes
	 * already copied above.
	 */
	st.d	a6, a0, 0
	st.d	a7, a2, -8

	/* return */
	jr	ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)
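
/*
 * For reference only (kept inside a comment, so nothing below is
 * assembled): a minimal C sketch of the head/tail strategy that
 * __memcpy_fast implements, assuming hardware unaligned access
 * (CPU_FEATURE_UAL). The real code additionally unrolls the bulk loop
 * into 64/32/16-byte steps; fast_copy_sketch and small_copy are
 * illustrative names, not kernel symbols.
 *
 *	static void *fast_copy_sketch(void *dst, const void *src, size_t n)
 *	{
 *		char *d = dst;
 *		const char *s = src;
 *		unsigned long head, tail, t;
 *
 *		if (n < 9)
 *			return small_copy(dst, src, n);
 *
 *		head = get_unaligned((unsigned long *)s);
 *		tail = get_unaligned((unsigned long *)(s + n - 8));
 *
 *		t = 8 - ((unsigned long)d & 7);
 *		for (; t + 8 <= n; t += 8)
 *			put_unaligned(get_unaligned((unsigned long *)(s + t)),
 *				      (unsigned long *)(d + t));
 *
 *		put_unaligned(head, (unsigned long *)d);
 *		put_unaligned(tail, (unsigned long *)(d + n - 8));
 *		return dst;
 *	}
 */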