/*
 * arch/xtensa/lib/usercopy.S
 *
 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 * It needs to remain separate and distinct.  The hal files are part
 * of the Xtensa link-time HAL, and those files may differ per
 * processor configuration.  Patching the kernel for another
 * processor configuration includes replacing the hal files, and we
 * could lose the special functionality for accessing user-space
 * memory during such a patch.  We sacrifice a little code space here
 * in favor of simpler code maintenance.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The return value is the number of bytes not copied, so a return
 * value of zero means the copy succeeded completely.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If the destination is aligned and the source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If the destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 * This code tries to use fall-through branches for the common
 * case of aligned destinations (except for the branches to
 * the alignment labels).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 */

#include <variant/core.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1)	src	R, W0, W1
#define SSA8(R)			ssa8b	R
#else
#define ALIGN(R, W0, W1)	src	R, W1, W0
#define SSA8(R)			ssa8l	R
#endif
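
/*
 * For illustration, a sketch of how the macros above combine unaligned
 * source words on a little-endian core: with a source address that is
 * 1 mod 4, SSA8(a3) expands to "ssa8l a3" and sets SAR = 8.  A later
 * ALIGN(a6, a6, a7) expands to "src a6, a7, a6", a funnel shift of the
 * 64-bit pair {a7,a6} right by SAR bits, which assembles the four
 * source bytes starting at the unaligned address into one word.
 */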

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous


	.text
	.align	4
	.global	__xtensa_copy_user
	.type	__xtensa_copy_user,@function
__xtensa_copy_user:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is also aligned,
	bnone	a3, a8, .Laligned	# then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw

/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	retw
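
/*
 * Worked example of the alignment code above (for illustration): with
 * dst == 1 mod 4 and len == 10, .Ldst1mod2 copies one byte (dst is now
 * 2 mod 4, len == 9), .Ldst2mod4 copies two more (dst is now
 * word-aligned, len == 7), and the remaining seven bytes are handled at
 * .Ldstaligned by the 4/2/1-byte tail copies (or by the shifted copy
 * below if the source is still unaligned).
 */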

/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a6, a3, 8, fixup)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a7, a3, 12, fixup)
	EX(s32i, a6, a5, 8, fixup)
	addi	a3, a3, 16
	EX(s32i, a7, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 8
	EX(s32i, a6, a5, 0, fixup)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
	EX(l32i, a6, a3, 0, fixup)
	addi	a3, a3, 4
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
	EX(l16ui, a6, a3, 0, fixup)
	addi	a3, a3, 2
	EX(s16i, a6, a5, 0, fixup)
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L5:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(l32i, a6, a3, 0, fixup)	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a9, a3, 12, fixup)
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a6, a3, 16, fixup)
	ALIGN(	a8, a8, a9)
	EX(s32i, a8, a5, 8, fixup)
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	EX(s32i, a9, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a12, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a3, a3, 8
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 4
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L15:
	movi	a2, 0		# return success for len bytes copied
	retw
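
/*
 * For reference, EX(l8ui, a6, a3, 0, fixup) above expands to roughly
 *
 *	9:	l8ui	a6, a3, 0
 *		.section __ex_table, "a"
 *		.word	9b, fixup
 *		.previous
 *
 * pairing the address of the access with 'fixup' in __ex_table, so a
 * fault on any EX()-tagged access transfers control to the fixup code
 * below.
 */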

	.section .fixup, "ax"
	.align	4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 */


fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw
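
/*
 * Worked example of the fixup arithmetic: if a fault occurs once a5 has
 * advanced 5 bytes past the original dst on a 12-byte request
 * (a5 - a2 == 5, a11 == 12), the fixup returns 12 - 5 = 7, i.e. 7 bytes
 * were not copied.
 */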