/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <variant/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions.
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If the destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to the aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 */

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
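
/*
 * For illustration (this comment is not from the original source):
 * the first store in .Loop1 below, EX(s32i, a3, a5, 0, memset_fixup),
 * expands to
 *
 *	9:	s32i	a3, a5, 0
 *		.section __ex_table, "a"
 *		.word	9b, memset_fixup
 *		.previous
 *
 * i.e. the store gets a local label, and a (fault address, handler)
 * pair is recorded in the __ex_table section, so a fault taken on the
 * store is redirected to memset_fixup instead of oopsing the kernel.
 */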

.text
.align	4
.global	memset
.type	memset,@function
memset:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned	# if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	retw

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	EX(s32i, a3, a5,  0, memset_fixup)
	EX(s32i, a3, a5,  4, memset_fixup)
	EX(s32i, a3, a5,  8, memset_fixup)
	EX(s32i, a3, a5, 12, memset_fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# set 8 bytes
	EX(s32i, a3, a5, 0, memset_fixup)
	EX(s32i, a3, a5, 4, memset_fixup)
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
	EX(s32i, a3, a5, 0, memset_fixup)
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
	EX(s16i, a3, a5, 0, memset_fixup)
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
	EX(s8i, a3, a5, 0, memset_fixup)
.L5:
.Lret1:
	retw

/*
 * Destination is unaligned.
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# set short lengths byte by byte
	bbci.l	a5, 0, .L20		# branch if dst is half-word aligned
	# dst is only byte aligned
	# set 1 byte
	EX(s8i, a3, a5, 0, memset_fixup)
	addi	a5, a5, 1
	addi	a4, a4, -1
	# now retest if dst is aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-word aligned
	# set 2 bytes
	EX(s16i, a3, a5, 0, memset_fixup)
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte-by-byte set.
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
	EX(s8i, a3, a5, 0, memset_fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	retw


	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

memset_fixup:
	movi	a2, 0
	retw
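
/*
 * For reference only (this sketch is not part of the original source):
 * a rough C-level model of the algorithm described at the top of this
 * file.  It mirrors the 16B main loop, the 8/4/2/1-byte tails keyed
 * off the low bits of the length, the unaligned prologue, and the
 * byte-by-byte path for short unaligned lengths.  The exception/fixup
 * handling has no C equivalent here, and memset_sketch is a
 * hypothetical name; assumes 32-bit words as on Xtensa.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void *memset_sketch(void *dst, int c, size_t n)
 *	{
 *		unsigned char *p = dst;
 *		uint32_t w = (unsigned char)c;
 *
 *		w |= w << 8;			// c in both low bytes
 *		w |= w << 16;			// c in all four bytes
 *
 *		if ((uintptr_t)p & 3) {		// .Ldstunaligned
 *			if (n < 8)
 *				goto byteset;	// short: byte by byte
 *			if ((uintptr_t)p & 1) {	// set 1 byte
 *				*p++ = (unsigned char)c;
 *				n--;
 *			}
 *			if ((uintptr_t)p & 2) {	// .L20: set 2 bytes
 *				*(uint16_t *)p = (uint16_t)w;
 *				p += 2;
 *				n -= 2;
 *			}
 *		}
 *		for (size_t i = n >> 4; i; i--) {	// .Loop1
 *			((uint32_t *)p)[0] = w;
 *			((uint32_t *)p)[1] = w;
 *			((uint32_t *)p)[2] = w;
 *			((uint32_t *)p)[3] = w;
 *			p += 16;
 *		}
 *		if (n & 8) {			// 8B tail
 *			((uint32_t *)p)[0] = w;
 *			((uint32_t *)p)[1] = w;
 *			p += 8;
 *		}
 *		if (n & 4) {			// 4B tail
 *			*(uint32_t *)p = w;
 *			p += 4;
 *		}
 *		if (n & 2) {			// 2B tail
 *			*(uint16_t *)p = (uint16_t)w;
 *			p += 2;
 *		}
 *		if (n & 1)			// 1B tail
 *			*p = (unsigned char)c;
 *		return dst;
 *
 *	byteset:				// .Lbyteset
 *		while (n--)
 *			*p++ = (unsigned char)c;
 *		return dst;
 *	}
 */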