/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <variant/core.h>
#include <asm/asmmacro.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 *
 * Register usage:
 *   a2  dst on entry; left untouched so that it is the return value
 *   a3  c on entry; rebuilt as the fill word (low byte of c replicated
 *       into all four bytes)
 *   a4  number of bytes still to be set
 *   a5  working destination pointer
 *   a6  alignment mask (3); reused as the loop end address when the
 *       core has no zero-overhead loops
 *   a7  scratch while building the fill word; then the number of
 *       16-byte iterations
 *
 * Every store is wrapped in EX(10f).  The EX macro is defined outside
 * this file; it is presumably what routes a faulting store to label 10
 * in the .fixup section below, where the function returns zero instead
 * of dst.
 */

.text
ENTRY(__memset)
WEAK(memset)

	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	retw			# nothing (left) to set

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
EX(10f) s32i	a3, a5,  8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	/*
	 * Compare for inequality rather than with a signed "less than":
	 * a5 and a6 are addresses, and a signed compare ends the loop
	 * early when the buffer crosses 0x80000000.  a6 is a5 plus a
	 * non-zero multiple of 16, so a5 always reaches it exactly.
	 */
	bne	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	/* The low four bits of the length select the tail stores. */
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5,  0
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5,  0
.L5:
.Lret1:
	retw

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# do short copies byte by byte
	/*
	 * length >= 8 from here on, so the (at most) three bytes used
	 * for alignment below cannot drive a4 negative.
	 */
	bbci.l	a5, 0, .L20		# branch if dst alignment half-aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f) s8i	a3, a5,  0
	addi	a5, a5,  1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	/*
	 * Inequality test for the same reason as in .Loop1: a5 steps by
	 * one byte towards a6 = a5 + a4 (a4 != 0) and hits it exactly,
	 * whereas a signed compare misbehaves across 0x80000000.
	 */
	bne	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	retw

ENDPROC(__memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

10:
	movi	a2, 0
	retw