/*
 * arch/xtensa/lib/memset.S
 *
 * ANSI C standard library function memset
 * (Well, almost. .fixup code might return zero.)
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License. See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *  Create a word with c in all byte positions
 *  If the destination is aligned,
 *    do 16B chucks with a loop, and then finish up with
 *    8B, 4B, 2B, and 1B stores conditional on the length.
 *  If destination is unaligned, align it by conditionally
 *    setting 1B and 2B and then go to aligned case.
 *  This code tries to use fall-through branches for the common
 *  case of an aligned destination (except for the branches to
 *  the alignment labels).
 *
 * Register roles throughout:
 *  a2 = dst (preserved as the return value)
 *  a3 = fill character, replicated into all 4 bytes of the word
 *  a4 = length in bytes
 *  a5 = current store pointer (walks forward from dst)
 *  a6 = scratch (alignment mask / loop end address)
 *  a7 = scratch (replication temp / 16B iteration count)
 *
 * Every store is wrapped in EX(10f): the address is entered in the
 * exception table, and a faulting store jumps to label 10 in the
 * .fixup section below, which returns 0 instead of dst.
 */

.text
/*
 * __memset is the strong entry; memset is declared weak so another
 * definition can interpose it.  NOTE(review): presumably this allows
 * instrumented wrappers (e.g. KASAN) to override memset — confirm
 * against the callers/Kconfig.
 */
ENTRY(__memset)
WEAK(memset)

	abi_entry_default
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ... a3 now has c in bytes 0 and 1
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ... a3 now has c in all 4 bytes
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	abi_ret_default		# length == 0: nothing to do, return dst

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done	# zero-overhead loop: a7 iterations, skip if 0
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done	# no full 16B chunk to set
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	# store 16 bytes per iteration; each store may fault to 10: below
EX(10f) s32i	a3, a5, 0
EX(10f) s32i	a3, a5, 4
EX(10f) s32i	a3, a5, 8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	# Finish the tail: bits 3..0 of the length select 8/4/2/1 byte
	# stores, largest first.  Each test falls through on the
	# "bit clear" (nothing to store) case.
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5, 0
EX(10f) s32i	a3, a5, 4
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5, 0
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5, 0
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5, 0
.L5:
.Lret1:
	abi_ret_default		# return dst in a2

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# do short copies byte by byte
	bbci.l	a5, 0, .L20	# branch if dst alignment half-aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f) s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2	# safe: length >= 8 guaranteed above
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone	# a4 iterations, one byte each
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	abi_ret_default

ENDPROC(__memset)
EXPORT_SYMBOL(__memset)
EXPORT_SYMBOL(memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

10:	# landing pad for any faulting EX(10f) store above
	movi	a2, 0		# replace return value (dst) with NULL
	abi_ret_default