/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 *
 * ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

	.syntax unified
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(MEMSET_NO_THUMB_BUILD)
	.thumb
	.thumb_func
#endif
/*
 * void *memset(void *s, int c, size_t n)
 *
 * AAPCS (32-bit ARM) entry:
 *   r0 = s   destination pointer (returned unchanged in r0)
 *   r1 = c   fill byte (only the low 8 bits are meaningful; the word-fill
 *            code below replicates it into all four byte lanes)
 *   r2 = n   byte count
 *
 * ip is used as the running store pointer so that r0 survives as the
 * return value.  Callee-saved registers (r4-r8, lr) are preserved via
 * stmfd/ldmfd around the bulk loops that need them.
 */
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned? (r3 = low 2 bits of dest)
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1 head-align first if dest unaligned
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ replicate fill byte into bits 8-15
	orr	r1, r1, r1, lsl #16	@ ...and into the upper half: r1 = cccc
	mov	r3, r1			@ second fill word for stm pairs
	cmp	r2, #16
	blt	4f			@ <16 bytes: skip straight to tail

#if ! CALGN(1)+0
/* NOTE(review): CALGN() comes from <asm/assembler.h>; this branch is the
 * plain (no cache-line alignment) variant — confirm against assembler.h. */

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1			@ r1, r3, r8, lr all hold the fill word

2:	subs	r2, r2, #64		@ pre-decrement; flags drive the ge stores
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}	@ each stm writes 16 bytes; all four
	stmiage	ip!, {r1, r3, r8, lr}	@ execute only while >=64 remained
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}	@ 32-byte chunk (two 16-byte stores)
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}	@ 16-byte chunk
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1			@ fan the fill word out to 8 registers
	mov	r5, r1			@ so one stm can write a 32-byte line
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only worth aligning for larger fills
	tstgt	ip, #31			@ ...and only if not already 32-aligned
	ble	3f

	and	r8, ip, #31		@ r8 = misalignment within the line
	rsb	r8, r8, #32		@ r8 = bytes needed to reach alignment
	sub	r2, r2, r8		@ charge the head bytes to the count
	movs	r8, r8, lsl #(32 - 4)	@ shift head-count bits into C/N flags
	stmiacs	ip!, {r4, r5, r6, r7}	@ C set: 16-byte head store
	stmiami	ip!, {r4, r5}		@ N set: 8-byte head store
	tst	r8, #(1 << 30)		@ test the 4-byte bit of the head count
	mov	r8, r1			@ restore r8 as a fill register
	strne	r1, [ip], #4		@ 4-byte head store

3:	subs	r2, r2, #64		@ pre-decrement; flags drive the ge stores
	stmiage	ip!, {r1, r3-r8, lr}	@ two 32-byte (cache-line) stores
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}	@ exact multiple of 64: done

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}	@ 32-byte chunk
	tst	r2, #16
	stmiane	ip!, {r4-r7}		@ 16-byte chunk
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmiane	ip!, {r1, r3}		@ 8-byte chunk
	tst	r2, #4
	strne	r1, [ip], #4		@ 4-byte chunk
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1		@ 2-byte tail: two conditional byte stores
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1		@ final odd byte
	ret	lr			@ r0 still holds the original dest

/*
 * Unaligned head: store 1-3 bytes until ip is word-aligned, then rejoin
 * the aligned path at 1b.  r3 = dest & 3 on entry (set by the ands above).
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1 store if r3 < 2 (misalign 1)
	strble	r1, [ip], #1		@ 1 store if r3 <= 2 (misalign 1 or 2)
	strb	r1, [ip], #1		@ 1 always store one byte (misalign 1,2,3)
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)