/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 *
 * ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

	.text
	.align	5

ENTRY(__memset)
ENTRY(mmioset)
WEAK(memset)
UNWIND( .fnstart )
	and	r1, r1, #255		@ cast to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
7:	cmp	r2, #16
	blt	4f
UNWIND( .fnend )

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r3

2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}
UNWIND( .fnend )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

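/*
 * Bring ip up to a 32-byte boundary before entering the main loop.
 * r8 = 32 - (ip & 31) is the number of bytes needed to get there,
 * always a multiple of 4 since ip is already word aligned.  The
 * movs shifts that count left by 28 so its "16", "8" and "4" bits
 * land in the carry flag, the sign flag and bit 30 respectively;
 * the conditional stores that follow then emit exactly r8 bytes
 * without any branches.
 */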
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )

#endif

UNWIND( .fnstart )
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)
ENDPROC(__memset)

ENTRY(__memset32)
UNWIND( .fnstart )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend )
ENDPROC(__memset32)
ENTRY(__memset64)
UNWIND( .fnstart )
	mov	ip, r0			@ preserve r0 as return value
	b	7b			@ jump into the middle of memset
UNWIND( .fnend )
ENDPROC(__memset64)
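/*
 * Usage sketch (illustrative only, never built): all entry points
 * follow the AAPCS, so memset takes r0 = dest, r1 = fill byte,
 * r2 = byte count and returns dest in r0.  __memset32 expects the
 * 32-bit pattern in r1; __memset64 expects its two pattern words in
 * r1 and r3, which is why __memset32 just copies r1 to r3 and falls
 * through.  The example_caller and example_buf symbols below are
 * hypothetical names, not part of this file.
 */
#if 0
	.pushsection .bss
example_buf:
	.space	64			@ hypothetical 64-byte scratch buffer
	.popsection

example_caller:
	stmfd	sp!, {lr}
	ldr	r0, =example_buf	@ r0 = destination pointer
	mov	r1, #0xab		@ r1 = fill byte
	mov	r2, #64			@ r2 = number of bytes to set
	bl	__memset		@ on return, r0 == example_buf
	ldmfd	sp!, {pc}
	.ltorg				@ literal pool for the ldr above
#endif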