/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* ARC-optimized strcpy: r0 = dst, r1 = src; dst (r0) is the return value.

   If dst and src are 4 byte aligned, copy 8 bytes at a time.
   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
   it 8 byte aligned.  Thus, we can do a little read-ahead, without
   dereferencing a cache line that we should not touch.
   Note that short and long instructions have been scheduled to avoid
   branch stalls.
   The beq_s to r3z could be made unaligned & long to avoid a stall
   there, but it is not likely to be taken often, and it
   would also be likely to cost an unaligned mispredict at the next call.  */

#include <linux/linkage.h>

ENTRY_CFI(strcpy)
	/* If either dst or src is not 4-byte aligned (low two bits of
	   r0|r1 nonzero), fall back to the byte-at-a-time charloop.  */
	or	r2,r0,r1
	bmsk_s	r2,r2,1
	brne.d	r2,0,charloop
	mov_s	r10,r0		/* delay slot: r10 = write cursor; r0 preserved as return value */
	ld_s	r3,[r1,0]	/* read-ahead: first source word */
	mov	r8,0x01010101
	bbit0.d	r1,2,loop_start	/* src already 8-byte aligned -> skip the priming word */
	ror	r12,r8		/* delay slot: r12 = 0x80808080 (0x01010101 rotated right by 1) */
	/* Word-at-a-time NUL test: (w - 0x01010101) & ~w & 0x80808080
	   is nonzero iff word w contains a zero byte.  */
	sub	r2,r3,r8
	bic_s	r2,r2,r3
	tst_s	r2,r12
	bne	r3z		/* NUL already in the first word: finish byte-wise */
	mov_s	r4,r3
	.balign 4
loop:
	/* Main loop: copies 8 bytes per iteration with one word of
	   read-ahead (r3/r4 alternate as the in-flight words).  */
	ld.a	r3,[r1,4]
	st.ab	r4,[r10,4]
loop_start:
	ld.a	r4,[r1,4]
	/* Check r3 for a NUL byte (same sub/bic/tst trick as above).  */
	sub	r2,r3,r8
	bic_s	r2,r2,r3
	tst_s	r2,r12
	bne_s	r3z
	st.ab	r3,[r10,4]
	/* Check the read-ahead word r4 as well before looping.  */
	sub	r2,r4,r8
	bic	r2,r2,r4
	tst	r2,r12
	beq	loop
	mov_s	r3,r4		/* NUL is in r4; move it to r3 for the tail loop */
	/* Tail: store the word containing the NUL byte by byte, stopping
	   once the NUL has been written.  r1 (src, no longer needed) is
	   reused to hold the byte being stored; extracting the
	   lowest-addressed byte first depends on endianness.  */
#ifdef __LITTLE_ENDIAN__
r3z:	bmsk.f	r1,r3,7		/* r1 = low byte of r3; flags set from it */
	lsr_s	r3,r3,8
#else
r3z:	lsr.f	r1,r3,24	/* r1 = high byte of r3; flags set from it */
	asl_s	r3,r3,8
#endif
	bne.d	r3z		/* byte was non-NUL: continue with the next byte */
	stb.ab	r1,[r10,1]	/* delay slot: always stores, so the final NUL is written too */
	j_s	[blink]

	.balign 4
	/* Unaligned fallback: plain byte-at-a-time copy.  */
charloop:
	ldb.ab	r3,[r1,1]


	brne.d	r3,0,charloop
	stb.ab	r3,[r10,1]	/* delay slot: always stores, including the terminating NUL */
	j	[blink]
END_CFI(strcpy)