/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
   it 8 byte aligned.  Thus, we can do a little read-ahead, without
   dereferencing a cache line that we should not touch.
   Note that short and long instructions have been scheduled to avoid
   branch stalls.
   The beq_s to r3z could be made unaligned & long to avoid a stall
   there, but it is not likely to be taken often, and it
   would also be likely to cost an unaligned mispredict at the next call.  */

#include <linux/linkage.h>

1886effd0dSVineet GuptaENTRY_CFI(strcpy)
195210d1e6SVineet Gupta	or	r2,r0,r1
205210d1e6SVineet Gupta	bmsk_s	r2,r2,1
215210d1e6SVineet Gupta	brne.d	r2,0,charloop
225210d1e6SVineet Gupta	mov_s	r10,r0
235210d1e6SVineet Gupta	ld_s	r3,[r1,0]
245210d1e6SVineet Gupta	mov	r8,0x01010101
255210d1e6SVineet Gupta	bbit0.d	r1,2,loop_start
265210d1e6SVineet Gupta	ror	r12,r8
275210d1e6SVineet Gupta	sub	r2,r3,r8
285210d1e6SVineet Gupta	bic_s	r2,r2,r3
295210d1e6SVineet Gupta	tst_s	r2,r12
305210d1e6SVineet Gupta	bne	r3z
315210d1e6SVineet Gupta	mov_s	r4,r3
325210d1e6SVineet Gupta	.balign 4
335210d1e6SVineet Guptaloop:
345210d1e6SVineet Gupta	ld.a	r3,[r1,4]
355210d1e6SVineet Gupta	st.ab	r4,[r10,4]
365210d1e6SVineet Guptaloop_start:
375210d1e6SVineet Gupta	ld.a	r4,[r1,4]
385210d1e6SVineet Gupta	sub	r2,r3,r8
395210d1e6SVineet Gupta	bic_s	r2,r2,r3
405210d1e6SVineet Gupta	tst_s	r2,r12
415210d1e6SVineet Gupta	bne_s	r3z
425210d1e6SVineet Gupta	st.ab	r3,[r10,4]
435210d1e6SVineet Gupta	sub	r2,r4,r8
445210d1e6SVineet Gupta	bic	r2,r2,r4
455210d1e6SVineet Gupta	tst	r2,r12
465210d1e6SVineet Gupta	beq	loop
475210d1e6SVineet Gupta	mov_s	r3,r4
485210d1e6SVineet Gupta#ifdef __LITTLE_ENDIAN__
495210d1e6SVineet Guptar3z:	bmsk.f	r1,r3,7
505210d1e6SVineet Gupta	lsr_s	r3,r3,8
515210d1e6SVineet Gupta#else
525210d1e6SVineet Guptar3z:	lsr.f	r1,r3,24
535210d1e6SVineet Gupta	asl_s	r3,r3,8
545210d1e6SVineet Gupta#endif
555210d1e6SVineet Gupta	bne.d	r3z
565210d1e6SVineet Gupta	stb.ab	r1,[r10,1]
575210d1e6SVineet Gupta	j_s	[blink]
585210d1e6SVineet Gupta
595210d1e6SVineet Gupta	.balign	4
605210d1e6SVineet Guptacharloop:
615210d1e6SVineet Gupta	ldb.ab	r3,[r1,1]
625210d1e6SVineet Gupta
635210d1e6SVineet Gupta
645210d1e6SVineet Gupta	brne.d	r3,0,charloop
655210d1e6SVineet Gupta	stb.ab	r3,[r10,1]
665210d1e6SVineet Gupta	j	[blink]
6786effd0dSVineet GuptaEND_CFI(strcpy)
68