xref: /openbmc/linux/arch/arc/lib/strcmp.S (revision d2912cb1)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle per word
   (respectively one cycle per byte) by forcing double-word alignment of
   source 1, unrolling by a factor of two, and speculatively loading the
   second word / byte of source 1; however, that would increase the
   overhead for loop setup / finish, and strcmp might often terminate
   early.  */

#include <linux/linkage.h>

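/* strcmp (const char *s1, const char *s2)
   Inputs:  r0 = s1, r1 = s2 (per the ARC calling convention).
   Returns in r0: zero when the strings are equal, a positive value when
   the first differing byte of s1 is larger, and a value with bit 31 set
   (i.e. negative) when it is smaller.  */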
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1		; low two bits of either pointer set?
	brne	r2,0,.Lcharloop	; unaligned: fall back to the byte loop
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 := 0x80808080
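
	/* Word-at-a-time loop.  r12 = 0x01010101 and r5 = 0x80808080
	   implement the classic "does this word contain a zero byte?"
	   test.  As a rough C sketch (illustrative only, names made up):

		static inline unsigned long word_has_nul(unsigned long w)
		{
			return (w - 0x01010101UL) & ~w & 0x80808080UL;
		}

	   Below, r4 computes exactly this expression for the word loaded
	   from source 1 (r2); it is non-zero iff that word contains a
	   NUL byte.  */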
.Lwordloop:
	ld.ab	r2,[r0,4]	; load a word from s1, post-increment r0
	ld.ab	r3,[r1,4]	; load a word from s2, post-increment r1
	nop_s
	sub	r4,r2,r12	; r2 - 0x01010101 ...
	bic	r4,r4,r2	; ... & ~r2 ...
	and	r4,r4,r5	; ... & 0x80808080: non-zero iff r2 has a NUL
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop ; no NUL and words equal: keep going
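	/* The words differ but contain no NUL byte.  On big-endian the
	   first string byte is the most significant one, so the words can
	   be compared directly; on little-endian the code below isolates
	   the least significant (i.e. first) differing byte, roughly (C
	   sketch, illustrative variable names):

		diff = w1 ^ w2;
		bit  = diff & -diff;               <- lowest differing bit
		mask = 0x80808080 ^ (0x80808080 - bit);
		w1  &= mask;
		w2  &= mask;
	*/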
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31	; +1 if r2 > r3, negative (bit 31 set) if r2 < r3

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
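	/* A NUL byte was seen in the word from s1 (flagged in r4).  Fold
	   the zero indicator into the difference mask so the terminating
	   byte itself counts as the first "difference" to compare.  */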
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3	; 0 when the strings are equal (both NUL here)
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31	; negative result if r2 < r3
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
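	/* For example (an illustrative value, not from the original):
	   with r2 = 0x01004141, subtracting 0x01 from the zero byte
	   borrows into the 0x01 byte above it, so
	   (r2 - r12) & ~r2 & r5 = 0x80800000 flags both the real zero
	   byte and the 0x01 byte.  */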
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31	; negative result if r2 < r3
#endif /* ENDIAN */

	.balign	4
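	/* Byte-at-a-time loop, used when either source pointer is not
	   32-bit aligned.  Compare one byte per iteration and stop at the
	   first NUL in s1 or at the first difference; the final
	   subtraction then yields the usual strcmp result.  */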
.Lcharloop:
	ldb.ab	r2,[r0,1]	; load a byte from s1, post-increment r0
	ldb.ab	r3,[r1,1]	; load a byte from s2, post-increment r1
	nop_s
	breq	r2,0,.Lcmpend	; end of s1
	breq	r2,r3,.Lcharloop ; bytes equal: keep going
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; return the difference of the last bytes
END_CFI(strcmp)