xref: /openbmc/linux/arch/arc/lib/strchr-700.S (revision 5210d1e6889c8183ecad269e86e2d9c524015b5f)
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* ARC700 has a relatively long pipeline and branch prediction, so we want
   to avoid branches that are hard to predict.  On the other hand, the
   presence of the norm instruction makes it easier to operate on whole
   words branch-free.  */
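
/* Sketch of the word-at-a-time test used throughout (C-like pseudocode;
   ONES/HIGHS are illustrative names): with ONES = 0x01010101 and
   HIGHS = 0x80808080,

	(x - ONES) & ~x & HIGHS

   is nonzero iff the word x contains a 0x00 byte.  Xor'ing x with the
   searched-for char replicated into every byte turns matching bytes
   into 0x00, so the same test also finds the char.  Below, r3 holds
   ONES, r4 ends up holding HIGHS, and r5 the replicated char.  */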

#include <asm/linkage.h>

ARC_ENTRY strchr
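	; Arguments per the ARC calling convention: r0 = string, r1 = char
	; to locate.  Returns r0 = pointer to first occurrence, or NULL.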
	extb_s	r1,r1
	asl	r5,r1,8
	bmsk	r2,r0,1
	or	r5,r5,r1
	mov_s	r3,0x01010101
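	; r5 accumulates the char replicated into all four byte lanes;
	; r3 = 0x01010101 is the per-byte pattern for the zero-byte test.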
	breq.d	r2,r0,.Laligned
	asl	r4,r5,16
	sub_s	r0,r0,r2
	asl	r7,r2,3
	ld_s	r2,[r0]
#ifdef __LITTLE_ENDIAN__
	asl	r7,r3,r7
#else
	lsr	r7,r3,r7
#endif
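	; r7: the 0x01010101 pattern shifted so that bytes before the start
	; of the string cannot signal a match in this first, unaligned word.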
	or	r5,r5,r4
	ror	r4,r3
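	; r4 = 0x80808080 (0x01010101 rotated right by one bit).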
	sub	r12,r2,r7
	bic_s	r12,r12,r2
	and	r12,r12,r4
	brne.d	r12,0,.Lfound0_ua
	xor	r6,r2,r5
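	; (delay slot) r6 has a 0x00 byte wherever this word matches the char.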
	ld.a	r2,[r0,4]
	sub	r12,r6,r7
	bic	r12,r12,r6
	and	r7,r12,r4
	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
	b	.Lfound_char ; Likewise this one.
; /* We require this code address to be unaligned for speed...  */
.Laligned:
	ld_s	r2,[r0]
	or	r5,r5,r4
	ror	r4,r3
; /* ... so that this code address is aligned, for itself and ...  */
.Loop:
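	; Main loop, one word per iteration: test for a NUL byte first,
	; then (using the delay-slot xor in r6) for the replicated char.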
	sub	r12,r2,r3
	bic_s	r12,r12,r2
	and	r12,r12,r4
	brne.d	r12,0,.Lfound0
	xor	r6,r2,r5
	ld.a	r2,[r0,4]
	sub	r12,r6,r3
	bic	r12,r12,r6
	and	r7,r12,r4
	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
	; Found searched-for character.  r0 has already advanced to next word.
#ifdef __LITTLE_ENDIAN__
/* We only need the information about the first matching byte
   (i.e. the least significant matching byte) to be exact,
   hence there is no problem with carry effects.  */
.Lfound_char:
	sub	r3,r7,1
	bic	r3,r3,r7
	norm	r2,r3
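	; r3 = ones below the least significant match bit; for a match in
	; byte k, norm gives 24 - 8*k, so the asr below yields 3 - k and
	; r0 (already four past this word) is wound back to the match.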
	sub_s	r0,r0,1
	asr_s	r2,r2,3
	j.d	[blink]
	sub_s	r0,r0,r2

	.balign	4
.Lfound0_ua:
	mov	r3,r7
.Lfound0:
	sub	r3,r6,r3
	bic	r3,r3,r6
	and	r2,r3,r4
	or_s	r12,r12,r2
	sub_s	r3,r12,1
	bic_s	r3,r3,r12
	norm	r3,r3
	add_s	r0,r0,3
	asr_s	r12,r3,3
	asl.f	0,r2,r3
	sub_s	r0,r0,r12
	j_s.d	[blink]
	mov.pl	r0,0
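	; The asl.f puts the char-match bit of the first event byte into
	; the sign position: pl means that byte was the NUL terminator,
	; not a match, so return NULL.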
#else /* BIG ENDIAN */
.Lfound_char:
	lsr	r7,r7,7

	bic	r2,r7,r6
	norm	r2,r2
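	; Big-endian: the first match is the most significant surviving
	; candidate bit; for a match in byte k, norm gives 6 + 8*k, so the
	; asr below recovers the byte offset within the word.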
	sub_s	r0,r0,4
	asr_s	r2,r2,3
	j.d	[blink]
	add_s	r0,r0,r2

.Lfound0_ua:
	mov_s	r3,r7
.Lfound0:
	asl_s	r2,r2,7
	or	r7,r6,r4
	bic_s	r12,r12,r2
	sub	r2,r7,r3
	or	r2,r2,r6
	bic	r12,r2,r12
	bic.f	r3,r4,r12
	norm	r3,r3

	add.pl	r3,r3,1
	asr_s	r12,r3,3
	asl.f	0,r2,r3
	add_s	r0,r0,r12
	j_s.d	[blink]
	mov.mi	r0,0
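	; mi: the NUL terminator precedes any occurrence of the char in
	; this word, so return NULL.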
#endif /* ENDIAN */
ARC_EXIT strchr