xref: /openbmc/linux/arch/arc/lib/strchr-700.S (revision ca79522c)
1/*
2 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9/* ARC700 has a relatively long pipeline and branch prediction, so we want
10   to avoid branches that are hard to predict.  On the other hand, the
11   presence of the norm instruction makes it easier to operate on whole
12   words branch-free.  */
13
14#include <asm/linkage.h>
15
/*
 * strchr - find the first occurrence of a byte in a NUL-terminated string,
 * examining one 32-bit word at a time.
 *
 * Register roles as used by this routine (the argument and return
 * registers are assumed to follow the usual ARC calling convention;
 * TODO confirm against the ABI document):
 *   r0   in: string pointer; out: address of the first matching byte,
 *        or 0 when a NUL byte is reached first
 *   r1   in: character to look for (only the low 8 bits are used)
 *   r2   word most recently loaded from the string (also scratch)
 *   r3   0x01010101
 *   r4   0x80808080 (r3 rotated right by one bit)
 *   r5   search byte replicated into all four byte lanes
 *   r6   word XOR r5: a lane is zero where the byte equals the character
 *   r7   first word: r3 with the lanes before the string start cleared;
 *        afterwards: per-lane match flags (also scratch)
 *   r12  per-lane NUL flags (also scratch)
 *
 * Byte test used throughout: (x - 0x01010101) & ~x & 0x80808080 is
 * non-zero when some byte lane of x is zero.
 */
16ARC_ENTRY strchr
	/* Keep only the low 8 bits of the search character.  */
17	extb_s	r1,r1
	/* r5 = character in the two low lanes; r2 = r0 & 3, the byte offset
	   of the pointer inside its word; r3 = 0x01 in every lane.  */
18	asl	r5,r1,8
19	bmsk	r2,r0,1
20	or	r5,r5,r1
21	mov_s	r3,0x01010101
	/* NOTE(review): the compare is r2 (= r0 & 3) against r0 itself, not
	   against zero, so this branch is taken only when r0 < 4.  A
	   word-aligned pointer otherwise falls through with r2 = 0, which
	   leaves r0 and the 0x01010101 pattern unchanged below.  Confirm
	   that this is intended.  */
22	breq.d	r2,r0,.Laligned
	/* Delay slot (runs on both paths): r4 = r5 << 16, the character in
	   the two high lanes.  */
23	asl	r4,r5,16
	/* General entry: round r0 down to a word boundary, r7 = 8 * byte
	   offset, and load the word that contains the string start.  */
24	sub_s	r0,r0,r2
25	asl	r7,r2,3
26	ld_s	r2,[r0]
	/* Shift the 0x01 pattern by r7 bits so that the lanes holding bytes
	   located before the string start become zero; the direction
	   depends on which end of the word holds the lowest address.  */
27#ifdef __LITTLE_ENDIAN__
28	asl	r7,r3,r7
29#else
30	lsr	r7,r3,r7
31#endif
	/* r5 = search byte in all four lanes; r4 = 0x80808080.  */
32	or	r5,r5,r4
33	ror	r4,r3
	/* r12 = NUL flags for the first word, using the masked pattern r7.  */
34	sub	r12,r2,r7
35	bic_s	r12,r12,r2
36	and	r12,r12,r4
	/* A NUL was flagged in the first word: finish in .Lfound0_ua.  */
37	brne.d	r12,0,.Lfound0_ua
	/* Delay slot: r6 = word ^ r5.  */
38	xor	r6,r2,r5
	/* Advance r0 by 4 (address write-back) and load the next word.  */
39	ld.a	r2,[r0,4]
	/* r7 = match flags for the first word: the same byte test applied
	   to r6, still using the masked pattern.  */
40	sub	r12,r6,r7
41	bic	r12,r12,r6
42	and	r7,r12,r4
43	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
44	b	.Lfound_char ; Likewise this one.
45; /* We require this code address to be unaligned for speed...  */
46.Laligned:
	/* Direct entry: load the first word, then finish building r5 (byte
	   in all lanes) and r4 (0x80808080).  */
47	ld_s	r2,[r0]
48	or	r5,r5,r4
49	ror	r4,r3
50; /* ... so that this code address is aligned, for itself and ...  */
51.Loop:
	/* r12 = NUL flags for the current word r2.  */
52	sub	r12,r2,r3
53	bic_s	r12,r12,r2
54	and	r12,r12,r4
	/* A NUL was flagged: finish in .Lfound0 (r0 is not advanced yet).  */
55	brne.d	r12,0,.Lfound0
	/* Delay slot: r6 = word ^ r5.  */
56	xor	r6,r2,r5
	/* No NUL in this word: advance r0 by 4 and load the next word
	   before testing the current one for the character.  */
57	ld.a	r2,[r0,4]
	/* r7 = match flags for the word that was just checked for NUL.  */
58	sub	r12,r6,r3
59	bic	r12,r12,r6
60	and	r7,r12,r4
61	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
62	; Found searched-for character.  r0 has already advanced to next word.
63#ifdef __LITTLE_ENDIAN__
64/* We only need the information about the first matching byte
65   (i.e. the least significant matching byte) to be exact,
66   hence there is no problem with carry effects.  */
67.Lfound_char:
	/* r3 = all bits below the lowest set flag in r7.  */
68	sub	r3,r7,1
69	bic	r3,r3,r7
	/* norm followed by the shift right by 3 turns that mask into a
	   byte distance r2 (3 for the first lane down to 0 for the last);
	   the result is r0 - 1 - r2, computed partly in the delay slot of
	   the return jump.  */
70	norm	r2,r3
71	sub_s	r0,r0,1
72	asr_s	r2,r2,3
73	j.d	[blink]
74	sub_s	r0,r0,r2
75
76	.balign	4
77.Lfound0_ua:
	/* First-word entry: use the masked 0x01 pattern from r7.  */
78	mov	r3,r7
79.Lfound0:
	/* A NUL was flagged in the current word.  r2 = match flags for the
	   same word (byte test applied to r6).  */
80	sub	r3,r6,r3
81	bic	r3,r3,r6
82	and	r2,r3,r4
	/* Merge NUL and match flags, then isolate the bits below the lowest
	   flag to locate the first byte that is either one.  */
83	or_s	r12,r12,r2
84	sub_s	r3,r12,1
85	bic_s	r3,r3,r12
	/* r3 = shift count from norm; r12 = r3 >> 3 is the byte distance;
	   r0 becomes the address of that first hit (r0 + 3 - r12).  */
86	norm	r3,r3
87	add_s	r0,r0,3
88	asr_s	r12,r3,3
	/* Shift the match flags so that the first hit lands in the sign
	   bit; only the flags are kept.  */
89	asl.f	0,r2,r3
90	sub_s	r0,r0,r12
91	j_s.d	[blink]
	/* Delay slot: sign bit clear means the first hit was the NUL and
	   not the character, so return 0.  */
92	mov.pl	r0,0
93#else /* BIG ENDIAN */
94.Lfound_char:
	/* Move each match flag from bit 7 to bit 0 of its lane.  */
95	lsr	r7,r7,7
96
	/* Drop flags in lanes whose r6 byte has bit 0 set; such a lane is
	   not zero, so it is not a real match.  */
97	bic	r2,r7,r6
	/* norm followed by the shift right by 3 gives the byte index of
	   the most significant remaining flag; r0 is stepped back to the
	   word just tested and the index is added in the delay slot.  */
98	norm	r2,r2
99	sub_s	r0,r0,4
100	asr_s	r2,r2,3
101	j.d	[blink]
102	add_s	r0,r0,r2
103
104.Lfound0_ua:
	/* First-word entry: use the masked 0x01 pattern from r7.  */
105	mov_s	r3,r7
106.Lfound0:
	/* NOTE(review): on the unaligned entry the lanes cleared in r3 are
	   the most significant ones, and the borrow of the word-wide
	   subtract that produced r12 travels towards them.  Confirm that a
	   zero byte located just before the string start cannot be
	   reported as a hit.  */
	/* r2 still holds the word.  Shifting it left by 7 lines bit 0 of
	   each byte up with the flag bit of the same lane, so NUL flags in
	   lanes whose byte has bit 0 set are dropped.  */
107	asl_s	r2,r2,7
108	or	r7,r6,r4
109	bic_s	r12,r12,r2
	/* With bit 7 forced on in every lane no borrow crosses lanes: after
	   the subtract and the OR with r6, bit 7 of a lane of r2 is clear
	   only where the r6 byte is zero and the lane is enabled in r3.  */
110	sub	r2,r7,r3
111	or	r2,r2,r6
	/* Also clear bit 7 in the lanes flagged as NUL, then invert under
	   r4: r3 gets a flag in every lane that is a match or a NUL, and
	   the condition flags reflect that result.  */
112	bic	r12,r2,r12
113	bic.f	r3,r4,r12
	/* Turn the most significant flag into a shift count r3 and a byte
	   index r12 = r3 >> 3; the add runs only when the result of bic.f
	   was not negative.  */
114	norm	r3,r3
115
116	add.pl	r3,r3,1
117	asr_s	r12,r3,3
	/* Bring bit 7 of the hit lane of r2 into the sign bit; it is set
	   when that lane did not match the character.  Only the flags are
	   kept.  */
118	asl.f	0,r2,r3
119	add_s	r0,r0,r12
120	j_s.d	[blink]
	/* Delay slot: the first hit was the NUL, not the character, so
	   return 0.  */
121	mov.mi	r0,0
122#endif /* ENDIAN */
123ARC_EXIT strchr
124