/*
 * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

/* Numerology:
 * WXYZ
 * W: width in bytes
 * X: Load=0, Store=1
 * Y: Location 0=preamble,8=loop,9=epilog
 * Z: Location=0,handler=9
 */
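
/* This is a template: FUNCNAME, dst, src, bytes, loopcount and the
 * d_dbuf/w_dbuf/src_dst_sav register names used below are not defined
 * here and are expected to be provided by the file that includes it.
 * The WXYZ numeric labels tag the load/store packets so that the
 * "handler" locations described above can be attached to them.
 */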
	.text
	.global FUNCNAME
	.type FUNCNAME, @function
	.p2align 5
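
/* Entry: bail out at once on a zero-length copy, otherwise compute
 * r3 = dst|src and r4 = dst^src for the alignment tests below.
 */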
FUNCNAME:
	{
		p0 = cmp.gtu(bytes,#0)
		if (!p0.new) jump:nt .Ldone
		r3 = or(dst,src)
		r4 = xor(dst,src)
	}
	{
		p1 = cmp.gtu(bytes,#15)
		p0 = bitsclr(r3,#7)
		if (!p0.new) jump:nt .Loop_not_aligned_8
		src_dst_sav = combine(src,dst)
	}

	{
		loopcount = lsr(bytes,#3)
		if (!p1) jump .Lsmall
	}
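	/* dst and src are both 8-byte aligned and at least 16 bytes remain:
	 * copy a doubleword per iteration in a software-pipelined hardware
	 * loop.  sp1loop0 keeps p3 clear on the first pass, so the store is
	 * suppressed until d_dbuf holds valid data; the final doubleword is
	 * written by the 8190 epilog packet.
	 */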
	p3=sp1loop0(.Loop8,loopcount)
.Loop8:
8080:
8180:
	{
		if (p3) memd(dst++#8) = d_dbuf
		d_dbuf = memd(src++#8)
	}:endloop0
8190:
	{
		memd(dst++#8) = d_dbuf
		bytes -= asl(loopcount,#3)
		jump .Lsmall
	}

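/* The pointers are not mutually 8-byte aligned.  If dst^src has its low
 * three bits clear they share the same offset modulo 8, and .Lalign can
 * step them up to doubleword alignment; otherwise fall back to word,
 * halfword and finally byte copies.
 */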
.Loop_not_aligned_8:
	{
		p0 = bitsclr(r4,#7)
		if (p0.new) jump:nt .Lalign
	}
	{
		p0 = bitsclr(r3,#3)
		if (!p0.new) jump:nt .Loop_not_aligned_4
		p1 = cmp.gtu(bytes,#7)
	}

	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#2)
	}
	p3=sp1loop0(.Loop4,loopcount)
.Loop4:
4080:
4180:
	{
		if (p3) memw(dst++#4) = w_dbuf
		w_dbuf = memw(src++#4)
	}:endloop0
4190:
	{
		memw(dst++#4) = w_dbuf
		bytes -= asl(loopcount,#2)
		jump .Lsmall
	}

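/* Not mutually 4-byte aligned either: if both pointers are at least
 * halfword aligned and more than 3 bytes remain, copy halfwords.
 */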
.Loop_not_aligned_4:
	{
		p0 = bitsclr(r3,#1)
		if (!p0.new) jump:nt .Loop_not_aligned
		p1 = cmp.gtu(bytes,#3)
	}

	{
		if (!p1) jump .Lsmall
		loopcount = lsr(bytes,#1)
	}
	p3=sp1loop0(.Loop2,loopcount)
.Loop2:
2080:
2180:
	{
		if (p3) memh(dst++#2) = w_dbuf
		w_dbuf = memuh(src++#2)
	}:endloop0
2190:
	{
		memh(dst++#2) = w_dbuf
		bytes -= asl(loopcount,#1)
		jump .Lsmall
	}

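/* No usable common alignment, or only a short tail is left: copy one
 * byte per iteration.  The loop is still pipelined, so the last byte is
 * stored by the 1190 packet along with the return.
 */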
.Loop_not_aligned: /* Works for as small as one byte */
	p3=sp1loop0(.Loop1,bytes)
.Loop1:
1080:
1180:
	{
		if (p3) memb(dst++#1) = w_dbuf
		w_dbuf = memub(src++#1)
	}:endloop0
	/* Done */
1190:
	{
		memb(dst) = w_dbuf
		jumpr r31
		r0 = #0
	}

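/* .Lsmall: the sized loops branch here with the leftover count in bytes;
 * copy any remainder a byte at a time, or fall through to .Ldone and
 * return 0 when nothing is left.
 */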
.Lsmall:
	{
		p0 = cmp.gtu(bytes,#0)
		if (p0.new) jump:nt .Loop_not_aligned
	}
.Ldone:
	{
		r0 = #0
		jumpr r31
	}
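
/* .Lalign: src and dst share the same misalignment modulo 8.  Peel off a
 * byte, a halfword and then a word as needed to reach doubleword
 * alignment and restart FUNCNAME on the aligned path; if fewer than 16
 * bytes remain (p1 clear from the entry test) finish in .Lsmall instead.
 */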
	.falign
.Lalign:
1000:
	{
		if (p0.new) w_dbuf = memub(src)
		p0 = tstbit(src,#0)
		if (!p1) jump .Lsmall
	}
1100:
	{
		if (p0) memb(dst++#1) = w_dbuf
		if (p0) bytes = add(bytes,#-1)
		if (p0) src = add(src,#1)
	}
2000:
	{
		if (p0.new) w_dbuf = memuh(src)
		p0 = tstbit(src,#1)
		if (!p1) jump .Lsmall
	}
2100:
	{
		if (p0) memh(dst++#2) = w_dbuf
		if (p0) bytes = add(bytes,#-2)
		if (p0) src = add(src,#2)
	}
4000:
	{
		if (p0.new) w_dbuf = memw(src)
		p0 = tstbit(src,#2)
		if (!p1) jump .Lsmall
	}
4100:
	{
		if (p0) memw(dst++#4) = w_dbuf
		if (p0) bytes = add(bytes,#-4)
		if (p0) src = add(src,#4)
		jump FUNCNAME
	}
	.size FUNCNAME,.-FUNCNAME