xref: /openbmc/linux/arch/sparc/lib/U3memcpy.S (revision 4ed91d48259d9ddd378424d008f2e6559f7e78f8)
1/* U3memcpy.S: UltraSparc-III optimized memcpy.
2 *
3 * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
4 */
5
/* Build-environment setup.
 *
 * Kernel builds (__KERNEL__) pull in the kernel headers and use %g7 as
 * GLOBAL_SPARE.  VISEntryHalf/VISExitHalf and ASI_BLK_P are used further
 * down but are not defined on this branch, so they presumably come from
 * the headers included here -- confirm against asm/visasm.h, asm/asi.h.
 *
 * All other builds define those names locally and use %g5 instead.
 */
6#ifdef __KERNEL__
7#include <linux/linkage.h>
8#include <asm/visasm.h>
9#include <asm/asi.h>
10#define GLOBAL_SPARE	%g7
11#else
12#define ASI_BLK_P 0xf0
13#define FPRS_FEF  0x04
14#ifdef MEMCPY_DEBUG
/* Debug flavour of VISEntryHalf: same %fprs save (into %o5) and FPRS_FEF
 * write as the plain one, plus it zeroes %g1-%g3 and rewrites the
 * condition codes with a subcc.
 */
15#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
16		     clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
17#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
18#else
/* VISEntryHalf saves %fprs in %o5 and writes FPRS_FEF to %fprs;
 * VISExitHalf writes the saved value, masked with FPRS_FEF, back.
 * %o5 therefore has to survive between the two.
 */
19#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
20#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
21#endif
22#define GLOBAL_SPARE	%g5
23#endif
24
/* Overridable hooks.  Every macro below is only defined when the
 * including file has not already defined it, so a wrapper can replace
 * any of them before including this file.
 *
 * EX_LD/EX_LD_FP/EX_ST/EX_ST_FP(x,y): the defaults emit instruction 'x'
 * unchanged and ignore 'y'.  In this file 'y' is always the name of one
 * of the U3_retl_* stubs (presumably a fault fixup target for wrappers
 * that override these macros -- confirm against the wrappers).
 */
25#ifndef EX_LD
26#define EX_LD(x,y)	x
27#endif
28#ifndef EX_LD_FP
29#define EX_LD_FP(x,y)	x
30#endif
31
32#ifndef EX_ST
33#define EX_ST(x,y)	x
34#endif
35#ifndef EX_ST_FP
36#define EX_ST_FP(x,y)	x
37#endif
38
/* LOAD(type,addr,dest): default is a plain "type [addr], dest". */
39#ifndef LOAD
40#define LOAD(type,addr,dest)	type [addr], dest
41#endif
42
/* STORE(type,src,addr): default is a plain "type src, [addr]". */
43#ifndef STORE
44#define STORE(type,src,addr)	type src, [addr]
45#endif
46
/* STORE_BLK(src,addr): default is an stda through ASI_BLK_P. */
47#ifndef STORE_BLK
48#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
49#endif
50
/* Symbol name given to the routine defined in this file. */
51#ifndef FUNC_NAME
52#define FUNC_NAME	U3memcpy
53#endif
54
/* PREAMBLE is expanded near the top of FUNC_NAME; empty by default. */
55#ifndef PREAMBLE
56#define PREAMBLE
57#endif
58
/* Condition-code register named by the %XCC branches; xcc by default. */
59#ifndef XCC
60#define XCC xcc
61#endif
62
/* Tell the assembler that this file uses %g2 and %g3 as scratch. */
63	.register	%g2,#scratch
64	.register	%g3,#scratch
65
66	/* Special/non-trivial issues of this code:
67	 *
68	 * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
69	 * 2) Only low 32 FPU registers are used so that only the
70	 *    lower half of the FPU register set is dirtied by this
71	 *    code.  This is especially important in the kernel.
72	 * 3) This code never prefetches cachelines past the end
73	 *    of the source buffer.
74	 */
75
76	.text
/* When no wrapper has defined EX_RETVAL, it defaults to the identity and
 * everything up to the matching #endif (the __restore_fp helper and the
 * U3_retl_* return stubs) is assembled as part of this file.
 */
77#ifndef EX_RETVAL
78#define EX_RETVAL(x)	x
/* Shared tail of the *_fp stubs: run VISExitHalf, then return to the
 * caller.  %o0 is not touched here; each stub sets it in the delay slot
 * of its branch to this label.
 */
79__restore_fp:
80	VISExitHalf
81	retl
82	 nop
/* Return stub: %o0 = %o2 + %g2 + %g1 + 1, leaving through __restore_fp.
 * %g1 and %g2 are modified along the way.
 */
83ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
84	add	%g1, 1, %g1
85	add	%g2, %g1, %g2
86	ba,pt	%xcc, __restore_fp
87	 add	%o2, %g2, %o0	/* delay slot: runs before __restore_fp */
88ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
/* Return stub: %o0 = %o2 + %g2, leaving through __restore_fp. */
89ENTRY(U3_retl_o2_plus_g2_fp)
90	ba,pt	%xcc, __restore_fp
91	 add	%o2, %g2, %o0	/* delay slot: runs before __restore_fp */
92ENDPROC(U3_retl_o2_plus_g2_fp)
/* Return stub: %o0 = %o2 + %g2 + 8, leaving through __restore_fp.
 * %g2 is modified along the way.
 */
93ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
94	add	%g2, 8, %g2
95	ba,pt	%xcc, __restore_fp
96	 add	%o2, %g2, %o0	/* delay slot: runs before __restore_fp */
97ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
/* Return stub: %o0 = %o2. */
98ENTRY(U3_retl_o2)
99	retl
100	 mov	%o2, %o0	/* delay slot of retl */
101ENDPROC(U3_retl_o2)
/* Return stub: %o0 = %o2 + 1. */
102ENTRY(U3_retl_o2_plus_1)
103	retl
104	 add	%o2, 1, %o0	/* delay slot of retl */
105ENDPROC(U3_retl_o2_plus_1)
/* Return stub: %o0 = %o2 + 4. */
106ENTRY(U3_retl_o2_plus_4)
107	retl
108	 add	%o2, 4, %o0	/* delay slot of retl */
109ENDPROC(U3_retl_o2_plus_4)
/* Return stub: %o0 = %o2 + 8. */
110ENTRY(U3_retl_o2_plus_8)
111	retl
112	 add	%o2, 8, %o0	/* delay slot of retl */
113ENDPROC(U3_retl_o2_plus_8)
/* Return stub: %o0 = %o2 + %g1 + 1.  %g1 is modified along the way. */
114ENTRY(U3_retl_o2_plus_g1_plus_1)
115	add	%g1, 1, %g1
116	retl
117	 add	%o2, %g1, %o0	/* delay slot of retl */
118ENDPROC(U3_retl_o2_plus_g1_plus_1)
/* Return stub: %o0 = %o2, leaving through __restore_fp. */
119ENTRY(U3_retl_o2_fp)
120	ba,pt	%xcc, __restore_fp
121	 mov	%o2, %o0	/* delay slot: runs before __restore_fp */
122ENDPROC(U3_retl_o2_fp)
/* Return stub: %o0 = %o2 + (%o3 << 6) + 0x80, leaving through
 * __restore_fp.  %o3 is modified along the way.
 */
123ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
124	sll	%o3, 6, %o3
125	add	%o3, 0x80, %o3
126	ba,pt	%xcc, __restore_fp
127	 add	%o2, %o3, %o0	/* delay slot: runs before __restore_fp */
128ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
/* Return stub: %o0 = %o2 + (%o3 << 6) + 0x40, leaving through
 * __restore_fp.  %o3 is modified along the way.
 */
129ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
130	sll	%o3, 6, %o3
131	add	%o3, 0x40, %o3
132	ba,pt	%xcc, __restore_fp
133	 add	%o2, %o3, %o0	/* delay slot: runs before __restore_fp */
134ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
/* Return stub: %o0 = %o2 + GLOBAL_SPARE + 0x10.  GLOBAL_SPARE is
 * modified along the way.
 */
135ENTRY(U3_retl_o2_plus_GS_plus_0x10)
136	add	GLOBAL_SPARE, 0x10, GLOBAL_SPARE
137	retl
138	 add	%o2, GLOBAL_SPARE, %o0	/* delay slot of retl */
139ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
/* Return stub: %o0 = %o2 + GLOBAL_SPARE + 0x08.  GLOBAL_SPARE is
 * modified along the way.
 */
140ENTRY(U3_retl_o2_plus_GS_plus_0x08)
141	add	GLOBAL_SPARE, 0x08, GLOBAL_SPARE
142	retl
143	 add	%o2, GLOBAL_SPARE, %o0	/* delay slot of retl */
144ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
/* Return stub: %o0 = (%o2 & 7) + GLOBAL_SPARE.  %o2 is masked in place.
 * The sum has to land in %o0, the register every other U3_retl_* stub
 * returns its result in; leaving it in %o2 would hand the caller
 * whatever %o0 happened to contain.
 */
145ENTRY(U3_retl_o2_and_7_plus_GS)
146	and	%o2, 7, %o2
147	retl
148	 add	%o2, GLOBAL_SPARE, %o0	/* delay slot of retl */
149ENDPROC(U3_retl_o2_and_7_plus_GS)
/* Return stub: %o0 = (%o2 & 7) + GLOBAL_SPARE + 8.  %o2 and
 * GLOBAL_SPARE are modified in place.  The sum has to land in %o0, the
 * register every other U3_retl_* stub returns its result in; leaving it
 * in %o2 would hand the caller whatever %o0 happened to contain.
 */
150ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
151	add	GLOBAL_SPARE, 8, GLOBAL_SPARE
152	and	%o2, 7, %o2
153	retl
154	 add	%o2, GLOBAL_SPARE, %o0	/* delay slot of retl */
155ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
156#endif
157
158	.align		64
159
160	/* The cheetah's flexible spine, oversized liver, enlarged heart,
161	 * slender muscular body, and claws make it the swiftest hunter
162	 * in Africa and the fastest animal on land.  Can reach speeds
163	 * of up to 2.4GB per second.
164	 */
165
	/* Entry and size dispatch.
	 *
	 *   len == 0        -> 85: (return)
	 *   len <  16       -> 80:
	 *   len <  3 * 64   -> 70:
	 *   otherwise       -> fall through into the VIS block-copy path
	 *
	 * The original dst is kept in %o4; the normal exits return
	 * EX_RETVAL(%o4).
	 */
166	.globl	FUNC_NAME
167	.type	FUNC_NAME,#function
168FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
169	srlx		%o2, 31, %g2	/* non-zero iff len >= 2^31 */
170	cmp		%g2, 0
171	tne		%xcc, 5		/* software trap 5 on such a length */
172	PREAMBLE
173	mov		%o0, %o4	/* remember dst for the return value */
174	cmp		%o2, 0
175	be,pn		%XCC, 85f
176	 or		%o0, %o1, %o3	/* delay slot: %o3 = dst | src */
177	cmp		%o2, 16
178	blu,a,pn	%XCC, 80f
179	 or		%o3, %o2, %o3	/* annulled slot: only when branching, %o3 |= len */
180
181	cmp		%o2, (3 * 64)
182	blu,pt		%XCC, 70f
183	 andcc		%o3, 0x7, %g0	/* delay slot: flags for (dst | src) & 7, tested at 70: */
184
185	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  We must preserve
186	 * o5 from here until we hit VISExitHalf.
187	 */
188	VISEntryHalf
189
190	/* Is 'dst' already aligned on a 64-byte boundary? */
191	andcc		%o0, 0x3f, %g2
192	be,pt		%XCC, 2f
193
194	/* Compute abs((dst & 0x3f) - 0x40) into %g2.  This is the number
195	 * of bytes to copy to make 'dst' 64-byte aligned.  We pre-
196	 * subtract this from 'len'.
197	 */
198	 sub		%o0, %o1, GLOBAL_SPARE	/* delay slot (runs on both paths): GLOBAL_SPARE = dst - src */
199	sub		%g2, 0x40, %g2
200	sub		%g0, %g2, %g2
201	sub		%o2, %g2, %o2
202	andcc		%g2, 0x7, %g1		/* %g1 = single bytes needed to make dst 8-byte aligned */
203	be,pt		%icc, 2f
204	 and		%g2, 0x38, %g2		/* delay slot: %g2 = what is left, a multiple of 8 */
205
	/* Byte loop: copy %g1 bytes.  The store address is formed as
	 * src + (dst - src), so only %o1 is advanced.
	 */
2061:	subcc		%g1, 0x1, %g1
207	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
208	EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
209	bgu,pt		%XCC, 1b
210	 add		%o1, 0x1, %o1
211
212	add		%o1, GLOBAL_SPARE, %o0	/* bring dst up to date */
213
	/* %g1 = src & 7 is saved here, before alignaddr rounds %o1 down;
	 * it is added back to %o1 once the VIS copy is finished.  If no
	 * 8-byte pieces remain (%g2 == 0), go straight to 3:.
	 */
2142:	cmp		%g2, 0x0
215	and		%o1, 0x7, %g1
216	be,pt		%icc, 3f
217	 alignaddr	%o1, %g0, %o1
218
	/* Copy %g2 bytes, 8 at a time.  The loop is unrolled twice so that
	 * %f4 and %f6 swap roles as "previous" and "next" source doubleword
	 * for faligndata.
	 */
219	EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
2201:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
221	add		%o1, 0x8, %o1
222	subcc		%g2, 0x8, %g2
223	faligndata	%f4, %f6, %f0
224	EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
225	be,pn		%icc, 3f
226	 add		%o0, 0x8, %o0
227
228	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
229	add		%o1, 0x8, %o1
230	subcc		%g2, 0x8, %g2
231	faligndata	%f6, %f4, %f2
232	EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
233	bne,pt		%icc, 1b
234	 add		%o0, 0x8, %o0
235
	/* Prime the pipeline: issue prefetches for the source, load its
	 * first 64 bytes plus one more doubleword (%f0 is reloaded from
	 * +0x40 at the end), and pre-compute %f16-%f26, the first 48 bytes
	 * of the first output block.
	 */
2363:	LOAD(prefetch, %o1 + 0x000, #one_read)
237	LOAD(prefetch, %o1 + 0x040, #one_read)
238	andn		%o2, (0x40 - 1), GLOBAL_SPARE	/* GLOBAL_SPARE = len rounded down to a multiple of 64 */
239	LOAD(prefetch, %o1 + 0x080, #one_read)
240	LOAD(prefetch, %o1 + 0x0c0, #one_read)
241	LOAD(prefetch, %o1 + 0x100, #one_read)
242	EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
243	LOAD(prefetch, %o1 + 0x140, #one_read)
244	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
245	LOAD(prefetch, %o1 + 0x180, #one_read)
246	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
247	LOAD(prefetch, %o1 + 0x1c0, #one_read)
248	faligndata	%f0, %f2, %f16
249	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
250	faligndata	%f2, %f4, %f18
251	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
252	faligndata	%f4, %f6, %f20
253	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
254	faligndata	%f6, %f8, %f22
255
256	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
257	faligndata	%f8, %f10, %f24
258	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
259	faligndata	%f10, %f12, %f26
260	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
261
	/* The code at 2: below stores two 64-byte blocks, so 0x80 bytes are
	 * taken out of the loop's share.  %o3 = number of loop iterations;
	 * with none to do, skip the loop.
	 */
262	subcc		GLOBAL_SPARE, 0x80, GLOBAL_SPARE
263	add		%o1, 0x40, %o1
264	bgu,pt		%XCC, 1f
265	 srl		GLOBAL_SPARE, 6, %o3	/* delay slot (runs on both paths) */
266	ba,pt		%xcc, 2f
267	 nop
268
269	.align		64
	/* Steady state, one 64-byte block per iteration: finish %f28/%f30,
	 * block-store %f16-%f30, then load the next 64 source bytes while
	 * building %f16-%f26 for the following block.
	 */
2701:
271	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
272	faligndata	%f12, %f14, %f28
273	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
274	faligndata	%f14, %f0, %f30
275	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
276	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
277	faligndata	%f0, %f2, %f16
278	add		%o0, 0x40, %o0
279
280	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
281	faligndata	%f2, %f4, %f18
282	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
283	faligndata	%f4, %f6, %f20
284	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
285	subcc		%o3, 0x01, %o3	/* sets the flags consumed by the bg below */
286	faligndata	%f6, %f8, %f22
287	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
288
289	faligndata	%f8, %f10, %f24
290	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
291	LOAD(prefetch, %o1 + 0x1c0, #one_read)
292	faligndata	%f10, %f12, %f26
293	bg,pt		%XCC, 1b
294	 add		%o1, 0x40, %o1
295
296	/* Finally we copy the last two full 64-byte blocks. */
2972:
298	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
299	faligndata	%f12, %f14, %f28
300	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
301	faligndata	%f14, %f0, %f30
302	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)	/* first of the two block stores */
303	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
304	faligndata	%f0, %f2, %f16
305	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
306	faligndata	%f2, %f4, %f18
307	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
308	faligndata	%f4, %f6, %f20
309	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
310	faligndata	%f6, %f8, %f22
311	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
312	faligndata	%f8, %f10, %f24
	/* %g1 is the saved (src & 7).  When it is zero the doubleword at
	 * +0x40 is not loaded here, so this path reads nothing beyond the
	 * 64 bytes being copied.
	 */
313	cmp		%g1, 0
314	be,pt		%XCC, 1f
315	 add		%o0, 0x40, %o0
316	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
3171:	faligndata	%f10, %f12, %f26
318	faligndata	%f12, %f14, %f28
319	faligndata	%f14, %f0, %f30
320	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)	/* second block store */
321	add		%o0, 0x40, %o0
322	add		%o1, 0x40, %o1
323	membar		#Sync
324
325	/* Now we copy the (len modulo 64) bytes at the end.
326	 * Note how we borrow the %f0 loaded above.
327	 *
328	 * Also notice how this code is careful not to perform a
329	 * load past the end of the src buffer.
	 *
	 * %g2 becomes (len & 0x38) - 8, i.e. this doubleword loop copies
	 * one 8-byte piece fewer than would fit; that piece and the
	 * (len & 7) odd bytes are left in %o2 for the code at 2:.
330	 */
331	and		%o2, 0x3f, %o2
332	andcc		%o2, 0x38, %g2
333	be,pn		%XCC, 2f
334	 subcc		%g2, 0x8, %g2
335	be,pn		%XCC, 2f
336	 cmp		%g1, 0		/* delay slot: flags for "src was 8-byte aligned" */
337
338	sub		%o2, %g2, %o2	/* plain sub: the flags from the cmp above survive */
339	be,a,pt		%XCC, 1f
340	 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)	/* annulled slot: only when %g1 == 0, where %f0 was not loaded above */
341
3421:	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
343	add		%o1, 0x8, %o1
344	subcc		%g2, 0x8, %g2
345	faligndata	%f0, %f2, %f8
346	EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
347	be,pn		%XCC, 2f
348	 add		%o0, 0x8, %o0
349	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
350	add		%o1, 0x8, %o1
351	subcc		%g2, 0x8, %g2
352	faligndata	%f2, %f0, %f8
353	EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
354	bne,pn		%XCC, 1b
355	 add		%o0, 0x8, %o0
356
357	/* If anything is left, we copy it here.
358	 * Note that %g1 is (src & 0x7) saved above before the
359	 * alignaddr was performed.
	 *
	 * Adding %g1 back to %o1 undoes the rounding done by alignaddr.
	 * When %g1 is zero the remainder goes out in 8/4/2/1-byte pieces
	 * selected by the bits of %o2; otherwise it is copied one byte at
	 * a time by the loop at 90:.  %o3 = dst - src, so stores address
	 * the destination as %o1 + %o3.
360	 */
3612:
362	cmp		%o2, 0
363	add		%o1, %g1, %o1
364	VISExitHalf
365	be,pn		%XCC, 85f		/* uses the flags from the cmp above */
366	 sub		%o0, %o1, %o3
367
368	andcc		%g1, 0x7, %g0
369	bne,pn		%icc, 90f
370	 andcc		%o2, 0x8, %g0
371	be,pt		%icc, 1f
372	 nop
373	EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
374	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
375	add		%o1, 0x8, %o1
376	sub		%o2, 8, %o2
377
3781:	andcc		%o2, 0x4, %g0
379	be,pt		%icc, 1f
380	 nop
381	EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
382	EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
383	add		%o1, 0x4, %o1
384	sub		%o2, 4, %o2
385
3861:	andcc		%o2, 0x2, %g0
387	be,pt		%icc, 1f
388	 nop
389	EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
390	EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
391	add		%o1, 0x2, %o1
392	sub		%o2, 2, %o2
393
3941:	andcc		%o2, 0x1, %g0
395	be,pt		%icc, 85f
396	 nop
397	EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
398	ba,pt		%xcc, 85f
399	 EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)	/* final store sits in the branch delay slot */
400
401	.align		64
40270: /* 16 <= len < (3 * 64) */
	/* Entered with the flags of (dst | src) & 7 set by the delay slot of
	 * the dispatching branch: anything misaligned goes to 75:.
	 * %o3 = dst - src, so stores address the destination as %o1 + %o3.
	 */
403	bne,pn		%XCC, 75f
404	 sub		%o0, %o1, %o3
405
	/* 72: src and dst both 8-byte aligned.  Copy GLOBAL_SPARE =
	 * (len & ~0xf) bytes, 16 per iteration; %o2 keeps len & 0xf.
	 */
40672:
407	andn		%o2, 0xf, GLOBAL_SPARE
408	and		%o2, 0xf, %o2
4091:	subcc		GLOBAL_SPARE, 0x10, GLOBAL_SPARE	/* counted down before the accesses, hence the +0x10/+0x08 stubs */
410	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
411	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
412	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
413	add		%o1, 0x8, %o1
414	EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
415	bgu,pt		%XCC, 1b
416	 add		%o1, 0x8, %o1
	/* 73: fewer than 16 bytes left: one 8-byte piece, then one 4-byte
	 * piece, as selected by the bits of %o2; anything still left is
	 * finished by the byte loop at 90:.
	 */
41773:	andcc		%o2, 0x8, %g0
418	be,pt		%XCC, 1f
419	 nop
420	sub		%o2, 0x8, %o2
421	EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
422	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
423	add		%o1, 0x8, %o1
4241:	andcc		%o2, 0x4, %g0
425	be,pt		%XCC, 1f
426	 nop
427	sub		%o2, 0x4, %o2
428	EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
429	EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
430	add		%o1, 0x4, %o1
4311:	cmp		%o2, 0
432	be,pt		%XCC, 85f
433	 nop
434	ba,pt		%xcc, 90f
435	 nop
436
	/* 75: medium-length copy where src and/or dst is not 8-byte aligned.
	 * First copy single bytes until dst is 8-byte aligned
	 * (%g1 = 8 - (dst & 7); skipped when dst & 7 is already zero).
	 */
43775:
438	andcc		%o0, 0x7, %g1
439	sub		%g1, 0x8, %g1
440	be,pn		%icc, 2f
441	 sub		%g0, %g1, %g1
442	sub		%o2, %g1, %o2
443
4441:	subcc		%g1, 1, %g1
445	EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
446	EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
447	bgu,pt		%icc, 1b
448	 add		%o1, 1, %o1
449
	/* dst is now 8-byte aligned.  If src is too, reuse the aligned
	 * paths (72: with at least 16 bytes left, else 73:).  Otherwise
	 * go to 8: with %g1 = (src & 7) * 8, a shift count in bits.
	 */
4502:	add		%o1, %o3, %o0
451	andcc		%o1, 0x7, %g1
452	bne,pt		%icc, 8f
453	 sll		%g1, 3, %g1
454
455	cmp		%o2, 16
456	bgeu,pt		%icc, 72b
457	 nop
458	ba,a,pt		%xcc, 73b
459
	/* 8: shift-and-merge copy.  src is rounded down to 8 bytes and each
	 * output doubleword is formed from two neighbouring source
	 * doublewords: (previous << %g1) | (next >> (64 - %g1)).
	 * GLOBAL_SPARE = (len & ~7) counts the bytes handled this way.
	 */
4608:	mov		64, %o3
461	andn		%o1, 0x7, %o1
462	EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
463	sub		%o3, %g1, %o3
464	andn		%o2, 0x7, GLOBAL_SPARE
465	sllx		%g2, %g1, %g2
4661:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
467	subcc		GLOBAL_SPARE, 0x8, GLOBAL_SPARE
468	add		%o1, 0x8, %o1
469	srlx		%g3, %o3, %o5
470	or		%o5, %g2, %o5
471	EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
472	add		%o0, 0x8, %o0
473	bgu,pt		%icc, 1b
474	 sllx		%g3, %g1, %g2	/* delay slot: carry the leftover bits into the next round */
475
	/* Turn %g1 back into a byte offset and re-apply it to src; the
	 * last (len & 7) bytes, if any, go through the byte loop at 90:,
	 * which expects %o3 = dst - src.
	 */
476	srl		%g1, 3, %g1
477	andcc		%o2, 0x7, %o2
478	be,pn		%icc, 85f
479	 add		%o1, %g1, %o1
480	ba,pt		%xcc, 90f
481	 sub		%o0, %o1, %o3
482
483	.align		64
48480: /* 0 < len < 16 */
	/* On entry %o3 = dst | src | len.  If any of its two low bits is
	 * set, fall back to the byte loop at 90:; otherwise everything is a
	 * multiple of 4 and the copy is done a word at a time.
	 * %o3 is then reused as dst - src for the store address.
	 */
485	andcc		%o3, 0x3, %g0
486	bne,pn		%XCC, 90f
487	 sub		%o0, %o1, %o3
488
4891:
490	subcc		%o2, 4, %o2
491	EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
492	EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
493	bgu,pt		%XCC, 1b
494	 add		%o1, 4, %o1
495
	/* 85: common successful exit: return EX_RETVAL(original dst). */
49685:	retl
497	 mov		EX_RETVAL(%o4), %o0
498
499	.align		32
	/* 90: byte-at-a-time loop.  Expects %o2 = bytes left (non-zero) and
	 * %o3 = dst - src; returns EX_RETVAL(original dst) when done.
	 */
50090:
501	subcc		%o2, 1, %o2
502	EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
503	EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
504	bgu,pt		%XCC, 90b
505	 add		%o1, 1, %o1
506	retl
507	 mov		EX_RETVAL(%o4), %o0
508
509	.size		FUNC_NAME, .-FUNC_NAME
510