xref: /openbmc/linux/arch/powerpc/lib/copyuser_power7.S (revision 05cf4fe738242183f1237f1b3a28b4479348c0a1)
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2011
17 *
18 * Author: Anton Blanchard <anton@au.ibm.com>
19 */
20#include <asm/ppc_asm.h>
21
/*
 * SELFTEST_CASE defaults to 0 unless the build already defined it. Its value
 * is assigned to test_feature just ahead of the CPU_FTR_ALTIVEC feature
 * section in __copy_tofrom_user_power7.
 */
22#ifndef SELFTEST_CASE
23/* 0 == don't use VMX, 1 == use VMX */
24#define SELFTEST_CASE	0
25#endif
26
/*
 * Endian-dependent helpers used by the permute-based (unaligned) VMX copy:
 *   big-endian:  LVS -> lvsl, VPERM passes VRA,VRB through in the given order
 *   otherwise:   LVS -> lvsr, VPERM swaps VRA and VRB
 * Callers can therefore always write VPERM(dst, previous, next, control).
 */
27#ifdef __BIG_ENDIAN__
28#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
29#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
30#else
31#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
32#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
33#endif
34
/*
 * Fault annotation macros. Each one drops a numeric local label at the point
 * of use and passes that label plus a fixup label to EX_TABLE, so writing
 * "errN; <insn>" ties <insn> to the .Ldo_errN handler. EX_TABLE itself is
 * defined outside this file (presumably it emits an exception-table entry).
 *
 * err1: used where no stack frame of our own is active.
 * err2: used inside the 128B non-VMX loop, where a frame is active and
 *       r14-r22 have been saved in it.
 */
35	.macro err1
36100:
37	EX_TABLE(100b,.Ldo_err1)
38	.endm
39
40	.macro err2
41200:
42	EX_TABLE(200b,.Ldo_err2)
43	.endm
44
45#ifdef CONFIG_ALTIVEC
/*
 * err3: used on the VMX paths while the frame pushed by .Lvmx_copy is active
 *       and r14-r16 still hold the caller's values.
 * err4: used inside the VMX 128B loops, where r14-r16 have been saved in the
 *       frame and reused as index offsets.
 */
46	.macro err3
47300:
48	EX_TABLE(300b,.Ldo_err3)
49	.endm
50
51	.macro err4
52400:
53	EX_TABLE(400b,.Ldo_err4)
54	.endm
55
56
/*
 * VMX fixups. .Ldo_err4 first reloads r14-r16 from the frame and then falls
 * through into .Ldo_err3, which calls exit_vmx_usercopy, reloads LR from the
 * slot at STACKFRAMESIZE+16(r1) (the bl has overwritten LR) and joins the
 * common exit at .Lexit.
 */
57.Ldo_err4:
58	ld	r16,STK_REG(R16)(r1)
59	ld	r15,STK_REG(R15)(r1)
60	ld	r14,STK_REG(R14)(r1)
61.Ldo_err3:
62	bl	exit_vmx_usercopy
63	ld	r0,STACKFRAMESIZE+16(r1)
64	mtlr	r0
65	b	.Lexit
66#endif /* CONFIG_ALTIVEC */
67
/*
 * Non-VMX fixups, entered in order of how much state has to be undone:
 *   .Ldo_err2  reload r14-r22 from the active frame, then fall through
 *   .Lexit     pop the STACKFRAMESIZE frame, then fall through
 *   .Ldo_err1  reload the original r3/r4/r5 that the entry code stored below
 *              the stack pointer and branch to __copy_tofrom_user_base, so
 *              the whole copy is redone there with the original arguments
 */
68.Ldo_err2:
69	ld	r22,STK_REG(R22)(r1)
70	ld	r21,STK_REG(R21)(r1)
71	ld	r20,STK_REG(R20)(r1)
72	ld	r19,STK_REG(R19)(r1)
73	ld	r18,STK_REG(R18)(r1)
74	ld	r17,STK_REG(R17)(r1)
75	ld	r16,STK_REG(R16)(r1)
76	ld	r15,STK_REG(R15)(r1)
77	ld	r14,STK_REG(R14)(r1)
78.Lexit:
79	addi	r1,r1,STACKFRAMESIZE
80.Ldo_err1:
81	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
82	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
83	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
84	b	__copy_tofrom_user_base
85
86
/*
 * __copy_tofrom_user_power7
 *
 * Register roles as used by the code below:
 *   r3 = destination pointer (every store goes through r3)
 *   r4 = source pointer      (every load goes through r4)
 *   r5 = number of bytes still to copy
 *
 * Dispatch:
 *   r5 < 16                  -> .Lshort_copy
 *   r5 > 3328 (ALTIVEC only) -> .Lvmx_copy
 *   otherwise                -> fall through into .Lnonvmx_copy
 *
 * The non-VMX path returns with r3 = 0. The VMX paths finish with a tail
 * call to exit_vmx_usercopy. A faulting access diverts through the .Ldo_err*
 * fixups, which branch to __copy_tofrom_user_base with the original
 * r3/r4/r5 restored.
 */
87_GLOBAL(__copy_tofrom_user_power7)
88	cmpldi	r5,16		/* cr0, consumed by the blt below */
89	cmpldi	cr1,r5,3328	/* cr1, consumed by the bgt below */
90
	/*
	 * Stash the original arguments below the stack pointer. Once a
	 * STACKFRAMESIZE frame is pushed these are the STK_REG(R31/R30/R29)(r1)
	 * slots; .Ldo_err1 and .Lvmx_copy reload from them.
	 */
91	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
92	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
93	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
94
95	blt	.Lshort_copy
96
	/*
	 * The branch to the VMX path sits inside a CPU_FTR_ALTIVEC feature
	 * section (macros defined outside this file), so presumably it is only
	 * live on CPUs reporting that feature. test_feature is set from
	 * SELFTEST_CASE immediately before the section.
	 */
97#ifdef CONFIG_ALTIVEC
98test_feature = SELFTEST_CASE
99BEGIN_FTR_SECTION
100	bgt	cr1,.Lvmx_copy
101END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
102#endif
103
/*
 * Integer-register copy for 16 bytes and up. Also the fallback when
 * enter_vmx_usercopy returns 0 (via .Lunwind_stack_nonvmx_copy).
 *
 * Steps: align the source to 8 bytes, copy whole 128-byte blocks in a loop
 * (only if at least 128 bytes remain), then copy 64/32/16-byte pieces and
 * fall through into .Lshort_copy for the last 0-15 bytes.
 */
104.Lnonvmx_copy:
105	/* Get the source 8B aligned */
106	neg	r6,r4
107	mtocrf	0x01,r6		/* cr7 = low 4 bits of -src */
108	clrldi	r6,r6,(64-3)	/* r6 = bytes needed to reach 8B alignment */
109
	/* cr7 bits +3, +2, +1 select a 1-, 2- and 4-byte alignment copy. */
110	bf	cr7*4+3,1f
111err1;	lbz	r0,0(r4)
112	addi	r4,r4,1
113err1;	stb	r0,0(r3)
114	addi	r3,r3,1
115
1161:	bf	cr7*4+2,2f
117err1;	lhz	r0,0(r4)
118	addi	r4,r4,2
119err1;	sth	r0,0(r3)
120	addi	r3,r3,2
121
1222:	bf	cr7*4+1,3f
123err1;	lwz	r0,0(r4)
124	addi	r4,r4,4
125err1;	stw	r0,0(r3)
126	addi	r3,r3,4
127
1283:	sub	r5,r5,r6		/* account for the alignment bytes */
129	cmpldi	r5,128
130	blt	5f		/* less than one 128B block left */
131
	/*
	 * Push a frame and save r14-r22 so the loop can use non-volatile
	 * registers as data registers. LR is stored at STACKFRAMESIZE+16(r1).
	 * From here until the frame is popped, faults must go through err2.
	 * r22 is saved and restored although the loop only uses r14-r21.
	 */
132	mflr	r0
133	stdu	r1,-STACKFRAMESIZE(r1)
134	std	r14,STK_REG(R14)(r1)
135	std	r15,STK_REG(R15)(r1)
136	std	r16,STK_REG(R16)(r1)
137	std	r17,STK_REG(R17)(r1)
138	std	r18,STK_REG(R18)(r1)
139	std	r19,STK_REG(R19)(r1)
140	std	r20,STK_REG(R20)(r1)
141	std	r21,STK_REG(R21)(r1)
142	std	r22,STK_REG(R22)(r1)
143	std	r0,STACKFRAMESIZE+16(r1)
144
145	srdi	r6,r5,7		/* number of whole 128B blocks */
146	mtctr	r6
147
148	/* Now do cacheline (128B) sized loads and stores. */
149	.align	5
	/* Sixteen doubleword loads, then sixteen doubleword stores, per pass. */
1504:
151err2;	ld	r0,0(r4)
152err2;	ld	r6,8(r4)
153err2;	ld	r7,16(r4)
154err2;	ld	r8,24(r4)
155err2;	ld	r9,32(r4)
156err2;	ld	r10,40(r4)
157err2;	ld	r11,48(r4)
158err2;	ld	r12,56(r4)
159err2;	ld	r14,64(r4)
160err2;	ld	r15,72(r4)
161err2;	ld	r16,80(r4)
162err2;	ld	r17,88(r4)
163err2;	ld	r18,96(r4)
164err2;	ld	r19,104(r4)
165err2;	ld	r20,112(r4)
166err2;	ld	r21,120(r4)
167	addi	r4,r4,128
168err2;	std	r0,0(r3)
169err2;	std	r6,8(r3)
170err2;	std	r7,16(r3)
171err2;	std	r8,24(r3)
172err2;	std	r9,32(r3)
173err2;	std	r10,40(r3)
174err2;	std	r11,48(r3)
175err2;	std	r12,56(r3)
176err2;	std	r14,64(r3)
177err2;	std	r15,72(r3)
178err2;	std	r16,80(r3)
179err2;	std	r17,88(r3)
180err2;	std	r18,96(r3)
181err2;	std	r19,104(r3)
182err2;	std	r20,112(r3)
183err2;	std	r21,120(r3)
184	addi	r3,r3,128
185	bdnz	4b
186
187	clrldi	r5,r5,(64-7)	/* r5 = bytes left after the 128B blocks */
188
	/* Restore the saved registers and pop the frame; err1 applies again. */
189	ld	r14,STK_REG(R14)(r1)
190	ld	r15,STK_REG(R15)(r1)
191	ld	r16,STK_REG(R16)(r1)
192	ld	r17,STK_REG(R17)(r1)
193	ld	r18,STK_REG(R18)(r1)
194	ld	r19,STK_REG(R19)(r1)
195	ld	r20,STK_REG(R20)(r1)
196	ld	r21,STK_REG(R21)(r1)
197	ld	r22,STK_REG(R22)(r1)
198	addi	r1,r1,STACKFRAMESIZE
199
200	/* Up to 127B to go */
	/*
	 * cr7 = bits 4-7 of the remaining count, so cr7 bits +1, +2, +3 select
	 * a 64-, 32- and 16-byte copy respectively.
	 */
2015:	srdi	r6,r5,4
202	mtocrf	0x01,r6
203
2046:	bf	cr7*4+1,7f
205err1;	ld	r0,0(r4)
206err1;	ld	r6,8(r4)
207err1;	ld	r7,16(r4)
208err1;	ld	r8,24(r4)
209err1;	ld	r9,32(r4)
210err1;	ld	r10,40(r4)
211err1;	ld	r11,48(r4)
212err1;	ld	r12,56(r4)
213	addi	r4,r4,64
214err1;	std	r0,0(r3)
215err1;	std	r6,8(r3)
216err1;	std	r7,16(r3)
217err1;	std	r8,24(r3)
218err1;	std	r9,32(r3)
219err1;	std	r10,40(r3)
220err1;	std	r11,48(r3)
221err1;	std	r12,56(r3)
222	addi	r3,r3,64
223
224	/* Up to 63B to go */
2257:	bf	cr7*4+2,8f
226err1;	ld	r0,0(r4)
227err1;	ld	r6,8(r4)
228err1;	ld	r7,16(r4)
229err1;	ld	r8,24(r4)
230	addi	r4,r4,32
231err1;	std	r0,0(r3)
232err1;	std	r6,8(r3)
233err1;	std	r7,16(r3)
234err1;	std	r8,24(r3)
235	addi	r3,r3,32
236
237	/* Up to 31B to go */
2388:	bf	cr7*4+3,9f
239err1;	ld	r0,0(r4)
240err1;	ld	r6,8(r4)
241	addi	r4,r4,16
242err1;	std	r0,0(r3)
243err1;	std	r6,8(r3)
244	addi	r3,r3,16
245
2469:	clrldi	r5,r5,(64-4)	/* r5 = final 0-15 bytes; falls into .Lshort_copy */
247
248	/* Up to 15B to go */
/*
 * Copies the final 0-15 bytes and returns. Entered directly from the entry
 * point when the whole request is under 16 bytes, or by fall-through from
 * .Lnonvmx_copy with r5 already reduced to the tail length.
 *
 * cr7 receives the low 4 bits of r5: bits +0, +1, +2, +3 select an 8-, 4-,
 * 2- and 1-byte copy. No frame is active here, so faults use err1.
 */
249.Lshort_copy:
250	mtocrf	0x01,r5
251	bf	cr7*4+0,12f
252err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
253err1;	lwz	r6,4(r4)
254	addi	r4,r4,8
255err1;	stw	r0,0(r3)
256err1;	stw	r6,4(r3)
257	addi	r3,r3,8
258
25912:	bf	cr7*4+1,13f
260err1;	lwz	r0,0(r4)
261	addi	r4,r4,4
262err1;	stw	r0,0(r3)
263	addi	r3,r3,4
264
26513:	bf	cr7*4+2,14f
266err1;	lhz	r0,0(r4)
267	addi	r4,r4,2
268err1;	sth	r0,0(r3)
269	addi	r3,r3,2
270
	/* Last byte: the pointers are not advanced because nothing follows. */
27114:	bf	cr7*4+3,15f
272err1;	lbz	r0,0(r4)
273err1;	stb	r0,0(r3)
274
27515:	li	r3,0		/* return value 0 on the non-faulting path */
276	blr
277
/*
 * Reached from .Lvmx_copy when enter_vmx_usercopy returned 0. Pops the frame
 * that .Lvmx_copy pushed and continues with the integer-register copy.
 * r3/r4/r5 and LR were already reloaded by .Lvmx_copy before branching here.
 */
278.Lunwind_stack_nonvmx_copy:
279	addi	r1,r1,STACKFRAMESIZE
280	b	.Lnonvmx_copy
281
/*
 * VMX copy, taken for counts above 3328 bytes. This block covers the common
 * setup (enter_vmx_usercopy, stream prefetch) and the case where source and
 * destination share the same offset within a 16-byte quadword. Otherwise
 * control moves to .Lvmx_unaligned_copy.
 *
 * A STACKFRAMESIZE frame is active from the stdu below until label 15, so
 * faults use err3, or err4 inside the 128B loop where r14-r16 are borrowed.
 */
282.Lvmx_copy:
283#ifdef CONFIG_ALTIVEC
284	mflr	r0
285	std	r0,16(r1)	/* LR slot; read back as STACKFRAMESIZE+16(r1) */
286	stdu	r1,-STACKFRAMESIZE(r1)
287	bl	enter_vmx_usercopy
288	cmpwi	cr1,r3,0	/* keep the result in cr1; r3 is reloaded next */
289	ld	r0,STACKFRAMESIZE+16(r1)
290	ld	r3,STK_REG(R31)(r1)	/* original arguments saved at entry */
291	ld	r4,STK_REG(R30)(r1)
292	ld	r5,STK_REG(R29)(r1)
293	mtlr	r0
294
295	/*
296	 * We prefetch both the source and destination using enhanced touch
297	 * instructions. We use a stream ID of 0 for the load side and
298	 * 1 for the store side.
299	 */
300	clrrdi	r6,r4,7		/* source rounded down to 128B */
301	clrrdi	r9,r3,7		/* destination rounded down to 128B */
302	ori	r9,r9,1		/* stream=1 */
303
304	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
305	cmpldi	r7,0x3FF
306	ble	1f
307	li	r7,0x3FF
3081:	lis	r0,0x0E00	/* depth=7 */
309	sldi	r7,r7,7
310	or	r7,r7,r0
311	ori	r10,r7,1	/* stream=1 */
312
313	lis	r8,0x8000	/* GO=1 */
314	clrldi	r8,r8,32	/* clear the upper 32 bits left by lis */
315
316	/* setup read stream 0 */
317	dcbt	0,r6,0b01000   /* addr from */
318	dcbt	0,r7,0b01010   /* length and depth from */
319	/* setup write stream 1 */
320	dcbtst	0,r9,0b01000   /* addr to */
321	dcbtst	0,r10,0b01010  /* length and depth to */
322	eieio
323	dcbt	0,r8,0b01010	/* all streams GO */
324
	/*
	 * enter_vmx_usercopy returned 0: use the integer-register path instead.
	 * The prefetch streams above have already been started either way.
	 */
325	beq	cr1,.Lunwind_stack_nonvmx_copy
326
327	/*
328	 * If source and destination are not relatively aligned we use a
329	 * slower permute loop.
330	 */
331	xor	r6,r4,r3
332	rldicl.	r6,r6,0,(64-4)	/* low 4 bits of (src ^ dst); sets cr0 */
333	bne	.Lvmx_unaligned_copy
334
335	/* Get the destination 16B aligned */
336	neg	r6,r3
337	mtocrf	0x01,r6		/* cr7 = low 4 bits of -dst */
338	clrldi	r6,r6,(64-4)	/* r6 = bytes needed to reach 16B alignment */
339
	/* cr7 bits +3, +2, +1, +0 select a 1-, 2-, 4- and 8-byte copy. */
340	bf	cr7*4+3,1f
341err3;	lbz	r0,0(r4)
342	addi	r4,r4,1
343err3;	stb	r0,0(r3)
344	addi	r3,r3,1
345
3461:	bf	cr7*4+2,2f
347err3;	lhz	r0,0(r4)
348	addi	r4,r4,2
349err3;	sth	r0,0(r3)
350	addi	r3,r3,2
351
3522:	bf	cr7*4+1,3f
353err3;	lwz	r0,0(r4)
354	addi	r4,r4,4
355err3;	stw	r0,0(r3)
356	addi	r3,r3,4
357
3583:	bf	cr7*4+0,4f
359err3;	ld	r0,0(r4)
360	addi	r4,r4,8
361err3;	std	r0,0(r3)
362	addi	r3,r3,8
363
3644:	sub	r5,r5,r6
365
366	/* Get the destination 128B aligned */
367	neg	r6,r3
368	srdi	r7,r6,4
369	mtocrf	0x01,r7		/* cr7 = bits 4-7 of -dst */
370	clrldi	r6,r6,(64-7)	/* r6 = bytes needed to reach 128B alignment */
371
	/* Index offsets for the lvx/stvx forms used below. */
372	li	r9,16
373	li	r10,32
374	li	r11,48
375
	/* cr7 bits +3, +2, +1 select a 16-, 32- and 64-byte vector copy. */
376	bf	cr7*4+3,5f
377err3;	lvx	v1,0,r4
378	addi	r4,r4,16
379err3;	stvx	v1,0,r3
380	addi	r3,r3,16
381
3825:	bf	cr7*4+2,6f
383err3;	lvx	v1,0,r4
384err3;	lvx	v0,r4,r9
385	addi	r4,r4,32
386err3;	stvx	v1,0,r3
387err3;	stvx	v0,r3,r9
388	addi	r3,r3,32
389
3906:	bf	cr7*4+1,7f
391err3;	lvx	v3,0,r4
392err3;	lvx	v2,r4,r9
393err3;	lvx	v1,r4,r10
394err3;	lvx	v0,r4,r11
395	addi	r4,r4,64
396err3;	stvx	v3,0,r3
397err3;	stvx	v2,r3,r9
398err3;	stvx	v1,r3,r10
399err3;	stvx	v0,r3,r11
400	addi	r3,r3,64
401
4027:	sub	r5,r5,r6
403	srdi	r6,r5,7		/* number of whole 128B blocks */
404
	/*
	 * r14-r16 are borrowed as index offsets for the loop, so save them in
	 * the frame first. Faults inside the loop use err4, whose handler
	 * reloads them.
	 */
405	std	r14,STK_REG(R14)(r1)
406	std	r15,STK_REG(R15)(r1)
407	std	r16,STK_REG(R16)(r1)
408
409	li	r12,64
410	li	r14,80
411	li	r15,96
412	li	r16,112
413
414	mtctr	r6
415
416	/*
417	 * Now do cacheline sized loads and stores. By this stage the
418	 * cacheline stores are also cacheline aligned.
419	 */
420	.align	5
4218:
422err4;	lvx	v7,0,r4
423err4;	lvx	v6,r4,r9
424err4;	lvx	v5,r4,r10
425err4;	lvx	v4,r4,r11
426err4;	lvx	v3,r4,r12
427err4;	lvx	v2,r4,r14
428err4;	lvx	v1,r4,r15
429err4;	lvx	v0,r4,r16
430	addi	r4,r4,128
431err4;	stvx	v7,0,r3
432err4;	stvx	v6,r3,r9
433err4;	stvx	v5,r3,r10
434err4;	stvx	v4,r3,r11
435err4;	stvx	v3,r3,r12
436err4;	stvx	v2,r3,r14
437err4;	stvx	v1,r3,r15
438err4;	stvx	v0,r3,r16
439	addi	r3,r3,128
440	bdnz	8b
441
	/* r14-r16 hold the caller's values again; err3 applies from here. */
442	ld	r14,STK_REG(R14)(r1)
443	ld	r15,STK_REG(R15)(r1)
444	ld	r16,STK_REG(R16)(r1)
445
446	/* Up to 127B to go */
447	clrldi	r5,r5,(64-7)
448	srdi	r6,r5,4
449	mtocrf	0x01,r6		/* cr7 bits +1, +2, +3 select 64, 32, 16 bytes */
450
451	bf	cr7*4+1,9f
452err3;	lvx	v3,0,r4
453err3;	lvx	v2,r4,r9
454err3;	lvx	v1,r4,r10
455err3;	lvx	v0,r4,r11
456	addi	r4,r4,64
457err3;	stvx	v3,0,r3
458err3;	stvx	v2,r3,r9
459err3;	stvx	v1,r3,r10
460err3;	stvx	v0,r3,r11
461	addi	r3,r3,64
462
4639:	bf	cr7*4+2,10f
464err3;	lvx	v1,0,r4
465err3;	lvx	v0,r4,r9
466	addi	r4,r4,32
467err3;	stvx	v1,0,r3
468err3;	stvx	v0,r3,r9
469	addi	r3,r3,32
470
47110:	bf	cr7*4+3,11f
472err3;	lvx	v1,0,r4
473	addi	r4,r4,16
474err3;	stvx	v1,0,r3
475	addi	r3,r3,16
476
477	/* Up to 15B to go */
47811:	clrldi	r5,r5,(64-4)
479	mtocrf	0x01,r5		/* cr7 bits +0, +1, +2, +3 select 8, 4, 2, 1 bytes */
480	bf	cr7*4+0,12f
481err3;	ld	r0,0(r4)
482	addi	r4,r4,8
483err3;	std	r0,0(r3)
484	addi	r3,r3,8
485
48612:	bf	cr7*4+1,13f
487err3;	lwz	r0,0(r4)
488	addi	r4,r4,4
489err3;	stw	r0,0(r3)
490	addi	r3,r3,4
491
49213:	bf	cr7*4+2,14f
493err3;	lhz	r0,0(r4)
494	addi	r4,r4,2
495err3;	sth	r0,0(r3)
496	addi	r3,r3,2
497
49814:	bf	cr7*4+3,15f
499err3;	lbz	r0,0(r4)
500err3;	stb	r0,0(r3)
501
	/*
	 * Pop the frame and leave through exit_vmx_usercopy. LR was restored
	 * right after enter_vmx_usercopy, so that routine returns straight to
	 * our caller.
	 */
50215:	addi	r1,r1,STACKFRAMESIZE
503	b	exit_vmx_usercopy	/* tail call optimise */
504
/*
 * VMX copy for the case where source and destination have different offsets
 * within a 16-byte quadword. Each stored quadword is assembled with VPERM
 * from two consecutive source quadwords, using the control vector that LVS
 * builds in v16 from the source address.
 *
 * v0 always carries the most recently loaded source quadword into the next
 * step, and r4 runs 16 bytes ahead of the data actually consumed; label 11
 * undoes that offset before the scalar tail. The frame pushed by .Lvmx_copy
 * is still active, so faults use err3, or err4 inside the 128B loop.
 */
505.Lvmx_unaligned_copy:
506	/* Get the destination 16B aligned */
507	neg	r6,r3
508	mtocrf	0x01,r6		/* cr7 = low 4 bits of -dst */
509	clrldi	r6,r6,(64-4)	/* r6 = bytes needed to reach 16B alignment */
510
	/* cr7 bits +3, +2, +1, +0 select a 1-, 2-, 4- and 8-byte copy. */
511	bf	cr7*4+3,1f
512err3;	lbz	r0,0(r4)
513	addi	r4,r4,1
514err3;	stb	r0,0(r3)
515	addi	r3,r3,1
516
5171:	bf	cr7*4+2,2f
518err3;	lhz	r0,0(r4)
519	addi	r4,r4,2
520err3;	sth	r0,0(r3)
521	addi	r3,r3,2
522
5232:	bf	cr7*4+1,3f
524err3;	lwz	r0,0(r4)
525	addi	r4,r4,4
526err3;	stw	r0,0(r3)
527	addi	r3,r3,4
528
5293:	bf	cr7*4+0,4f
530err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
531err3;	lwz	r7,4(r4)
532	addi	r4,r4,8
533err3;	stw	r0,0(r3)
534err3;	stw	r7,4(r3)
535	addi	r3,r3,8
536
5374:	sub	r5,r5,r6
538
539	/* Get the destination 128B aligned */
540	neg	r6,r3
541	srdi	r7,r6,4
542	mtocrf	0x01,r7		/* cr7 = bits 4-7 of -dst */
543	clrldi	r6,r6,(64-7)	/* r6 = bytes needed to reach 128B alignment */
544
	/* Index offsets for the lvx/stvx forms used below. */
545	li	r9,16
546	li	r10,32
547	li	r11,48
548
549	LVS(v16,0,r4)		/* Setup permute control vector */
	/* Prime v0 with the first source quadword; r4 now runs 16 bytes ahead. */
550err3;	lvx	v0,0,r4
551	addi	r4,r4,16
552
	/* cr7 bits +3, +2, +1 select a 16-, 32- and 64-byte copy. */
553	bf	cr7*4+3,5f
554err3;	lvx	v1,0,r4
555	VPERM(v8,v0,v1,v16)
556	addi	r4,r4,16
557err3;	stvx	v8,0,r3
558	addi	r3,r3,16
559	vor	v0,v1,v1	/* v0 = v1: carry the last loaded quadword forward */
560
	/* From here on the last load of each group lands in v0 directly. */
5615:	bf	cr7*4+2,6f
562err3;	lvx	v1,0,r4
563	VPERM(v8,v0,v1,v16)
564err3;	lvx	v0,r4,r9
565	VPERM(v9,v1,v0,v16)
566	addi	r4,r4,32
567err3;	stvx	v8,0,r3
568err3;	stvx	v9,r3,r9
569	addi	r3,r3,32
570
5716:	bf	cr7*4+1,7f
572err3;	lvx	v3,0,r4
573	VPERM(v8,v0,v3,v16)
574err3;	lvx	v2,r4,r9
575	VPERM(v9,v3,v2,v16)
576err3;	lvx	v1,r4,r10
577	VPERM(v10,v2,v1,v16)
578err3;	lvx	v0,r4,r11
579	VPERM(v11,v1,v0,v16)
580	addi	r4,r4,64
581err3;	stvx	v8,0,r3
582err3;	stvx	v9,r3,r9
583err3;	stvx	v10,r3,r10
584err3;	stvx	v11,r3,r11
585	addi	r3,r3,64
586
5877:	sub	r5,r5,r6
588	srdi	r6,r5,7		/* number of whole 128B blocks */
589
	/*
	 * r14-r16 are borrowed as index offsets for the loop, so save them in
	 * the frame first. Faults inside the loop use err4, whose handler
	 * reloads them.
	 */
590	std	r14,STK_REG(R14)(r1)
591	std	r15,STK_REG(R15)(r1)
592	std	r16,STK_REG(R16)(r1)
593
594	li	r12,64
595	li	r14,80
596	li	r15,96
597	li	r16,112
598
599	mtctr	r6
600
601	/*
602	 * Now do cacheline sized loads and stores. By this stage the
603	 * cacheline stores are also cacheline aligned.
604	 */
605	.align	5
	/*
	 * Eight loads per pass, each followed by a VPERM against the previously
	 * loaded quadword, then eight stores of the permuted results v8-v15.
	 */
6068:
607err4;	lvx	v7,0,r4
608	VPERM(v8,v0,v7,v16)
609err4;	lvx	v6,r4,r9
610	VPERM(v9,v7,v6,v16)
611err4;	lvx	v5,r4,r10
612	VPERM(v10,v6,v5,v16)
613err4;	lvx	v4,r4,r11
614	VPERM(v11,v5,v4,v16)
615err4;	lvx	v3,r4,r12
616	VPERM(v12,v4,v3,v16)
617err4;	lvx	v2,r4,r14
618	VPERM(v13,v3,v2,v16)
619err4;	lvx	v1,r4,r15
620	VPERM(v14,v2,v1,v16)
621err4;	lvx	v0,r4,r16
622	VPERM(v15,v1,v0,v16)
623	addi	r4,r4,128
624err4;	stvx	v8,0,r3
625err4;	stvx	v9,r3,r9
626err4;	stvx	v10,r3,r10
627err4;	stvx	v11,r3,r11
628err4;	stvx	v12,r3,r12
629err4;	stvx	v13,r3,r14
630err4;	stvx	v14,r3,r15
631err4;	stvx	v15,r3,r16
632	addi	r3,r3,128
633	bdnz	8b
634
	/* r14-r16 hold the caller's values again; err3 applies from here. */
635	ld	r14,STK_REG(R14)(r1)
636	ld	r15,STK_REG(R15)(r1)
637	ld	r16,STK_REG(R16)(r1)
638
639	/* Up to 127B to go */
640	clrldi	r5,r5,(64-7)
641	srdi	r6,r5,4
642	mtocrf	0x01,r6		/* cr7 bits +1, +2, +3 select 64, 32, 16 bytes */
643
644	bf	cr7*4+1,9f
645err3;	lvx	v3,0,r4
646	VPERM(v8,v0,v3,v16)
647err3;	lvx	v2,r4,r9
648	VPERM(v9,v3,v2,v16)
649err3;	lvx	v1,r4,r10
650	VPERM(v10,v2,v1,v16)
651err3;	lvx	v0,r4,r11
652	VPERM(v11,v1,v0,v16)
653	addi	r4,r4,64
654err3;	stvx	v8,0,r3
655err3;	stvx	v9,r3,r9
656err3;	stvx	v10,r3,r10
657err3;	stvx	v11,r3,r11
658	addi	r3,r3,64
659
6609:	bf	cr7*4+2,10f
661err3;	lvx	v1,0,r4
662	VPERM(v8,v0,v1,v16)
663err3;	lvx	v0,r4,r9
664	VPERM(v9,v1,v0,v16)
665	addi	r4,r4,32
666err3;	stvx	v8,0,r3
667err3;	stvx	v9,r3,r9
668	addi	r3,r3,32
669
	/* Nothing reads v0 after this group, so no carry-forward copy is done. */
67010:	bf	cr7*4+3,11f
671err3;	lvx	v1,0,r4
672	VPERM(v8,v0,v1,v16)
673	addi	r4,r4,16
674err3;	stvx	v8,0,r3
675	addi	r3,r3,16
676
677	/* Up to 15B to go */
67811:	clrldi	r5,r5,(64-4)
679	addi	r4,r4,-16	/* Unwind the +16 load offset */
680	mtocrf	0x01,r5		/* cr7 bits +0, +1, +2, +3 select 8, 4, 2, 1 bytes */
681	bf	cr7*4+0,12f
682err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
683err3;	lwz	r6,4(r4)
684	addi	r4,r4,8
685err3;	stw	r0,0(r3)
686err3;	stw	r6,4(r3)
687	addi	r3,r3,8
688
68912:	bf	cr7*4+1,13f
690err3;	lwz	r0,0(r4)
691	addi	r4,r4,4
692err3;	stw	r0,0(r3)
693	addi	r3,r3,4
694
69513:	bf	cr7*4+2,14f
696err3;	lhz	r0,0(r4)
697	addi	r4,r4,2
698err3;	sth	r0,0(r3)
699	addi	r3,r3,2
700
70114:	bf	cr7*4+3,15f
702err3;	lbz	r0,0(r4)
703err3;	stb	r0,0(r3)
704
	/*
	 * Pop the frame and leave through exit_vmx_usercopy. LR was restored
	 * in .Lvmx_copy, so that routine returns straight to our caller.
	 */
70515:	addi	r1,r1,STACKFRAMESIZE
706	b	exit_vmx_usercopy	/* tail call optimise */
707#endif /* CONFIG_ALTIVEC */
708