xref: /openbmc/linux/arch/powerpc/lib/copyuser_power7.S (revision 2eb0f624b709e78ec8e2f4c3412947703db99301)
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2011
17 *
18 * Author: Anton Blanchard <anton@au.ibm.com>
19 */
20#include <asm/ppc_asm.h>
21
/*
 * Endian-dependent wrappers used by the unaligned VMX copy path.
 *
 * Big endian:    LVS expands to lvsl, and VPERM hands VRA,VRB to vperm in
 *                the order given.
 * Little endian: LVS expands to lvsr, and VPERM swaps VRA and VRB before
 *                handing them to vperm.
 *
 * Callers can therefore write one operand order for both byte orders.
 */
22#ifdef __BIG_ENDIAN__
23#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
24#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
25#else
26#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
27#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
28#endif
29
/*
 * Fault-annotation macros.  Each is written as a prefix ("err1;") in front
 * of a load or store.  The macro drops a numeric local label at the address
 * of that instruction and passes the label, together with a fixup label, to
 * EX_TABLE.  EX_TABLE is defined outside this file; presumably it emits the
 * exception-table entry that redirects a fault at the labelled instruction
 * to the fixup.
 *
 * err1: fixup is .Ldo_err1.  Used where no stack frame has been pushed.
 * err2: fixup is .Ldo_err2.  Used inside the non-VMX cacheline loop, where
 *       a frame exists and r14-r22 have been saved in it.
 */
30	.macro err1
31100:
32	EX_TABLE(100b,.Ldo_err1)
33	.endm
34
35	.macro err2
36200:
37	EX_TABLE(200b,.Ldo_err2)
38	.endm
39
40#ifdef CONFIG_ALTIVEC
/*
 * Fault-annotation macros for the VMX paths.  Like err1/err2 they are used
 * as an instruction prefix: they label the following instruction and pass
 * the label plus a fixup label to EX_TABLE (defined outside this file).
 *
 * err3: fixup is .Ldo_err3.  Used on the VMX paths where a frame exists but
 *       r14-r16 still hold the caller's values.
 * err4: fixup is .Ldo_err4.  Used inside the VMX cacheline loops, where
 *       r14-r16 have been saved in the frame and reused as index registers.
 */
41	.macro err3
42300:
43	EX_TABLE(300b,.Ldo_err3)
44	.endm
45
46	.macro err4
47400:
48	EX_TABLE(400b,.Ldo_err4)
49	.endm
50
51
/*
 * Fixup targets for faults on the VMX paths (reached through err4 / err3).
 *
 * .Ldo_err4 first reloads r14-r16 from the frame, because the VMX cacheline
 * loops overwrite them with index offsets, and then falls into .Ldo_err3.
 *
 * .Ldo_err3 calls exit_vmx_usercopy (defined elsewhere; presumably it ends
 * the VMX section opened by enter_vmx_usercopy).  The bl overwrites LR, so
 * the return address is reloaded from STACKFRAMESIZE+16(r1), the slot it
 * was saved in before the frame was pushed.  Control then goes to .Lexit,
 * which pops the frame and restarts the copy.
 */
52.Ldo_err4:
53	ld	r16,STK_REG(R16)(r1)
54	ld	r15,STK_REG(R15)(r1)
55	ld	r14,STK_REG(R14)(r1)
56.Ldo_err3:
57	bl	exit_vmx_usercopy
58	ld	r0,STACKFRAMESIZE+16(r1)
59	mtlr	r0
60	b	.Lexit
61#endif /* CONFIG_ALTIVEC */
62
/*
 * Fixup targets for faults on the non-VMX paths, and the common exit used
 * by every fixup.
 *
 * .Ldo_err2: fault inside the non-VMX cacheline loop.  Reload r14-r22 from
 *            the frame, then fall into .Lexit.
 * .Lexit:    pop the STACKFRAMESIZE frame, then fall into .Ldo_err1.
 * .Ldo_err1: no frame is active here.  Reload the original r3, r4 and r5
 *            from the slots below the stack pointer where the function
 *            entry stored them, and branch to __copy_tofrom_user_base
 *            (defined elsewhere; presumably it redoes the copy and works
 *            out the value to return after the fault).
 */
63.Ldo_err2:
64	ld	r22,STK_REG(R22)(r1)
65	ld	r21,STK_REG(R21)(r1)
66	ld	r20,STK_REG(R20)(r1)
67	ld	r19,STK_REG(R19)(r1)
68	ld	r18,STK_REG(R18)(r1)
69	ld	r17,STK_REG(R17)(r1)
70	ld	r16,STK_REG(R16)(r1)
71	ld	r15,STK_REG(R15)(r1)
72	ld	r14,STK_REG(R14)(r1)
73.Lexit:
74	addi	r1,r1,STACKFRAMESIZE
75.Ldo_err1:
76	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
77	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
78	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
79	b	__copy_tofrom_user_base
80
81
/*
 * __copy_tofrom_user_power7
 *
 * In:  r3 = destination address
 *      r4 = source address
 *      r5 = length in bytes
 * Out: r3 = 0 when the non-VMX or short path finishes without a fault.
 *      The VMX paths finish by tail-calling exit_vmx_usercopy, so on those
 *      paths r3 is whatever that routine returns.  After a fault the fixup
 *      code reloads the original arguments and branches to
 *      __copy_tofrom_user_base.
 *
 * The three arguments are stored below the current stack pointer, at the
 * offsets that become STK_REG(R31), STK_REG(R30) and STK_REG(R29) once a
 * STACKFRAMESIZE frame is pushed.  The fixup code and .Lvmx_copy both
 * read them back from those slots.
 *
 * Dispatch on length (unsigned compares):
 *   length < 16                         -> .Lshort_copy
 *   length >= 3328 with CONFIG_ALTIVEC  -> .Lvmx_copy
 *   otherwise                           -> fall through to .Lnonvmx_copy
 *
 * The compares are issued before the stores.  std does not modify the
 * condition register, so cr0 and cr1 are still valid at the branches.
 */
82_GLOBAL(__copy_tofrom_user_power7)
83#ifdef CONFIG_ALTIVEC
84	cmpldi	r5,16
85	cmpldi	cr1,r5,3328
86
87	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
88	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
89	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
90
91	blt	.Lshort_copy
92	bge	cr1,.Lvmx_copy
93#else
94	cmpldi	r5,16
95
96	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
97	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
98	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
99
100	blt	.Lshort_copy
101#endif
102
/*
 * Non-VMX copy using general purpose registers only.
 *
 * Register roles on entry: r3 = destination cursor, r4 = source cursor,
 * r5 = bytes remaining.  Reached by falling through from the function entry
 * or from .Lunwind_stack_nonvmx_copy.
 *
 * Step 1 aligns the source to 8 bytes.  mtocrf 0x01 copies the low four
 * bits of -source into cr7, and each bf tests one of them:
 *   cr7*4+3 -> copy 1 byte, cr7*4+2 -> copy 2 bytes, cr7*4+1 -> copy 4 bytes.
 * r6 is masked to (-source) & 7, the number of bytes consumed, and is
 * subtracted from r5 at label 3.
 *
 * Step 2 runs only when at least 128 bytes remain.  It pushes a frame,
 * saves r14-r22 and copies 128 bytes per iteration.  Faults before the
 * frame exists use err1; faults inside the loop use err2.
 */
103.Lnonvmx_copy:
104	/* Get the source 8B aligned */
105	neg	r6,r4
106	mtocrf	0x01,r6
107	clrldi	r6,r6,(64-3)
108
109	bf	cr7*4+3,1f
110err1;	lbz	r0,0(r4)
111	addi	r4,r4,1
112err1;	stb	r0,0(r3)
113	addi	r3,r3,1
114
1151:	bf	cr7*4+2,2f
116err1;	lhz	r0,0(r4)
117	addi	r4,r4,2
118err1;	sth	r0,0(r3)
119	addi	r3,r3,2
120
1212:	bf	cr7*4+1,3f
122err1;	lwz	r0,0(r4)
123	addi	r4,r4,4
124err1;	stw	r0,0(r3)
125	addi	r3,r3,4
126
	/* r5 = bytes left after alignment; skip the big loop if under 128 */
1273:	sub	r5,r5,r6
128	cmpldi	r5,128
129	blt	5f
130
	/*
	 * Push a frame and save the registers the loop uses as data
	 * registers.  LR is also stored in the caller's frame although this
	 * path makes no calls.
	 * NOTE(review): r22 is saved and restored, but no instruction in the
	 * loop below writes it - confirm whether that save is still needed.
	 */
131	mflr	r0
132	stdu	r1,-STACKFRAMESIZE(r1)
133	std	r14,STK_REG(R14)(r1)
134	std	r15,STK_REG(R15)(r1)
135	std	r16,STK_REG(R16)(r1)
136	std	r17,STK_REG(R17)(r1)
137	std	r18,STK_REG(R18)(r1)
138	std	r19,STK_REG(R19)(r1)
139	std	r20,STK_REG(R20)(r1)
140	std	r21,STK_REG(R21)(r1)
141	std	r22,STK_REG(R22)(r1)
142	std	r0,STACKFRAMESIZE+16(r1)
143
	/* CTR = number of whole 128B chunks */
144	srdi	r6,r5,7
145	mtctr	r6
146
147	/* Now do cacheline (128B) sized loads and stores. */
148	.align	5
1494:
	/* sixteen 8-byte loads, then sixteen 8-byte stores, per iteration */
150err2;	ld	r0,0(r4)
151err2;	ld	r6,8(r4)
152err2;	ld	r7,16(r4)
153err2;	ld	r8,24(r4)
154err2;	ld	r9,32(r4)
155err2;	ld	r10,40(r4)
156err2;	ld	r11,48(r4)
157err2;	ld	r12,56(r4)
158err2;	ld	r14,64(r4)
159err2;	ld	r15,72(r4)
160err2;	ld	r16,80(r4)
161err2;	ld	r17,88(r4)
162err2;	ld	r18,96(r4)
163err2;	ld	r19,104(r4)
164err2;	ld	r20,112(r4)
165err2;	ld	r21,120(r4)
166	addi	r4,r4,128
167err2;	std	r0,0(r3)
168err2;	std	r6,8(r3)
169err2;	std	r7,16(r3)
170err2;	std	r8,24(r3)
171err2;	std	r9,32(r3)
172err2;	std	r10,40(r3)
173err2;	std	r11,48(r3)
174err2;	std	r12,56(r3)
175err2;	std	r14,64(r3)
176err2;	std	r15,72(r3)
177err2;	std	r16,80(r3)
178err2;	std	r17,88(r3)
179err2;	std	r18,96(r3)
180err2;	std	r19,104(r3)
181err2;	std	r20,112(r3)
182err2;	std	r21,120(r3)
183	addi	r3,r3,128
184	bdnz	4b
185
	/* keep only the sub-128B remainder of the length */
186	clrldi	r5,r5,(64-7)
187
	/* restore the saved registers and pop the frame */
188	ld	r14,STK_REG(R14)(r1)
189	ld	r15,STK_REG(R15)(r1)
190	ld	r16,STK_REG(R16)(r1)
191	ld	r17,STK_REG(R17)(r1)
192	ld	r18,STK_REG(R18)(r1)
193	ld	r19,STK_REG(R19)(r1)
194	ld	r20,STK_REG(R20)(r1)
195	ld	r21,STK_REG(R21)(r1)
196	ld	r22,STK_REG(R22)(r1)
197	addi	r1,r1,STACKFRAMESIZE
198
/*
 * Non-VMX tail: fewer than 128 bytes remain in r5 and no frame is active,
 * so faults use err1.
 *
 * r5 >> 4 is placed in cr7 by mtocrf 0x01, so the cr7 bits select the 64-,
 * 32- and 16-byte pieces of the remaining length:
 *   cr7*4+1 -> 64 bytes, cr7*4+2 -> 32 bytes, cr7*4+3 -> 16 bytes.
 * Label 9 then reduces r5 to the final 0-15 bytes, which the code that
 * follows this block handles.
 */
199	/* Up to 127B to go */
2005:	srdi	r6,r5,4
201	mtocrf	0x01,r6
202
2036:	bf	cr7*4+1,7f
204err1;	ld	r0,0(r4)
205err1;	ld	r6,8(r4)
206err1;	ld	r7,16(r4)
207err1;	ld	r8,24(r4)
208err1;	ld	r9,32(r4)
209err1;	ld	r10,40(r4)
210err1;	ld	r11,48(r4)
211err1;	ld	r12,56(r4)
212	addi	r4,r4,64
213err1;	std	r0,0(r3)
214err1;	std	r6,8(r3)
215err1;	std	r7,16(r3)
216err1;	std	r8,24(r3)
217err1;	std	r9,32(r3)
218err1;	std	r10,40(r3)
219err1;	std	r11,48(r3)
220err1;	std	r12,56(r3)
221	addi	r3,r3,64
222
223	/* Up to 63B to go */
2247:	bf	cr7*4+2,8f
225err1;	ld	r0,0(r4)
226err1;	ld	r6,8(r4)
227err1;	ld	r7,16(r4)
228err1;	ld	r8,24(r4)
229	addi	r4,r4,32
230err1;	std	r0,0(r3)
231err1;	std	r6,8(r3)
232err1;	std	r7,16(r3)
233err1;	std	r8,24(r3)
234	addi	r3,r3,32
235
236	/* Up to 31B to go */
2378:	bf	cr7*4+3,9f
238err1;	ld	r0,0(r4)
239err1;	ld	r6,8(r4)
240	addi	r4,r4,16
241err1;	std	r0,0(r3)
242err1;	std	r6,8(r3)
243	addi	r3,r3,16
244
2459:	clrldi	r5,r5,(64-4)
246
/*
 * Copy the last 0-15 bytes and return 0.
 *
 * Reached directly from the function entry when the length is below 16, or
 * by falling through from the non-VMX tail.  No frame is active, so faults
 * use err1.
 *
 * mtocrf 0x01 puts the low four bits of r5 into cr7; each bit selects one
 * power-of-two piece:
 *   cr7*4+0 -> 8 bytes (done as two word accesses), cr7*4+1 -> 4 bytes,
 *   cr7*4+2 -> 2 bytes, cr7*4+3 -> 1 byte.
 * The final byte copy does not advance r3 or r4 because nothing follows it.
 */
247	/* Up to 15B to go */
248.Lshort_copy:
249	mtocrf	0x01,r5
250	bf	cr7*4+0,12f
251err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
252err1;	lwz	r6,4(r4)
253	addi	r4,r4,8
254err1;	stw	r0,0(r3)
255err1;	stw	r6,4(r3)
256	addi	r3,r3,8
257
25812:	bf	cr7*4+1,13f
259err1;	lwz	r0,0(r4)
260	addi	r4,r4,4
261err1;	stw	r0,0(r3)
262	addi	r3,r3,4
263
26413:	bf	cr7*4+2,14f
265err1;	lhz	r0,0(r4)
266	addi	r4,r4,2
267err1;	sth	r0,0(r3)
268	addi	r3,r3,2
269
27014:	bf	cr7*4+3,15f
271err1;	lbz	r0,0(r4)
272err1;	stb	r0,0(r3)
273
	/* success: return 0 in r3 */
27415:	li	r3,0
275	blr
276
/*
 * Fallback taken from .Lvmx_copy when enter_vmx_usercopy returned 0.
 * Pops the frame that .Lvmx_copy pushed and continues with the non-VMX
 * copy.  r3, r4 and r5 were already reloaded with the original arguments
 * before the branch here.
 */
277.Lunwind_stack_nonvmx_copy:
278	addi	r1,r1,STACKFRAMESIZE
279	b	.Lnonvmx_copy
280
281#ifdef CONFIG_ALTIVEC
/*
 * VMX copy entry, taken from the function entry for lengths of 3328 bytes
 * or more.
 *
 * Saves LR in the caller's frame, pushes a STACKFRAMESIZE frame and calls
 * enter_vmx_usercopy (defined elsewhere).  Its return value is compared
 * with 0 into cr1 right away, because r3 is about to be overwritten.  The
 * original r3, r4 and r5 are then reloaded from the slots the function
 * entry filled, which now sit inside the new frame, and LR is restored.
 *
 * The prefetch streams are programmed before cr1 is tested, so they are
 * started even when the code then falls back to the non-VMX copy.
 */
282.Lvmx_copy:
283	mflr	r0
284	std	r0,16(r1)
285	stdu	r1,-STACKFRAMESIZE(r1)
286	bl	enter_vmx_usercopy
287	cmpwi	cr1,r3,0
288	ld	r0,STACKFRAMESIZE+16(r1)
289	ld	r3,STK_REG(R31)(r1)
290	ld	r4,STK_REG(R30)(r1)
291	ld	r5,STK_REG(R29)(r1)
292	mtlr	r0
293
294	/*
295	 * We prefetch both the source and destination using enhanced touch
296	 * instructions. We use a stream ID of 0 for the load side and
297	 * 1 for the store side.
298	 */
	/* r6 = source and r9 = destination, both rounded down to 128B */
299	clrrdi	r6,r4,7
300	clrrdi	r9,r3,7
301	ori	r9,r9,1		/* stream=1 */
302
303	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
304	cmpldi	r7,0x3FF
305	ble	1f
306	li	r7,0x3FF
	/* r7 = (capped length << 7) | 0x0E000000; r10 = the same with stream=1 */
3071:	lis	r0,0x0E00	/* depth=7 */
308	sldi	r7,r7,7
309	or	r7,r7,r0
310	ori	r10,r7,1	/* stream=1 */
311
	/* lis sign-extends on 64-bit, so clear the upper word: r8 = 0x80000000 */
312	lis	r8,0x8000	/* GO=1 */
313	clrldi	r8,r8,32
314
315	/* setup read stream 0 */
316	dcbt	0,r6,0b01000   /* addr from */
317	dcbt	0,r7,0b01010   /* length and depth from */
318	/* setup write stream 1 */
319	dcbtst	0,r9,0b01000   /* addr to */
320	dcbtst	0,r10,0b01010  /* length and depth to */
321	eieio
322	dcbt	0,r8,0b01010	/* all streams GO */
323
	/* enter_vmx_usercopy returned 0: pop the frame and copy without VMX */
324	beq	cr1,.Lunwind_stack_nonvmx_copy
325
/*
 * VMX copy for the case where source and destination share the same offset
 * within a 16-byte quadword.  Continues from .Lvmx_copy with a frame active.
 *
 * Register roles: r3 = destination cursor, r4 = source cursor,
 * r5 = bytes remaining, r6 = scratch byte count, r9/r10/r11 = 16/32/48 and
 * r12/r14/r15/r16 = 64/80/96/112 as index offsets for lvx/stvx.
 *
 * Steps:
 *   1. If (source ^ destination) & 15 is non-zero, go to
 *      .Lvmx_unaligned_copy.
 *   2. Copy 1/2/4/8 bytes as selected by the low four bits of -destination
 *      to make the destination 16B aligned.
 *   3. Copy 16/32/64 bytes as selected by bits of (-destination >> 4) to
 *      make the destination 128B aligned.
 *   4. Copy whole 128B lines with CTR = remaining >> 7.
 *   5. Copy the 64/32/16 byte pieces of the remainder, then 8/4/2/1 bytes.
 *   6. Pop the frame and tail-call exit_vmx_usercopy.
 *
 * Faults use err3, except inside the cacheline loop where r14-r16 are in
 * use as index registers and err4 is used so that they get restored.
 */
326	/*
327	 * If source and destination are not relatively aligned we use a
328	 * slower permute loop.
329	 */
330	xor	r6,r4,r3
331	rldicl.	r6,r6,0,(64-4)
332	bne	.Lvmx_unaligned_copy
333
334	/* Get the destination 16B aligned */
335	neg	r6,r3
336	mtocrf	0x01,r6
337	clrldi	r6,r6,(64-4)
338
	/* cr7 bits of -destination: +3 -> 1 byte, +2 -> 2, +1 -> 4, +0 -> 8 */
339	bf	cr7*4+3,1f
340err3;	lbz	r0,0(r4)
341	addi	r4,r4,1
342err3;	stb	r0,0(r3)
343	addi	r3,r3,1
344
3451:	bf	cr7*4+2,2f
346err3;	lhz	r0,0(r4)
347	addi	r4,r4,2
348err3;	sth	r0,0(r3)
349	addi	r3,r3,2
350
3512:	bf	cr7*4+1,3f
352err3;	lwz	r0,0(r4)
353	addi	r4,r4,4
354err3;	stw	r0,0(r3)
355	addi	r3,r3,4
356
3573:	bf	cr7*4+0,4f
358err3;	ld	r0,0(r4)
359	addi	r4,r4,8
360err3;	std	r0,0(r3)
361	addi	r3,r3,8
362
	/* r6 = (-destination) & 15, the bytes just consumed */
3634:	sub	r5,r5,r6
364
365	/* Get the destination 128B aligned */
366	neg	r6,r3
367	srdi	r7,r6,4
368	mtocrf	0x01,r7
369	clrldi	r6,r6,(64-7)
370
371	li	r9,16
372	li	r10,32
373	li	r11,48
374
	/* cr7 bits of (-destination >> 4): +3 -> 16 bytes, +2 -> 32, +1 -> 64 */
375	bf	cr7*4+3,5f
376err3;	lvx	v1,0,r4
377	addi	r4,r4,16
378err3;	stvx	v1,0,r3
379	addi	r3,r3,16
380
3815:	bf	cr7*4+2,6f
382err3;	lvx	v1,0,r4
383err3;	lvx	v0,r4,r9
384	addi	r4,r4,32
385err3;	stvx	v1,0,r3
386err3;	stvx	v0,r3,r9
387	addi	r3,r3,32
388
3896:	bf	cr7*4+1,7f
390err3;	lvx	v3,0,r4
391err3;	lvx	v2,r4,r9
392err3;	lvx	v1,r4,r10
393err3;	lvx	v0,r4,r11
394	addi	r4,r4,64
395err3;	stvx	v3,0,r3
396err3;	stvx	v2,r3,r9
397err3;	stvx	v1,r3,r10
398err3;	stvx	v0,r3,r11
399	addi	r3,r3,64
400
	/* r6 = (-destination) & 127 consumed above; then r6 = whole 128B lines */
4017:	sub	r5,r5,r6
402	srdi	r6,r5,7
403
	/* r14-r16 become index registers below; save the caller's values */
404	std	r14,STK_REG(R14)(r1)
405	std	r15,STK_REG(R15)(r1)
406	std	r16,STK_REG(R16)(r1)
407
408	li	r12,64
409	li	r14,80
410	li	r15,96
411	li	r16,112
412
413	mtctr	r6
414
415	/*
416	 * Now do cacheline sized loads and stores. By this stage the
417	 * cacheline stores are also cacheline aligned.
418	 */
419	.align	5
4208:
421err4;	lvx	v7,0,r4
422err4;	lvx	v6,r4,r9
423err4;	lvx	v5,r4,r10
424err4;	lvx	v4,r4,r11
425err4;	lvx	v3,r4,r12
426err4;	lvx	v2,r4,r14
427err4;	lvx	v1,r4,r15
428err4;	lvx	v0,r4,r16
429	addi	r4,r4,128
430err4;	stvx	v7,0,r3
431err4;	stvx	v6,r3,r9
432err4;	stvx	v5,r3,r10
433err4;	stvx	v4,r3,r11
434err4;	stvx	v3,r3,r12
435err4;	stvx	v2,r3,r14
436err4;	stvx	v1,r3,r15
437err4;	stvx	v0,r3,r16
438	addi	r3,r3,128
439	bdnz	8b
440
	/* index registers no longer needed; from here faults use err3 again */
441	ld	r14,STK_REG(R14)(r1)
442	ld	r15,STK_REG(R15)(r1)
443	ld	r16,STK_REG(R16)(r1)
444
445	/* Up to 127B to go */
446	clrldi	r5,r5,(64-7)
447	srdi	r6,r5,4
448	mtocrf	0x01,r6
449
	/* cr7 bits of (remaining >> 4): +1 -> 64 bytes, +2 -> 32, +3 -> 16 */
450	bf	cr7*4+1,9f
451err3;	lvx	v3,0,r4
452err3;	lvx	v2,r4,r9
453err3;	lvx	v1,r4,r10
454err3;	lvx	v0,r4,r11
455	addi	r4,r4,64
456err3;	stvx	v3,0,r3
457err3;	stvx	v2,r3,r9
458err3;	stvx	v1,r3,r10
459err3;	stvx	v0,r3,r11
460	addi	r3,r3,64
461
4629:	bf	cr7*4+2,10f
463err3;	lvx	v1,0,r4
464err3;	lvx	v0,r4,r9
465	addi	r4,r4,32
466err3;	stvx	v1,0,r3
467err3;	stvx	v0,r3,r9
468	addi	r3,r3,32
469
47010:	bf	cr7*4+3,11f
471err3;	lvx	v1,0,r4
472	addi	r4,r4,16
473err3;	stvx	v1,0,r3
474	addi	r3,r3,16
475
476	/* Up to 15B to go */
47711:	clrldi	r5,r5,(64-4)
478	mtocrf	0x01,r5
479	bf	cr7*4+0,12f
480err3;	ld	r0,0(r4)
481	addi	r4,r4,8
482err3;	std	r0,0(r3)
483	addi	r3,r3,8
484
48512:	bf	cr7*4+1,13f
486err3;	lwz	r0,0(r4)
487	addi	r4,r4,4
488err3;	stw	r0,0(r3)
489	addi	r3,r3,4
490
49113:	bf	cr7*4+2,14f
492err3;	lhz	r0,0(r4)
493	addi	r4,r4,2
494err3;	sth	r0,0(r3)
495	addi	r3,r3,2
496
49714:	bf	cr7*4+3,15f
498err3;	lbz	r0,0(r4)
499err3;	stb	r0,0(r3)
500
	/* pop the frame; exit_vmx_usercopy returns straight to our caller */
50115:	addi	r1,r1,STACKFRAMESIZE
502	b	exit_vmx_usercopy	/* tail call optimise */
503
/*
 * VMX copy for the case where source and destination have different
 * offsets within a 16-byte quadword.  Entered from the relative-alignment
 * check with a frame active.
 *
 * Register roles: r3 = destination cursor, r4 = source cursor,
 * r5 = bytes remaining, r9/r10/r11 = 16/32/48 and r12/r14/r15/r16 =
 * 64/80/96/112 as index offsets, v16 = permute control vector produced by
 * LVS from the source address, v0 = the most recently loaded source
 * quadword carried into the next VPERM.
 *
 * The destination is aligned first (1/2/4/8 bytes, then 16/32/64 bytes),
 * exactly as bits of -destination dictate.  From the LVS onwards every
 * 16-byte store is built by VPERM from the previous and the current source
 * quadword.  One source quadword is loaded up front and r4 is advanced past
 * it, so r4 stays 16 bytes ahead until label 11 subtracts the 16 again.
 *
 * Faults use err3, except inside the cacheline loop where r14-r16 are in
 * use as index registers and err4 is used so that they get restored.
 */
504.Lvmx_unaligned_copy:
505	/* Get the destination 16B aligned */
506	neg	r6,r3
507	mtocrf	0x01,r6
508	clrldi	r6,r6,(64-4)
509
	/* cr7 bits of -destination: +3 -> 1 byte, +2 -> 2, +1 -> 4, +0 -> 8 */
510	bf	cr7*4+3,1f
511err3;	lbz	r0,0(r4)
512	addi	r4,r4,1
513err3;	stb	r0,0(r3)
514	addi	r3,r3,1
515
5161:	bf	cr7*4+2,2f
517err3;	lhz	r0,0(r4)
518	addi	r4,r4,2
519err3;	sth	r0,0(r3)
520	addi	r3,r3,2
521
5222:	bf	cr7*4+1,3f
523err3;	lwz	r0,0(r4)
524	addi	r4,r4,4
525err3;	stw	r0,0(r3)
526	addi	r3,r3,4
527
5283:	bf	cr7*4+0,4f
529err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
530err3;	lwz	r7,4(r4)
531	addi	r4,r4,8
532err3;	stw	r0,0(r3)
533err3;	stw	r7,4(r3)
534	addi	r3,r3,8
535
	/* r6 = (-destination) & 15, the bytes just consumed */
5364:	sub	r5,r5,r6
537
538	/* Get the destination 128B aligned */
539	neg	r6,r3
540	srdi	r7,r6,4
541	mtocrf	0x01,r7
542	clrldi	r6,r6,(64-7)
543
544	li	r9,16
545	li	r10,32
546	li	r11,48
547
548	LVS(v16,0,r4)		/* Setup permute control vector */
	/* prime v0 with the first source quadword; r4 now runs 16 bytes ahead */
549err3;	lvx	v0,0,r4
550	addi	r4,r4,16
551
	/* cr7 bits of (-destination >> 4): +3 -> 16 bytes, +2 -> 32, +1 -> 64 */
552	bf	cr7*4+3,5f
553err3;	lvx	v1,0,r4
554	VPERM(v8,v0,v1,v16)
555	addi	r4,r4,16
556err3;	stvx	v8,0,r3
557	addi	r3,r3,16
	/* v0 = v1: carry the newest quadword forward for the next VPERM */
558	vor	v0,v1,v1
559
5605:	bf	cr7*4+2,6f
561err3;	lvx	v1,0,r4
562	VPERM(v8,v0,v1,v16)
563err3;	lvx	v0,r4,r9
564	VPERM(v9,v1,v0,v16)
565	addi	r4,r4,32
566err3;	stvx	v8,0,r3
567err3;	stvx	v9,r3,r9
568	addi	r3,r3,32
569
5706:	bf	cr7*4+1,7f
571err3;	lvx	v3,0,r4
572	VPERM(v8,v0,v3,v16)
573err3;	lvx	v2,r4,r9
574	VPERM(v9,v3,v2,v16)
575err3;	lvx	v1,r4,r10
576	VPERM(v10,v2,v1,v16)
577err3;	lvx	v0,r4,r11
578	VPERM(v11,v1,v0,v16)
579	addi	r4,r4,64
580err3;	stvx	v8,0,r3
581err3;	stvx	v9,r3,r9
582err3;	stvx	v10,r3,r10
583err3;	stvx	v11,r3,r11
584	addi	r3,r3,64
585
	/* r6 = (-destination) & 127 consumed above; then r6 = whole 128B lines */
5867:	sub	r5,r5,r6
587	srdi	r6,r5,7
588
	/* r14-r16 become index registers below; save the caller's values */
589	std	r14,STK_REG(R14)(r1)
590	std	r15,STK_REG(R15)(r1)
591	std	r16,STK_REG(R16)(r1)
592
593	li	r12,64
594	li	r14,80
595	li	r15,96
596	li	r16,112
597
598	mtctr	r6
599
600	/*
601	 * Now do cacheline sized loads and stores. By this stage the
602	 * cacheline stores are also cacheline aligned.
603	 */
604	.align	5
6058:
	/* the last load of each iteration lands in v0, feeding the next one */
606err4;	lvx	v7,0,r4
607	VPERM(v8,v0,v7,v16)
608err4;	lvx	v6,r4,r9
609	VPERM(v9,v7,v6,v16)
610err4;	lvx	v5,r4,r10
611	VPERM(v10,v6,v5,v16)
612err4;	lvx	v4,r4,r11
613	VPERM(v11,v5,v4,v16)
614err4;	lvx	v3,r4,r12
615	VPERM(v12,v4,v3,v16)
616err4;	lvx	v2,r4,r14
617	VPERM(v13,v3,v2,v16)
618err4;	lvx	v1,r4,r15
619	VPERM(v14,v2,v1,v16)
620err4;	lvx	v0,r4,r16
621	VPERM(v15,v1,v0,v16)
622	addi	r4,r4,128
623err4;	stvx	v8,0,r3
624err4;	stvx	v9,r3,r9
625err4;	stvx	v10,r3,r10
626err4;	stvx	v11,r3,r11
627err4;	stvx	v12,r3,r12
628err4;	stvx	v13,r3,r14
629err4;	stvx	v14,r3,r15
630err4;	stvx	v15,r3,r16
631	addi	r3,r3,128
632	bdnz	8b
633
	/* index registers no longer needed; from here faults use err3 again */
634	ld	r14,STK_REG(R14)(r1)
635	ld	r15,STK_REG(R15)(r1)
636	ld	r16,STK_REG(R16)(r1)
637
638	/* Up to 127B to go */
639	clrldi	r5,r5,(64-7)
640	srdi	r6,r5,4
641	mtocrf	0x01,r6
642
	/* cr7 bits of (remaining >> 4): +1 -> 64 bytes, +2 -> 32, +3 -> 16 */
643	bf	cr7*4+1,9f
644err3;	lvx	v3,0,r4
645	VPERM(v8,v0,v3,v16)
646err3;	lvx	v2,r4,r9
647	VPERM(v9,v3,v2,v16)
648err3;	lvx	v1,r4,r10
649	VPERM(v10,v2,v1,v16)
650err3;	lvx	v0,r4,r11
651	VPERM(v11,v1,v0,v16)
652	addi	r4,r4,64
653err3;	stvx	v8,0,r3
654err3;	stvx	v9,r3,r9
655err3;	stvx	v10,r3,r10
656err3;	stvx	v11,r3,r11
657	addi	r3,r3,64
658
6599:	bf	cr7*4+2,10f
660err3;	lvx	v1,0,r4
661	VPERM(v8,v0,v1,v16)
662err3;	lvx	v0,r4,r9
663	VPERM(v9,v1,v0,v16)
664	addi	r4,r4,32
665err3;	stvx	v8,0,r3
666err3;	stvx	v9,r3,r9
667	addi	r3,r3,32
668
	/* last vector piece: v0 is not refreshed because no VPERM follows */
66910:	bf	cr7*4+3,11f
670err3;	lvx	v1,0,r4
671	VPERM(v8,v0,v1,v16)
672	addi	r4,r4,16
673err3;	stvx	v8,0,r3
674	addi	r3,r3,16
675
676	/* Up to 15B to go */
67711:	clrldi	r5,r5,(64-4)
678	addi	r4,r4,-16	/* Unwind the +16 load offset */
679	mtocrf	0x01,r5
680	bf	cr7*4+0,12f
681err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
682err3;	lwz	r6,4(r4)
683	addi	r4,r4,8
684err3;	stw	r0,0(r3)
685err3;	stw	r6,4(r3)
686	addi	r3,r3,8
687
68812:	bf	cr7*4+1,13f
689err3;	lwz	r0,0(r4)
690	addi	r4,r4,4
691err3;	stw	r0,0(r3)
692	addi	r3,r3,4
693
69413:	bf	cr7*4+2,14f
695err3;	lhz	r0,0(r4)
696	addi	r4,r4,2
697err3;	sth	r0,0(r3)
698	addi	r3,r3,2
699
70014:	bf	cr7*4+3,15f
701err3;	lbz	r0,0(r4)
702err3;	stb	r0,0(r3)
703
	/* pop the frame; exit_vmx_usercopy returns straight to our caller */
70415:	addi	r1,r1,STACKFRAMESIZE
705	b	exit_vmx_usercopy	/* tail call optimise */
706#endif /* CONFIG_ALTIVEC */
707