/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

/*
 * SELFTEST_CASE is assigned to the assembler symbol test_feature right
 * before the CPU_FTR_ALTIVEC feature section in __copy_tofrom_user_power7.
 * A build may predefine it; when it does not, it defaults to 0 here.
 */
#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif

/*
 * Endian-dependent helpers used by the unaligned VMX copy path.
 *
 * LVS(VRT,RA,RB)          emits lvsl on big-endian and lvsr on little-endian
 *                         to build the permute control vector.
 * VPERM(VRT,VRA,VRB,VRC)  emits vperm; on little-endian the two data operands
 *                         VRA and VRB are exchanged, so call sites pass their
 *                         operands in the same order for both endiannesses.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

/*
 * Fault annotations for the non-VMX paths.
 *
 * They are used as "errN;  <insn>": the macro defines a numeric local label,
 * which therefore marks the instruction that follows it on the same line, and
 * passes that label plus a fixup label to EX_TABLE (defined outside this
 * file; presumably it records an exception-table entry - confirm there).
 *
 *   err1 - used where no stack frame is active          -> .Ldo_err1
 *   err2 - used inside the 128B loop, where a frame is
 *          active and r14-r22 have been saved in it     -> .Ldo_err2
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
/*
 * Fault annotations for the VMX paths.  Used as "errN;  <insn>": each macro
 * defines a numeric local label marking the instruction that follows it and
 * hands that label and a fixup label to EX_TABLE (defined outside this file).
 *
 *   err3 - stack frame active                               -> .Ldo_err3
 *   err4 - as err3, inside the 128B loops where r14-r16
 *          have been saved and reused as index registers    -> .Ldo_err4
 */
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm


/*
 * VMX fixup handlers.
 *
 * .Ldo_err4 reloads r14-r16 from the frame and falls through to .Ldo_err3.
 * .Ldo_err3 calls exit_vmx_usercopy, then reloads the return address from the
 * LR save slot above the frame (the bl overwrote LR) and continues at .Lexit.
 */
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	CFUNC(exit_vmx_usercopy)
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

/*
 * Common fixup tail; the three labels fall through into one another.
 *
 * .Ldo_err2  fault inside the non-VMX 128B loop: reload r14-r22 from the
 *            frame.  LR is not reloaded; that path contains no bl, so LR
 *            still holds the caller's return address.
 * .Lexit     drop the STACKFRAMESIZE frame.
 * .Ldo_err1  reload the original r3/r4/r5 that the function entry stored
 *            below the stack pointer and branch to __copy_tofrom_user_base
 *            with them.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base


/*
 * __copy_tofrom_user_power7
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes to copy
 * Out:	r3 = 0 when the non-VMX path finishes (li r3,0 / blr).  The VMX paths
 *	finish by tail-calling exit_vmx_usercopy, so r3 is whatever that
 *	function returns (presumably 0 - TODO confirm).
 *
 * On a fault in any annotated load or store, the fixup handlers reload the
 * original r3/r4/r5 saved below and branch to __copy_tofrom_user_base.
 *
 * Dispatch:
 *	fewer than 16 bytes                          -> .Lshort_copy
 *	more than 3328 bytes, CONFIG_ALTIVEC and the
 *	CPU_FTR_ALTIVEC feature section enabled      -> .Lvmx_copy
 *	anything else                                -> falls into .Lnonvmx_copy
 */
_GLOBAL(__copy_tofrom_user_power7)
	cmpldi	r5,16
	cmpldi	cr1,r5,3328

	/*
	 * Save the arguments below the stack pointer.  Once a frame of
	 * STACKFRAMESIZE is pushed these slots are STK_REG(R31/R30/R29)(r1).
	 */
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

/*
 * Integer (non-VMX) copy: r3 = destination, r4 = source, r5 = bytes left.
 *
 * "mtocrf 0x01,rX" copies the low four bits of rX into cr7, so each
 * "bf cr7*4+N" below skips a step when the corresponding size bit is clear
 * (N=3 is the 1s bit, N=2 the 2s bit, N=1 the 4s bit, N=0 the 8s bit).
 *
 * On exit r5 holds the 0-15 bytes still to copy and execution falls through
 * to .Lshort_copy.
 */
.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)	/* r6 = (-src) & 7 = bytes to alignment */

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f		/* less than one 128B block left */

	/*
	 * The loop below keeps copy data in r14-r21, so push a frame and
	 * save r14-r22 in it.  (r22 is saved and restored although the loop
	 * itself does not use it.)  LR goes into the save slot above the
	 * new frame.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7		/* CTR = number of whole 128B blocks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)	/* r5 &= 127 */

	/* Restore the saved registers and drop the frame. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4		/* cr7 <- 64/32/16 byte bits of r5 */
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)	/* r5 &= 15 */

236a66086b8SAnton Blanchard	/* Up to 15B to go */
237a66086b8SAnton Blanchard.Lshort_copy:
238a66086b8SAnton Blanchard	mtocrf	0x01,r5
239a66086b8SAnton Blanchard	bf	cr7*4+0,12f
240a66086b8SAnton Blancharderr1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
241a66086b8SAnton Blancharderr1;	lwz	r6,4(r4)
242a66086b8SAnton Blanchard	addi	r4,r4,8
243a66086b8SAnton Blancharderr1;	stw	r0,0(r3)
244a66086b8SAnton Blancharderr1;	stw	r6,4(r3)
245a66086b8SAnton Blanchard	addi	r3,r3,8
246a66086b8SAnton Blanchard
247a66086b8SAnton Blanchard12:	bf	cr7*4+1,13f
248a66086b8SAnton Blancharderr1;	lwz	r0,0(r4)
249a66086b8SAnton Blanchard	addi	r4,r4,4
250a66086b8SAnton Blancharderr1;	stw	r0,0(r3)
251a66086b8SAnton Blanchard	addi	r3,r3,4
252a66086b8SAnton Blanchard
253a66086b8SAnton Blanchard13:	bf	cr7*4+2,14f
254a66086b8SAnton Blancharderr1;	lhz	r0,0(r4)
255a66086b8SAnton Blanchard	addi	r4,r4,2
256a66086b8SAnton Blancharderr1;	sth	r0,0(r3)
257a66086b8SAnton Blanchard	addi	r3,r3,2
258a66086b8SAnton Blanchard
259a66086b8SAnton Blanchard14:	bf	cr7*4+3,15f
260a66086b8SAnton Blancharderr1;	lbz	r0,0(r4)
261a66086b8SAnton Blancharderr1;	stb	r0,0(r3)
262a66086b8SAnton Blanchard
263a66086b8SAnton Blanchard15:	li	r3,0
264a66086b8SAnton Blanchard	blr
265a66086b8SAnton Blanchard
266a66086b8SAnton Blanchard.Lunwind_stack_nonvmx_copy:
267a66086b8SAnton Blanchard	addi	r1,r1,STACKFRAMESIZE
268a66086b8SAnton Blanchard	b	.Lnonvmx_copy
269a66086b8SAnton Blanchard
/*
 * VMX copy entry.  Saves LR in the caller's frame, pushes a frame and calls
 * enter_vmx_usercopy (defined elsewhere).  Its result is compared with zero
 * into cr1 so the outcome survives the reload of r3; the original r3/r4/r5
 * are then reloaded from the slots written at function entry and LR is
 * restored.  The prefetch streams are programmed before cr1 is acted on, so
 * they are started even when the copy falls back to the integer path.
 */
.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_usercopy)
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7		/* source rounded down to 128B */
	clrrdi	r9,r3,7		/* destination rounded down to 128B */
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32	/* drop the sign extension left by lis */

	/* setup read stream 0 */
	dcbt	0,r6,0b01000   /* addr from */
	dcbt	0,r7,0b01010   /* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000   /* addr to */
	dcbtst	0,r10,0b01010  /* length and depth to */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */

	/* enter_vmx_usercopy result was zero: fall back to the integer copy */
	beq	cr1,.Lunwind_stack_nonvmx_copy

315a66086b8SAnton Blanchard	/*
316a66086b8SAnton Blanchard	 * If source and destination are not relatively aligned we use a
317a66086b8SAnton Blanchard	 * slower permute loop.
318a66086b8SAnton Blanchard	 */
319a66086b8SAnton Blanchard	xor	r6,r4,r3
320a66086b8SAnton Blanchard	rldicl.	r6,r6,0,(64-4)
321a66086b8SAnton Blanchard	bne	.Lvmx_unaligned_copy
322a66086b8SAnton Blanchard
323a66086b8SAnton Blanchard	/* Get the destination 16B aligned */
324a66086b8SAnton Blanchard	neg	r6,r3
325a66086b8SAnton Blanchard	mtocrf	0x01,r6
326a66086b8SAnton Blanchard	clrldi	r6,r6,(64-4)
327a66086b8SAnton Blanchard
328a66086b8SAnton Blanchard	bf	cr7*4+3,1f
329a66086b8SAnton Blancharderr3;	lbz	r0,0(r4)
330a66086b8SAnton Blanchard	addi	r4,r4,1
331a66086b8SAnton Blancharderr3;	stb	r0,0(r3)
332a66086b8SAnton Blanchard	addi	r3,r3,1
333a66086b8SAnton Blanchard
334a66086b8SAnton Blanchard1:	bf	cr7*4+2,2f
335a66086b8SAnton Blancharderr3;	lhz	r0,0(r4)
336a66086b8SAnton Blanchard	addi	r4,r4,2
337a66086b8SAnton Blancharderr3;	sth	r0,0(r3)
338a66086b8SAnton Blanchard	addi	r3,r3,2
339a66086b8SAnton Blanchard
340a66086b8SAnton Blanchard2:	bf	cr7*4+1,3f
341a66086b8SAnton Blancharderr3;	lwz	r0,0(r4)
342a66086b8SAnton Blanchard	addi	r4,r4,4
343a66086b8SAnton Blancharderr3;	stw	r0,0(r3)
344a66086b8SAnton Blanchard	addi	r3,r3,4
345a66086b8SAnton Blanchard
346a66086b8SAnton Blanchard3:	bf	cr7*4+0,4f
347a66086b8SAnton Blancharderr3;	ld	r0,0(r4)
348a66086b8SAnton Blanchard	addi	r4,r4,8
349a66086b8SAnton Blancharderr3;	std	r0,0(r3)
350a66086b8SAnton Blanchard	addi	r3,r3,8
351a66086b8SAnton Blanchard
352a66086b8SAnton Blanchard4:	sub	r5,r5,r6
353a66086b8SAnton Blanchard
354a66086b8SAnton Blanchard	/* Get the desination 128B aligned */
355a66086b8SAnton Blanchard	neg	r6,r3
356a66086b8SAnton Blanchard	srdi	r7,r6,4
357a66086b8SAnton Blanchard	mtocrf	0x01,r7
358a66086b8SAnton Blanchard	clrldi	r6,r6,(64-7)
359a66086b8SAnton Blanchard
360a66086b8SAnton Blanchard	li	r9,16
361a66086b8SAnton Blanchard	li	r10,32
362a66086b8SAnton Blanchard	li	r11,48
363a66086b8SAnton Blanchard
364a66086b8SAnton Blanchard	bf	cr7*4+3,5f
3658a583c0aSAndreas Schwaberr3;	lvx	v1,0,r4
366a66086b8SAnton Blanchard	addi	r4,r4,16
3678a583c0aSAndreas Schwaberr3;	stvx	v1,0,r3
368a66086b8SAnton Blanchard	addi	r3,r3,16
369a66086b8SAnton Blanchard
370a66086b8SAnton Blanchard5:	bf	cr7*4+2,6f
3718a583c0aSAndreas Schwaberr3;	lvx	v1,0,r4
372c2ce6f9fSAnton Blancharderr3;	lvx	v0,r4,r9
373a66086b8SAnton Blanchard	addi	r4,r4,32
3748a583c0aSAndreas Schwaberr3;	stvx	v1,0,r3
375c2ce6f9fSAnton Blancharderr3;	stvx	v0,r3,r9
376a66086b8SAnton Blanchard	addi	r3,r3,32
377a66086b8SAnton Blanchard
378a66086b8SAnton Blanchard6:	bf	cr7*4+1,7f
3798a583c0aSAndreas Schwaberr3;	lvx	v3,0,r4
380c2ce6f9fSAnton Blancharderr3;	lvx	v2,r4,r9
381c2ce6f9fSAnton Blancharderr3;	lvx	v1,r4,r10
382c2ce6f9fSAnton Blancharderr3;	lvx	v0,r4,r11
383a66086b8SAnton Blanchard	addi	r4,r4,64
3848a583c0aSAndreas Schwaberr3;	stvx	v3,0,r3
385c2ce6f9fSAnton Blancharderr3;	stvx	v2,r3,r9
386c2ce6f9fSAnton Blancharderr3;	stvx	v1,r3,r10
387c2ce6f9fSAnton Blancharderr3;	stvx	v0,r3,r11
388a66086b8SAnton Blanchard	addi	r3,r3,64
389a66086b8SAnton Blanchard
390a66086b8SAnton Blanchard7:	sub	r5,r5,r6
391a66086b8SAnton Blanchard	srdi	r6,r5,7
392a66086b8SAnton Blanchard
393c75df6f9SMichael Neuling	std	r14,STK_REG(R14)(r1)
394c75df6f9SMichael Neuling	std	r15,STK_REG(R15)(r1)
395c75df6f9SMichael Neuling	std	r16,STK_REG(R16)(r1)
396a66086b8SAnton Blanchard
397a66086b8SAnton Blanchard	li	r12,64
398a66086b8SAnton Blanchard	li	r14,80
399a66086b8SAnton Blanchard	li	r15,96
400a66086b8SAnton Blanchard	li	r16,112
401a66086b8SAnton Blanchard
402a66086b8SAnton Blanchard	mtctr	r6
403a66086b8SAnton Blanchard
404a66086b8SAnton Blanchard	/*
405a66086b8SAnton Blanchard	 * Now do cacheline sized loads and stores. By this stage the
406a66086b8SAnton Blanchard	 * cacheline stores are also cacheline aligned.
407a66086b8SAnton Blanchard	 */
408a66086b8SAnton Blanchard	.align	5
409a66086b8SAnton Blanchard8:
4108a583c0aSAndreas Schwaberr4;	lvx	v7,0,r4
411c2ce6f9fSAnton Blancharderr4;	lvx	v6,r4,r9
412c2ce6f9fSAnton Blancharderr4;	lvx	v5,r4,r10
413c2ce6f9fSAnton Blancharderr4;	lvx	v4,r4,r11
414c2ce6f9fSAnton Blancharderr4;	lvx	v3,r4,r12
415c2ce6f9fSAnton Blancharderr4;	lvx	v2,r4,r14
416c2ce6f9fSAnton Blancharderr4;	lvx	v1,r4,r15
417c2ce6f9fSAnton Blancharderr4;	lvx	v0,r4,r16
418a66086b8SAnton Blanchard	addi	r4,r4,128
4198a583c0aSAndreas Schwaberr4;	stvx	v7,0,r3
420c2ce6f9fSAnton Blancharderr4;	stvx	v6,r3,r9
421c2ce6f9fSAnton Blancharderr4;	stvx	v5,r3,r10
422c2ce6f9fSAnton Blancharderr4;	stvx	v4,r3,r11
423c2ce6f9fSAnton Blancharderr4;	stvx	v3,r3,r12
424c2ce6f9fSAnton Blancharderr4;	stvx	v2,r3,r14
425c2ce6f9fSAnton Blancharderr4;	stvx	v1,r3,r15
426c2ce6f9fSAnton Blancharderr4;	stvx	v0,r3,r16
427a66086b8SAnton Blanchard	addi	r3,r3,128
428a66086b8SAnton Blanchard	bdnz	8b
429a66086b8SAnton Blanchard
430c75df6f9SMichael Neuling	ld	r14,STK_REG(R14)(r1)
431c75df6f9SMichael Neuling	ld	r15,STK_REG(R15)(r1)
432c75df6f9SMichael Neuling	ld	r16,STK_REG(R16)(r1)
433a66086b8SAnton Blanchard
434a66086b8SAnton Blanchard	/* Up to 127B to go */
435a66086b8SAnton Blanchard	clrldi	r5,r5,(64-7)
436a66086b8SAnton Blanchard	srdi	r6,r5,4
437a66086b8SAnton Blanchard	mtocrf	0x01,r6
438a66086b8SAnton Blanchard
439a66086b8SAnton Blanchard	bf	cr7*4+1,9f
4408a583c0aSAndreas Schwaberr3;	lvx	v3,0,r4
441c2ce6f9fSAnton Blancharderr3;	lvx	v2,r4,r9
442c2ce6f9fSAnton Blancharderr3;	lvx	v1,r4,r10
443c2ce6f9fSAnton Blancharderr3;	lvx	v0,r4,r11
444a66086b8SAnton Blanchard	addi	r4,r4,64
4458a583c0aSAndreas Schwaberr3;	stvx	v3,0,r3
446c2ce6f9fSAnton Blancharderr3;	stvx	v2,r3,r9
447c2ce6f9fSAnton Blancharderr3;	stvx	v1,r3,r10
448c2ce6f9fSAnton Blancharderr3;	stvx	v0,r3,r11
449a66086b8SAnton Blanchard	addi	r3,r3,64
450a66086b8SAnton Blanchard
451a66086b8SAnton Blanchard9:	bf	cr7*4+2,10f
4528a583c0aSAndreas Schwaberr3;	lvx	v1,0,r4
453c2ce6f9fSAnton Blancharderr3;	lvx	v0,r4,r9
454a66086b8SAnton Blanchard	addi	r4,r4,32
4558a583c0aSAndreas Schwaberr3;	stvx	v1,0,r3
456c2ce6f9fSAnton Blancharderr3;	stvx	v0,r3,r9
457a66086b8SAnton Blanchard	addi	r3,r3,32
458a66086b8SAnton Blanchard
459a66086b8SAnton Blanchard10:	bf	cr7*4+3,11f
4608a583c0aSAndreas Schwaberr3;	lvx	v1,0,r4
461a66086b8SAnton Blanchard	addi	r4,r4,16
4628a583c0aSAndreas Schwaberr3;	stvx	v1,0,r3
463a66086b8SAnton Blanchard	addi	r3,r3,16
464a66086b8SAnton Blanchard
465a66086b8SAnton Blanchard	/* Up to 15B to go */
466a66086b8SAnton Blanchard11:	clrldi	r5,r5,(64-4)
467a66086b8SAnton Blanchard	mtocrf	0x01,r5
468a66086b8SAnton Blanchard	bf	cr7*4+0,12f
469a66086b8SAnton Blancharderr3;	ld	r0,0(r4)
470a66086b8SAnton Blanchard	addi	r4,r4,8
471a66086b8SAnton Blancharderr3;	std	r0,0(r3)
472a66086b8SAnton Blanchard	addi	r3,r3,8
473a66086b8SAnton Blanchard
474a66086b8SAnton Blanchard12:	bf	cr7*4+1,13f
475a66086b8SAnton Blancharderr3;	lwz	r0,0(r4)
476a66086b8SAnton Blanchard	addi	r4,r4,4
477a66086b8SAnton Blancharderr3;	stw	r0,0(r3)
478a66086b8SAnton Blanchard	addi	r3,r3,4
479a66086b8SAnton Blanchard
480a66086b8SAnton Blanchard13:	bf	cr7*4+2,14f
481a66086b8SAnton Blancharderr3;	lhz	r0,0(r4)
482a66086b8SAnton Blanchard	addi	r4,r4,2
483a66086b8SAnton Blancharderr3;	sth	r0,0(r3)
484a66086b8SAnton Blanchard	addi	r3,r3,2
485a66086b8SAnton Blanchard
486a66086b8SAnton Blanchard14:	bf	cr7*4+3,15f
487a66086b8SAnton Blancharderr3;	lbz	r0,0(r4)
488a66086b8SAnton Blancharderr3;	stb	r0,0(r3)
489a66086b8SAnton Blanchard
490a66086b8SAnton Blanchard15:	addi	r1,r1,STACKFRAMESIZE
491*4e991e3cSNicholas Piggin	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
492a66086b8SAnton Blanchard
/*
 * VMX copy for source and destination that differ in their low four address
 * bits.  r3 = destination, r4 = source, r5 = bytes left; the stack frame
 * pushed before the branch here is still active.
 *
 * Once the destination is 16B aligned, v16 holds the permute control vector
 * built from the source address and v0 always holds the most recently loaded
 * source quadword.  Each output quadword is VPERM(previous, next, v16).
 * Because one quadword is loaded ahead, r4 runs 16 bytes in front of the
 * data already consumed until it is corrected before the final 0-15 bytes.
 */
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)	/* r6 = (-dest) & 15 */

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7		/* cr7 <- 64/32/16 byte bits of -dest */
	clrldi	r6,r6,(64-7)	/* r6 = (-dest) & 127 */

	/* Index registers for the lvx/stvx offsets 16, 32 and 48. */
	li	r9,16
	li	r10,32
	li	r11,48

	LVS(v16,0,r4)		/* Setup permute control vector */
err3;	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1	/* v0 = v1: carry the last quadword forward */

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7		/* number of whole 128B blocks */

	/* r14-r16 become index registers below, so save them first. */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

	/* Drop the frame and finish through exit_vmx_usercopy. */
15:	addi	r1,r1,STACKFRAMESIZE
	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */
696