xref: /openbmc/linux/arch/powerpc/lib/copy_mc_64.S (revision cbecf716ca618fd44feda6bd9a64a8179d031fc5)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/errno.h>

/*
 * err1 - tag the instruction that follows with an exception-table entry
 * whose fixup target is .Ldo_err1.
 *
 * Written as "err1; <load/store>" on a single line.  Used for accesses made
 * while no stack frame is held, because .Ldo_err1 does not release one.
 *
 * NOTE(review): EX_TABLE is defined outside this file.  Presumably it emits
 * a (faulting address, fixup address) pair into a separate section, so that
 * local label 100 resolves to the address of the tagged instruction --
 * confirm against its definition.
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

/*
 * err2 - tag the instruction that follows with an exception-table entry
 * whose fixup target is .Ldo_err2.
 *
 * Written as "err2; <load/store>" on a single line.  Used for accesses made
 * while the STACKFRAMESIZE frame holding r14-r22 is live: .Ldo_err2 reloads
 * those registers and releases the frame before retrying.
 *
 * NOTE(review): EX_TABLE is defined outside this file.  Presumably it emits
 * a (faulting address, fixup address) pair into a separate section, so that
 * local label 200 resolves to the address of the tagged instruction --
 * confirm against its definition.
 */
	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

/*
 * err3 - tag the instruction that follows with an exception-table entry
 * whose fixup target is .Ldone.
 *
 * Written as "err3; <load/store>" on a single line.  Used only inside the
 * byte-wise retry loop, where a second fault ends the copy instead of
 * starting another retry.
 *
 * NOTE(review): EX_TABLE is defined outside this file.  Presumably it emits
 * a (faulting address, fixup address) pair into a separate section, so that
 * local label 300 resolves to the address of the tagged instruction --
 * confirm against its definition.
 */
	.macro err3
300:
	EX_TABLE(300b,.Ldone)
	.endm

/*
 * Fixup paths for accesses tagged with err2 and err1.
 *
 * .Ldo_err2: entered while the STACKFRAMESIZE frame is live.  Reloads
 * r14-r22 from the frame, releases the frame, and falls through to
 * .Ldo_err1.
 *
 * .Ldo_err1: retries the copy one byte at a time.
 *   In:   r3 = destination, r4 = source, r7 = number of bytes to retry
 *   Out:  r3 = 0 if all r7 bytes were copied.  A fault inside the loop is
 *         routed by err3 to .Ldone, which returns the CTR value instead.
 *
 * r7 is expected to be non-zero here: bdnz decrements CTR before testing
 * it, so a zero count would not end the loop after zero iterations.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0			/* every remaining byte was copied */
	blr

/*
 * .Ldone - fixup target for the byte-wise retry loop (accesses tagged err3).
 *
 * Returns the current CTR value in r3.  The retry loop loads CTR with the
 * byte count and bdnz decrements it once per completed byte, so this is the
 * number of bytes that were not copied, including the one that faulted.
 */
.Ldone:
	mfctr	r3
	blr


/*
 * copy_mc_generic - copy r5 bytes from r4 to r3, recovering from faults on
 * the tagged loads and stores.
 *
 * In:   r3 = destination, r4 = source, r5 = byte count
 * Out:  r3 = 0 when every byte was copied; otherwise the number of bytes
 *       left uncopied, as determined by the byte-wise retry in
 *       .Ldo_err1/.Ldone
 * Uses: r0, r5-r12, CTR, cr0 and cr7 as scratch.  r14-r22 are saved in a
 *       STACKFRAMESIZE-byte frame for the 128B/64B steps and restored
 *       before returning.
 *
 * r7 shadows the number of bytes not yet copied.  In every step r3, r4 and
 * r7 are updated together, and only after all of that step's loads and
 * stores have completed.  A fault on any tagged access therefore reaches
 * the fixup code with r3/r4/r7 describing the same position, and the retry
 * restarts cleanly at the beginning of the interrupted step.
 *
 * NOTE(review): presumably called from C as
 *   unsigned long copy_mc_generic(void *to, const void *from, unsigned long n)
 * and intended for sources that may raise a machine check -- confirm against
 * the prototype and callers; neither is visible in this file.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes still to copy */
	cmpldi	r5,16
	blt	.Lshort_copy

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -source */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to align (0-7) */

	bf	cr7*4+3,1f		/* 1 byte? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)
	addi	r4,r4,1
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2 bytes? */
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4 bytes? */
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* r5 = bytes left after alignment */
	cmpldi	r5,128			/* cr0 is consumed by "blt 5f" below */

	/*
	 * Open a frame and save r14-r22, which the 128B and 64B steps use as
	 * data registers.  LR is stored at offset 16 of the caller's frame;
	 * nothing in this file reloads it (no calls are made).
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left */
	srdi	r6,r5,7
	mtctr	r6			/* CTR = number of 128B blocks */

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	/* Advance only once the whole block is stored (see header). */
	addi	r4,r4,128
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 = bytes left, now below 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 = 16-byte units left (64/32/16 bits) */

6:	bf	cr7*4+1,7f		/* 64 bytes? */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r4,r4,64
	addi	r3,r3,64
	subi	r7,r7,64

	/*
	 * The remaining steps use only r0, r6, r8 and r9, so restore r14-r22
	 * and release the frame here.  From this point on accesses are tagged
	 * err1 rather than err2.
	 */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32 bytes? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r4,r4,32
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16 bytes? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r4,r4,16
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 = bytes left, now below 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 = low 4 bits of the count */
	bf	cr7*4+0,12f		/* 8 bytes? */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r4,r4,8
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* 4 bytes? */
err1;	lwz	r0,0(r4)
err1;	stw	r0,0(r3)
	addi	r4,r4,4
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* 2 bytes? */
err1;	lhz	r0,0(r4)
err1;	sth	r0,0(r3)
	addi	r4,r4,2
	addi	r3,r3,2
	subi	r7,r7,2

	/* Last byte: nothing follows, so r3/r4/r7 are not updated. */
14:	bf	cr7*4+3,15f		/* 1 byte? */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* everything copied */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);