/*
 * arch/ppc64/lib/copyuser.S
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

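/*
 * __copy_tofrom_user(to, from, n)
 * r3 = destination, r4 = source, r5 = number of bytes to copy.
 * Returns 0 on success, or the number of bytes not copied.
 */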
	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	mtcrf	0x01,r5
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned
.Ldst_aligned:
	andi.	r0,r4,7
	addi	r3,r3,-16
	bne	.Lsrc_unaligned
	srdi	r7,r5,4
20:	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,22f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f
	addi	r3,r3,16
23:	ld	r9,8(r4)
.Ldo_tail:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

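/*
 * Source is not 8-byte aligned relative to the (now aligned)
 * destination: each destination doubleword is assembled from two
 * source doublewords, shifted left by r10 = 8 * (source offset)
 * bits and right by r11 = 64 - r10 bits.
 */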
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail

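/*
 * Destination is not 8-byte aligned: copy 1, 2 and/or 4 bytes
 * (as indicated by cr7, loaded from the alignment count in r6)
 * until the destination reaches a doubleword boundary, then
 * rejoin the aligned path.
 */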
.Ldst_unaligned:
	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16	/* remaining length < 16? */
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	mtcrf	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

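/*
 * Copies of fewer than 16 bytes: move 8, 4, 2 and/or 1 bytes
 * as selected by the low bits of the length, already in cr7.
 */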
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * Exception handlers follow.
 * We have to return the number of bytes not copied;
 * for an exception on a load, we set the rest of the destination to 0.
 */

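/*
 * Each label 1NN below is the fixup entry (registered in the
 * __ex_table section at the end of this routine) for the faulting
 * load at label NN above; the addi/add instructions adjust r3 to
 * point at the first destination byte that has not been stored.
 */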
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
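/*
 * Recover the original arguments from the stack: r6 becomes the
 * number of bytes already copied (current r3 minus the original
 * destination); advance the source by that amount and reduce the
 * count accordingly.
 */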
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
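/*
 * The unrolled loop below keeps six cache lines (128 bytes apart)
 * in flight at once: each pass loads from offsets 0, 128, 256, 384,
 * 512 and 640 ahead of the source pointer while storing the data
 * loaded on the previous pass.
 */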
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
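/*
 * Label 100 is the common fixup entry for every access in the
 * page-copy loop (see the second __ex_table below): restore the
 * saved non-volatile registers and the original r3/r4, reset r5 to
 * 4096 and retry via the standard path, whose exception handlers
 * report the number of bytes not copied exactly.
 */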
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b
