xref: /openbmc/linux/arch/powerpc/lib/copyuser_64.S (revision 8730046c)
1/*
2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <asm/processor.h>
10#include <asm/ppc_asm.h>
11#include <asm/export.h>
12
/*
 * sLd / sHd are endian-neutral names for the two doubleword shift
 * instructions.  Each expands to sld or srd depending on the byte order,
 * so the shift-and-merge code in .Lsrc_unaligned can be written once in
 * terms of memory address direction rather than bit direction.
 */
13#ifdef __BIG_ENDIAN__
14#define sLd sld		/* Shift towards low-numbered address. */
15#define sHd srd		/* Shift towards high-numbered address. */
16#else
17#define sLd srd		/* Shift towards low-numbered address. */
18#define sHd sld		/* Shift towards high-numbered address. */
19#endif
20
/*
 * __copy_tofrom_user: exported entry point.
 *
 * The feature section below holds one of two instructions: a nop, which
 * falls straight through into __copy_tofrom_user_base, or a branch to
 * __copy_tofrom_user_power7 (not defined in this file).  The selection is
 * keyed on CPU_FTR_VMX_COPY.
 * NOTE(review): presumably the nop is the variant used when
 * CPU_FTR_VMX_COPY is clear -- confirm against the feature-section macros.
 */
21	.align	7
22_GLOBAL_TOC(__copy_tofrom_user)
23BEGIN_FTR_SECTION
24	nop
25FTR_SECTION_ELSE
26	b	__copy_tofrom_user_power7
27ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
/*
 * __copy_tofrom_user_base: generic copy with fault recovery.
 *
 * In:   r3 = destination, r4 = source, r5 = byte count
 * Out:  r3 = 0 if everything was copied, otherwise the number of bytes
 *       not copied (computed by the exception handlers further down).
 *
 * The three arguments are saved at -24(r1), -16(r1) and -8(r1), below the
 * stack pointer, so that the exception handlers can work out how far the
 * copy had progressed.
 *
 * Condition register usage set up here:
 *   cr1 = unsigned compare of the length against 16
 *   cr7 = low four bits of the length (bit 0 <=> 8, 1 <=> 4, 2 <=> 2,
 *         3 <=> 1), tested by the tail and short-copy code
 *
 * Dispatch: exactly 4096 bytes with both pointers 4096-aligned goes to
 * .Lcopy_page_4K; fewer than 16 bytes goes to .Lshort_copy; a destination
 * that is not 8-byte aligned goes to .Ldst_unaligned (unless that branch
 * is patched to a nop); everything else falls into .Ldst_aligned.
 */
28_GLOBAL(__copy_tofrom_user_base)
29	/* first check for a whole page copy on a page boundary */
30	cmpldi	cr1,r5,16
31	cmpdi	cr6,r5,4096
32	or	r0,r3,r4
33	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
34	andi.	r0,r0,4095
35	std	r3,-24(r1)
	/* cr0.eq := (both pointers page aligned) AND (length == 4096) */
36	crand	cr0*4+2,cr0*4+2,cr6*4+2
37	std	r4,-16(r1)
38	std	r5,-8(r1)
39	dcbt	0,r4
40	beq	.Lcopy_page_4K
	/* r6 = bytes needed to bring the destination to an 8-byte boundary */
41	andi.	r6,r6,7
42	PPC_MTOCRF(0x01,r5)
43	blt	cr1,.Lshort_copy
44/* Below we want to nop out the bne if we're on a CPU that has the
45 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
46 * cleared.
47 * At the time of writing the only CPU that has this combination of bits
48 * set is Power6.
49 */
50BEGIN_FTR_SECTION
51	nop
52FTR_SECTION_ELSE
53	bne	.Ldst_unaligned
54ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
55		    CPU_FTR_UNALIGNED_LD_STD)
/*
 * Main doubleword copy.  Reached by fall-through from the entry code,
 * from .Ldst_unaligned once the destination has been aligned, and from
 * the page-copy fault handler at label 100.
 *
 * r3 is biased by -16 on entry to this section; the store offsets in the
 * loop and the "addi r3,r3,16" at .Ldo_tail compensate for that.
 *
 * The loop at 21: moves 32 bytes per iteration (ctr = length >> 5) and is
 * software pipelined: loads for the next 16 bytes are issued before the
 * stores of the previous 16.  Bit 0x10 of the length decides whether the
 * loop is entered in the middle (at 22:) or whether a leading 16-byte
 * chunk is handled first.
 *
 * The three-digit labels (220, 221, 270, ...) mark the second access of
 * each load/store pair so that it can have its own EX_TABLE entry.
 */
56.Ldst_aligned:
57	addi	r3,r3,-16
58BEGIN_FTR_SECTION
	/* r0 = source misalignment; .Lsrc_unaligned relies on this value */
59	andi.	r0,r4,7
60	bne	.Lsrc_unaligned
61END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
62	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	/* r0 = number of 32-byte chunks; cr1.eq if there are none */
63	srdi	r0,r5,5
64	cmpdi	cr1,r0,0
6520:	ld	r7,0(r4)
66220:	ld	r6,8(r4)
67	addi	r4,r4,16
68	mtctr	r0
	/* no odd 16-byte chunk: enter the loop at its second half */
69	andi.	r0,r5,0x10
70	beq	22f
	/* odd 16-byte chunk: undo the pointer bias and hold it in r9/r8 */
71	addi	r3,r3,16
72	addi	r4,r4,-16
73	mr	r9,r7
74	mr	r8,r6
	/* no 32-byte chunks at all: just store the 16 bytes at 72: */
75	beq	cr1,72f
7621:	ld	r7,16(r4)
77221:	ld	r6,24(r4)
78	addi	r4,r4,32
7970:	std	r9,0(r3)
80270:	std	r8,8(r3)
8122:	ld	r9,0(r4)
82222:	ld	r8,8(r4)
8371:	std	r7,16(r3)
84271:	std	r6,24(r3)
85	addi	r3,r3,32
86	bdnz	21b
8772:	std	r9,0(r3)
88272:	std	r8,8(r3)
	/* fewer than 16 bytes may remain; done if the low 4 bits are zero */
89	andi.	r5,r5,0xf
90	beq+	3f
91	addi	r4,r4,16
/*
 * Tail: copy the remaining 8, 4, 2 and 1 bytes as selected by the cr7
 * bits (low four bits of the remaining length).
 */
92.Ldo_tail:
93	addi	r3,r3,16
94	bf	cr7*4+0,246f
95244:	ld	r9,0(r4)
96	addi	r4,r4,8
97245:	std	r9,0(r3)
98	addi	r3,r3,8
99246:	bf	cr7*4+1,1f
10023:	lwz	r9,0(r4)
101	addi	r4,r4,4
10273:	stw	r9,0(r3)
103	addi	r3,r3,4
1041:	bf	cr7*4+2,2f
10544:	lhz	r9,0(r4)
106	addi	r4,r4,2
10774:	sth	r9,0(r3)
108	addi	r3,r3,2
1092:	bf	cr7*4+3,3f
11045:	lbz	r9,0(r4)
11175:	stb	r9,0(r3)
	/* success: return 0 bytes not copied */
1123:	li	r3,0
113	blr
114
/*
 * Source not 8-byte aligned.  Entered from .Ldst_aligned with
 * r0 = r4 & 7 and r3 already biased by -16.
 *
 * The source pointer is rounded down to a doubleword boundary and every
 * destination doubleword is built from two neighbouring source
 * doublewords:  sLd by r10 (= 8 * r0 bits) OR sHd by r11 (= 64 - r10).
 *
 *   r6  = length >> 3 (whole doublewords), compared against 3 in cr6
 *   ctr = (length - 16) >> 4, iterations of the two-doubleword loop at 1:
 *   cr0 = result of (length - 16) & 7, i.e. whether trailing bytes exist;
 *         it is still live at the "bne 6f" after label 80
 *   r5  = trailing bytes + source misalignment, used at 6: to decide
 *         whether one more source doubleword must be loaded
 *
 * cr7 bit 0 (length & 8) selects between the two entry sequences at 24:
 * and 28:.
 */
115.Lsrc_unaligned:
116	srdi	r6,r5,3
117	addi	r5,r5,-16
118	subf	r4,r0,r4
119	srdi	r7,r5,4
120	sldi	r10,r0,3
121	cmpldi	cr6,r6,3
122	andi.	r5,r5,7
123	mtctr	r7
124	subfic	r11,r10,64
125	add	r5,r5,r0
126	bt	cr7*4+0,28f
127
12824:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
12925:	ld	r0,8(r4)
130	sLd	r6,r9,r10
13126:	ldu	r9,16(r4)
132	sHd	r7,r0,r11
133	sLd	r8,r0,r10
134	or	r7,r7,r6
135	blt	cr6,79f
13627:	ld	r0,8(r4)
137	b	2f
138
13928:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
14029:	ldu	r9,8(r4)
141	sLd	r8,r0,r10
142	addi	r3,r3,-8
143	blt	cr6,5f
14430:	ld	r0,8(r4)
145	sHd	r12,r9,r11
146	sLd	r6,r9,r10
14731:	ldu	r9,16(r4)
148	or	r12,r8,r12
149	sHd	r7,r0,r11
150	sLd	r8,r0,r10
151	addi	r3,r3,16
152	beq	cr6,78f
153
	/* steady state: two destination doublewords per iteration */
1541:	or	r7,r7,r6
15532:	ld	r0,8(r4)
15676:	std	r12,8(r3)
1572:	sHd	r12,r9,r11
158	sLd	r6,r9,r10
15933:	ldu	r9,16(r4)
160	or	r12,r8,r12
16177:	stdu	r7,16(r3)
162	sHd	r7,r0,r11
163	sLd	r8,r0,r10
164	bdnz	1b
165
	/* drain the doublewords still held in registers */
16678:	std	r12,8(r3)
167	or	r7,r7,r6
16879:	std	r7,16(r3)
1695:	sHd	r12,r9,r11
170	or	r12,r8,r12
17180:	std	r12,24(r3)
	/* cr0 still holds the trailing-byte test from the andi. above */
172	bne	6f
173	li	r3,0
174	blr
	/*
	 * Trailing 1..7 bytes.  r9 holds the last source doubleword loaded;
	 * if the bytes needed extend past it (r5 > 8) one more doubleword
	 * is loaded at 34: and merged in.
	 */
1756:	cmpwi	cr1,r5,8
176	addi	r3,r3,32
177	sLd	r9,r9,r10
178	ble	cr1,7f
17934:	ld	r0,8(r4)
180	sHd	r7,r0,r11
181	or	r9,r7,r9
	/*
	 * Store 4, 2 and 1 bytes from r9 as selected by cr7.  On big-endian
	 * the wanted bytes are rotated into the low-order end before each
	 * store; on little-endian they are stored first and r9 is rotated
	 * afterwards to bring up the next bytes.
	 */
1827:
183	bf	cr7*4+1,1f
184#ifdef __BIG_ENDIAN__
185	rotldi	r9,r9,32
186#endif
18794:	stw	r9,0(r3)
188#ifdef __LITTLE_ENDIAN__
189	rotrdi	r9,r9,32
190#endif
191	addi	r3,r3,4
1921:	bf	cr7*4+2,2f
193#ifdef __BIG_ENDIAN__
194	rotldi	r9,r9,16
195#endif
19695:	sth	r9,0(r3)
197#ifdef __LITTLE_ENDIAN__
198	rotrdi	r9,r9,16
199#endif
200	addi	r3,r3,2
2012:	bf	cr7*4+3,3f
202#ifdef __BIG_ENDIAN__
203	rotldi	r9,r9,8
204#endif
20596:	stb	r9,0(r3)
206#ifdef __LITTLE_ENDIAN__
207	rotrdi	r9,r9,8
208#endif
2093:	li	r3,0
210	blr
211
/*
 * Destination not 8-byte aligned.  r6 (1..7) is the number of bytes
 * needed to reach the next 8-byte boundary.  Copy 1, 2 and/or 4 bytes as
 * selected by the bits of r6, then rejoin .Ldst_aligned.
 *
 * r3 and r4 are NOT advanced while the leading bytes are copied; r7 is
 * the running offset used by the indexed loads/stores.  The fault
 * handlers for labels 36/37/82/83 depend on this (they add r7 to r3).
 */
212.Ldst_unaligned:
213	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
214	subf	r5,r6,r5
215	li	r7,0
	/* cr1 is re-evaluated for the reduced length; .Ldst_aligned tests it */
216	cmpldi	cr1,r5,16
217	bf	cr7*4+3,1f
21835:	lbz	r0,0(r4)
21981:	stb	r0,0(r3)
220	addi	r7,r7,1
2211:	bf	cr7*4+2,2f
22236:	lhzx	r0,r7,r4
22382:	sthx	r0,r7,r3
224	addi	r7,r7,2
2252:	bf	cr7*4+1,3f
22637:	lwzx	r0,r7,r4
22783:	stwx	r0,r7,r3
	/* reload cr7 with the low bits of the remaining length for the tail */
2283:	PPC_MTOCRF(0x01,r5)
229	add	r4,r6,r4
230	add	r3,r6,r3
231	b	.Ldst_aligned
232
/*
 * Copies of fewer than 16 bytes.  cr7 holds the low four bits of the
 * length; each bit selects one step: 8 bytes (done as two word accesses),
 * then 4, 2 and 1.  No alignment is assumed.  Returns 0 in r3.
 */
233.Lshort_copy:
234	bf	cr7*4+0,1f
23538:	lwz	r0,0(r4)
23639:	lwz	r9,4(r4)
237	addi	r4,r4,8
23884:	stw	r0,0(r3)
23985:	stw	r9,4(r3)
240	addi	r3,r3,8
2411:	bf	cr7*4+1,2f
24240:	lwz	r0,0(r4)
243	addi	r4,r4,4
24486:	stw	r0,0(r3)
245	addi	r3,r3,4
2462:	bf	cr7*4+2,3f
24741:	lhz	r0,0(r4)
248	addi	r4,r4,2
24987:	sth	r0,0(r3)
250	addi	r3,r3,2
2513:	bf	cr7*4+3,4f
25242:	lbz	r0,0(r4)
25388:	stb	r0,0(r3)
2544:	li	r3,0
255	blr
256
257/*
258 * exception handlers follow
259 * we have to return the number of bytes not copied
260 * for an exception on a load, we set the rest of the destination to 0
261 */
262
/*
 * Fixup entry points for faulting LOADS.  Label 1NN (or 3NN for the
 * second access of a pair) is the EX_TABLE target for the load at label
 * NN (or 2NN) in the copy code above.
 *
 * The job here is to make r3 point at the first destination byte that
 * has not been written.  The labels are stacked so that entering higher
 * up falls through more "addi r3,r3,8" instructions:
 *   136/137            r3 += r7 (offset used in .Ldst_unaligned)
 *   130/131            r3 += 24
 *   120 .. 133         r3 += 16
 *   132                r3 += 8
 *   121 .. 145         r3 unchanged
 */
263136:
264137:
265	add	r3,r3,r7
266	b	1f
267130:
268131:
269	addi	r3,r3,8
270120:
271320:
272122:
273322:
274124:
275125:
276126:
277127:
278128:
279129:
280133:
281	addi	r3,r3,8
282132:
283	addi	r3,r3,8
284121:
285321:
286344:
287134:
288135:
289138:
290139:
291140:
292141:
293142:
294123:
295144:
296145:
297
298/*
299 * here we have had a fault on a load and r3 points to the first
300 * unmodified byte of the destination
301 */
	/* r6 = bytes already copied = r3 - original destination */
3021:	ld	r6,-24(r1)
303	ld	r4,-16(r1)
304	ld	r5,-8(r1)
305	subf	r6,r6,r3
	/* r4 = original source + bytes already copied */
306	add	r4,r4,r6
307	subf	r5,r6,r5	/* #bytes left to go */
308
309/*
310 * first see if we can copy any more bytes before hitting another exception
311 */
	/*
	 * Byte-by-byte retry with ctr = bytes left.  A fault on the load at
	 * 43: goes to 143 (zero-fill), a fault on the store at 89: goes to
	 * 189 (store fixup).
	 */
312	mtctr	r5
31343:	lbz	r0,0(r4)
314	addi	r4,r4,1
31589:	stb	r0,0(r3)
316	addi	r3,r3,1
317	bdnz	43b
318	li	r3,0		/* huh? all copied successfully this time? */
319	blr
320
321/*
322 * here we have trapped again, need to clear ctr bytes starting at r3
323 */
/*
 * Label 143: fixup for a second load fault during the byte-by-byte retry.
 * ctr holds the number of bytes still to copy and r3 the next destination
 * byte.  The count becomes the return value, then that many destination
 * bytes are zeroed: single bytes up to an 8-byte boundary, doublewords,
 * then any remaining bytes.  Faults on the zeroing stores (90/91/92) go
 * to 190/191/192, which simply return.
 */
324143:	mfctr	r5
325	li	r0,0
326	mr	r4,r3
327	mr	r3,r5		/* return the number of bytes not copied */
	/* zero single bytes until r4 is 8-byte aligned or the count runs out */
3281:	andi.	r9,r4,7
329	beq	3f
33090:	stb	r0,0(r4)
331	addic.	r5,r5,-1
332	addi	r4,r4,1
333	bne	1b
334	blr
	/* r9 = whole doublewords to zero, r5 = bytes left after them */
3353:	cmpldi	cr1,r5,8
336	srdi	r9,r5,3
337	andi.	r5,r5,7
338	blt	cr1,93f
339	mtctr	r9
34091:	std	r0,0(r4)
341	addi	r4,r4,8
342	bdnz	91b
	/* cr0 comes from the andi. above: return if no odd bytes remain */
34393:	beqlr
344	mtctr	r5
34592:	stb	r0,0(r4)
346	addi	r4,r4,1
347	bdnz	92b
348	blr
349
350/*
351 * exception handlers for stores: we just need to work
352 * out how many bytes weren't copied
353 */
/*
 * Fixup entry points for faulting STORES.  Label 1NN (or 3NN) is the
 * EX_TABLE target for the store at label NN (or 2NN).  As with the load
 * fixups, r3 is first advanced to the first byte not stored, using
 * stacked labels that fall through a chain of addi instructions:
 *   182/183            r3 += r7 (offset used in .Ldst_unaligned)
 *   371/180            r3 += 24
 *   171/177/179        r3 += 16
 *   370/372/176/178    r3 += 8
 *   185                r3 += 4
 *   170 .. 196         r3 unchanged
 * The result is then (original destination + original length) - r3.
 */
354182:
355183:
356	add	r3,r3,r7
357	b	1f
358371:
359180:
360	addi	r3,r3,8
361171:
362177:
363179:
364	addi	r3,r3,8
365370:
366372:
367176:
368178:
369	addi	r3,r3,4
370185:
371	addi	r3,r3,4
372170:
373172:
374345:
375173:
376174:
377175:
378181:
379184:
380186:
381187:
382188:
383189:
384194:
385195:
386196:
3871:
388	ld	r6,-24(r1)
389	ld	r5,-8(r1)
	/* r6 = one past the end of the destination buffer */
390	add	r6,r6,r5
391	subf	r3,r3,r6	/* #bytes not copied */
/*
 * 190/191/192: a zeroing store in the 143 handler faulted; r3 was already
 * set to the return value there, so just return.
 */
392190:
393191:
394192:
395	blr			/* #bytes not copied in r3 */
396
/*
 * Exception table for the generic copy paths and the fixup code itself.
 * Each entry names a user-memory access (first argument) and the fixup
 * label to resume at if that access faults (second argument).  The "b"
 * suffix selects the nearest preceding definition of the numeric label,
 * so these entries must stay ahead of .Lcopy_page_4K, which reuses the
 * same label numbers.
 * NOTE(review): EX_TABLE is defined outside this file; the description
 * above is inferred from how the labels pair up -- confirm.
 */
397	EX_TABLE(20b,120b)
398	EX_TABLE(220b,320b)
399	EX_TABLE(21b,121b)
400	EX_TABLE(221b,321b)
401	EX_TABLE(70b,170b)
402	EX_TABLE(270b,370b)
403	EX_TABLE(22b,122b)
404	EX_TABLE(222b,322b)
405	EX_TABLE(71b,171b)
406	EX_TABLE(271b,371b)
407	EX_TABLE(72b,172b)
408	EX_TABLE(272b,372b)
409	EX_TABLE(244b,344b)
410	EX_TABLE(245b,345b)
411	EX_TABLE(23b,123b)
412	EX_TABLE(73b,173b)
413	EX_TABLE(44b,144b)
414	EX_TABLE(74b,174b)
415	EX_TABLE(45b,145b)
416	EX_TABLE(75b,175b)
417	EX_TABLE(24b,124b)
418	EX_TABLE(25b,125b)
419	EX_TABLE(26b,126b)
420	EX_TABLE(27b,127b)
421	EX_TABLE(28b,128b)
422	EX_TABLE(29b,129b)
423	EX_TABLE(30b,130b)
424	EX_TABLE(31b,131b)
425	EX_TABLE(32b,132b)
426	EX_TABLE(76b,176b)
427	EX_TABLE(33b,133b)
428	EX_TABLE(77b,177b)
429	EX_TABLE(78b,178b)
430	EX_TABLE(79b,179b)
431	EX_TABLE(80b,180b)
432	EX_TABLE(34b,134b)
433	EX_TABLE(94b,194b)
434	EX_TABLE(95b,195b)
435	EX_TABLE(96b,196b)
436	EX_TABLE(35b,135b)
437	EX_TABLE(81b,181b)
438	EX_TABLE(36b,136b)
439	EX_TABLE(82b,182b)
440	EX_TABLE(37b,137b)
441	EX_TABLE(83b,183b)
442	EX_TABLE(38b,138b)
443	EX_TABLE(39b,139b)
444	EX_TABLE(84b,184b)
445	EX_TABLE(85b,185b)
446	EX_TABLE(40b,140b)
447	EX_TABLE(86b,186b)
448	EX_TABLE(41b,141b)
449	EX_TABLE(87b,187b)
450	EX_TABLE(42b,142b)
451	EX_TABLE(88b,188b)
452	EX_TABLE(43b,143b)
453	EX_TABLE(89b,189b)
454	EX_TABLE(90b,190b)
455	EX_TABLE(91b,191b)
456	EX_TABLE(92b,192b)
457
458/*
459 * Routine to copy a whole page of data, optimized for POWER4.
460 * On POWER4 it is more than 50% faster than the simple loop
461 * above (following the .Ldst_aligned label).
462 */
/*
 * Whole-page copy.  Entered from __copy_tofrom_user_base only when the
 * length is exactly 4096 and both pointers are 4096-byte aligned.
 *
 * r20-r31 are spilled below the stack pointer at -120(r1) .. -32(r1),
 * directly under the saved r3/r4/r5 at -24 .. -8; no frame is allocated.
 *
 * r5 counts 32-byte units still to be handled (starts at 4096/32 - 1).
 * The outer loop at 0: works on six streams spaced 128 bytes apart and
 * moves 6 * 128 = 768 bytes per pass (r5 -= 24); it repeats while the
 * value of r5 after the decrement is >= 24 (cmpwi below, tested by the
 * bge).  The remainder is copied 32 bytes at a time by the loop at 3:
 * plus the final group at 4:.
 *
 * r3 is biased by -8 so stores use offsets 8.. while loads use 0..
 *
 * Every numeric label here has an EX_TABLE entry pointing at label 100,
 * which abandons this path and redoes the copy with the generic code.
 */
463.Lcopy_page_4K:
464	std	r31,-32(1)
465	std	r30,-40(1)
466	std	r29,-48(1)
467	std	r28,-56(1)
468	std	r27,-64(1)
469	std	r26,-72(1)
470	std	r25,-80(1)
471	std	r24,-88(1)
472	std	r23,-96(1)
473	std	r22,-104(1)
474	std	r21,-112(1)
475	std	r20,-120(1)
476	li	r5,4096/32 - 1
477	addi	r3,r3,-8
	/* r0 = inner-loop trip count per outer pass */
478	li	r0,5
4790:	addi	r5,r5,-24
480	mtctr	r0
	/* prime the pipeline: first two doublewords of each of the six streams */
48120:	ld	r22,640(4)
48221:	ld	r21,512(4)
48322:	ld	r20,384(4)
48423:	ld	r11,256(4)
48524:	ld	r9,128(4)
48625:	ld	r7,0(4)
48726:	ld	r25,648(4)
48827:	ld	r24,520(4)
48928:	ld	r23,392(4)
49029:	ld	r10,264(4)
49130:	ld	r8,136(4)
49231:	ldu	r6,8(4)
	/* cr0 is consumed by the "bge 0b" after the inner loop */
493	cmpwi	r5,24
	/* inner loop: 24 bytes per stream per iteration, loads run ahead of stores */
4941:
49532:	std	r22,648(3)
49633:	std	r21,520(3)
49734:	std	r20,392(3)
49835:	std	r11,264(3)
49936:	std	r9,136(3)
50037:	std	r7,8(3)
50138:	ld	r28,648(4)
50239:	ld	r27,520(4)
50340:	ld	r26,392(4)
50441:	ld	r31,264(4)
50542:	ld	r30,136(4)
50643:	ld	r29,8(4)
50744:	std	r25,656(3)
50845:	std	r24,528(3)
50946:	std	r23,400(3)
51047:	std	r10,272(3)
51148:	std	r8,144(3)
51249:	std	r6,16(3)
51350:	ld	r22,656(4)
51451:	ld	r21,528(4)
51552:	ld	r20,400(4)
51653:	ld	r11,272(4)
51754:	ld	r9,144(4)
51855:	ld	r7,16(4)
51956:	std	r28,664(3)
52057:	std	r27,536(3)
52158:	std	r26,408(3)
52259:	std	r31,280(3)
52360:	std	r30,152(3)
52461:	stdu	r29,24(3)
52562:	ld	r25,664(4)
52663:	ld	r24,536(4)
52764:	ld	r23,408(4)
52865:	ld	r10,280(4)
52966:	ld	r8,152(4)
53067:	ldu	r6,24(4)
531	bdnz	1b
	/* store the last doubleword of each stream for this pass */
53268:	std	r22,648(3)
53369:	std	r21,520(3)
53470:	std	r20,392(3)
53571:	std	r11,264(3)
53672:	std	r9,136(3)
53773:	std	r7,8(3)
	/* step both pointers past the other five streams */
53874:	addi	r4,r4,640
53975:	addi	r3,r3,648
540	bge	0b
	/* remainder: ctr = r5 iterations of 32 bytes, then one final 32 bytes */
541	mtctr	r5
54276:	ld	r7,0(4)
54377:	ld	r8,8(4)
54478:	ldu	r9,16(4)
5453:
54679:	ld	r10,8(4)
54780:	std	r7,8(3)
54881:	ld	r7,16(4)
54982:	std	r8,16(3)
55083:	ld	r8,24(4)
55184:	std	r9,24(3)
55285:	ldu	r9,32(4)
55386:	stdu	r10,32(3)
554	bdnz	3b
5554:
55687:	ld	r10,8(4)
55788:	std	r7,8(3)
55889:	std	r8,16(3)
55990:	std	r9,24(3)
56091:	std	r10,32(3)
	/* restore the spilled registers and return 0 (all bytes copied) */
5619:	ld	r20,-120(1)
562	ld	r21,-112(1)
563	ld	r22,-104(1)
564	ld	r23,-96(1)
565	ld	r24,-88(1)
566	ld	r25,-80(1)
567	ld	r26,-72(1)
568	ld	r27,-64(1)
569	ld	r28,-56(1)
570	ld	r29,-48(1)
571	ld	r30,-40(1)
572	ld	r31,-32(1)
573	li	r3,0
574	blr
575
576/*
577 * on an exception, reset to the beginning and jump back into the
578 * standard __copy_tofrom_user
579 */
/*
 * Label 100: common fixup for any fault inside .Lcopy_page_4K.
 * Restores r20-r31 from their spill slots, reloads the original
 * destination and source from -24(r1) / -16(r1), sets the length back to
 * 4096 and restarts the whole copy at .Ldst_aligned, whose own fixups
 * compute the exact number of bytes not copied.
 *
 * cr1 still holds the entry-time comparison of the length against 16
 * (nothing in the page-copy path writes cr1), so the "blt cr1" test at
 * .Ldst_aligned does not branch to the tail code.
 */
580100:	ld	r20,-120(1)
581	ld	r21,-112(1)
582	ld	r22,-104(1)
583	ld	r23,-96(1)
584	ld	r24,-88(1)
585	ld	r25,-80(1)
586	ld	r26,-72(1)
587	ld	r27,-64(1)
588	ld	r28,-56(1)
589	ld	r29,-48(1)
590	ld	r30,-40(1)
591	ld	r31,-32(1)
592	ld	r3,-24(r1)
593	ld	r4,-16(r1)
594	li	r5,4096
595	b	.Ldst_aligned
596
/*
 * Exception table for .Lcopy_page_4K: every numbered instruction in that
 * routine (labels 20 through 91) resumes at label 100 on a fault.  The
 * "b" suffix binds each number to its nearest preceding definition, i.e.
 * the page-copy labels rather than the same numbers used earlier in the
 * file.  Labels 74 and 75 mark addi instructions, which make no memory
 * access.
 */
597	EX_TABLE(20b,100b)
598	EX_TABLE(21b,100b)
599	EX_TABLE(22b,100b)
600	EX_TABLE(23b,100b)
601	EX_TABLE(24b,100b)
602	EX_TABLE(25b,100b)
603	EX_TABLE(26b,100b)
604	EX_TABLE(27b,100b)
605	EX_TABLE(28b,100b)
606	EX_TABLE(29b,100b)
607	EX_TABLE(30b,100b)
608	EX_TABLE(31b,100b)
609	EX_TABLE(32b,100b)
610	EX_TABLE(33b,100b)
611	EX_TABLE(34b,100b)
612	EX_TABLE(35b,100b)
613	EX_TABLE(36b,100b)
614	EX_TABLE(37b,100b)
615	EX_TABLE(38b,100b)
616	EX_TABLE(39b,100b)
617	EX_TABLE(40b,100b)
618	EX_TABLE(41b,100b)
619	EX_TABLE(42b,100b)
620	EX_TABLE(43b,100b)
621	EX_TABLE(44b,100b)
622	EX_TABLE(45b,100b)
623	EX_TABLE(46b,100b)
624	EX_TABLE(47b,100b)
625	EX_TABLE(48b,100b)
626	EX_TABLE(49b,100b)
627	EX_TABLE(50b,100b)
628	EX_TABLE(51b,100b)
629	EX_TABLE(52b,100b)
630	EX_TABLE(53b,100b)
631	EX_TABLE(54b,100b)
632	EX_TABLE(55b,100b)
633	EX_TABLE(56b,100b)
634	EX_TABLE(57b,100b)
635	EX_TABLE(58b,100b)
636	EX_TABLE(59b,100b)
637	EX_TABLE(60b,100b)
638	EX_TABLE(61b,100b)
639	EX_TABLE(62b,100b)
640	EX_TABLE(63b,100b)
641	EX_TABLE(64b,100b)
642	EX_TABLE(65b,100b)
643	EX_TABLE(66b,100b)
644	EX_TABLE(67b,100b)
645	EX_TABLE(68b,100b)
646	EX_TABLE(69b,100b)
647	EX_TABLE(70b,100b)
648	EX_TABLE(71b,100b)
649	EX_TABLE(72b,100b)
650	EX_TABLE(73b,100b)
651	EX_TABLE(74b,100b)
652	EX_TABLE(75b,100b)
653	EX_TABLE(76b,100b)
654	EX_TABLE(77b,100b)
655	EX_TABLE(78b,100b)
656	EX_TABLE(79b,100b)
657	EX_TABLE(80b,100b)
658	EX_TABLE(81b,100b)
659	EX_TABLE(82b,100b)
660	EX_TABLE(83b,100b)
661	EX_TABLE(84b,100b)
662	EX_TABLE(85b,100b)
663	EX_TABLE(86b,100b)
664	EX_TABLE(87b,100b)
665	EX_TABLE(88b,100b)
666	EX_TABLE(89b,100b)
667	EX_TABLE(90b,100b)
668	EX_TABLE(91b,100b)
669
/*
 * Export the dispatching entry point __copy_tofrom_user; no export is
 * emitted in this file for __copy_tofrom_user_base.
 * NOTE(review): EXPORT_SYMBOL presumably comes from <asm/export.h>,
 * included at the top of the file -- confirm.
 */
670EXPORT_SYMBOL(__copy_tofrom_user)
671