/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)
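
/*
 * COPY_16_BYTES moves one 16-byte chunk through r7-r10.  The callers keep
 * r4 (src) and r6 (dst) pointing 4 bytes *before* the next chunk, so the
 * plain lwz/stw use offsets 4..12 and the final lwzu/stwu advance both
 * pointers by 16.  Roughly equivalent C, for illustration only:
 *
 *	u32 a = src[1], b = src[2], c = src[3], d = src[4];
 *	dst[1] = a; dst[2] = b; dst[3] = c; dst[4] = d;
 *	src += 4; dst += 4;
 */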

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)
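
/*
 * Same copy as COPY_16_BYTES, but every instruction carries a local label
 * 8<n>0 .. 8<n>7 so that each load and store can get its own exception
 * table entry: labels ..0-..3 are the loads, ..4-..7 the stores.
 */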

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
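
/*
 * Fixup code for COPY_16_BYTES_WITHEX(n): a fault in chunk n's loads lands
 * on 9<n>0, a fault in its stores on 9<n>1.  Both subtract the 16*n bytes
 * this iteration had already claimed from r5 and branch to the common
 * read (104) or write (105) fault handler below.
 */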

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0,r5,31,1,31
	addi	r6,r3,-4
	beq-	2f
	rlwimi	r4,r4,16,0,15
	mtctr	r0
1:	stwu	r4,4(r6)
	bdnz	1b
2:	andi.	r0,r5,1
	beqlr
	sth	r4,4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif
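
/*
 * memset16() fills r5 (n) halfwords at r3 (s) with the halfword in r4.
 * A minimal C sketch of the logic above, for illustration only:
 *
 *	void *memset16(u16 *s, u16 v, size_t n)
 *	{
 *		u32 w = v | ((u32)v << 16);	// rlwimi r4,r4,16,0,15
 *		u32 *p = (u32 *)s;
 *
 *		for (size_t i = 0; i < n / 2; i++)	// the stwu loop
 *			*p++ = w;
 *		if (n & 1)			// odd trailing halfword
 *			*(u16 *)p = v;
 *		return s;
 *	}
 */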

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init, the cache might not be active yet, so dcbz cannot be
 * used.  We therefore skip the optimised block that uses dcbz.  This jump
 * is replaced by a nop once the cache is active.  This is done in
 * machine_init().
 */
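/*
 * Outline of the fast path below in C-like pseudocode, for illustration
 * only (the real code also patches the dcbz path out during early boot
 * and skips it when the fill value is not zero):
 *
 *	store one word, align dst down to a word boundary;
 *	if (c == 0)
 *		dcbz each complete cache line;
 *	store the remaining words;
 *	store the remaining bytes;
 */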
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

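	/* replicate the fill byte into all 4 bytes of a word,
	   e.g. 0x000000AB -> 0xABABABAB */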
	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled.  This branch
	 * is replaced by 'bne' during boot, to use the normal procedure
	 * if r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, the cache might not be active yet, so dcbz cannot be
 * used.  We therefore jump to generic_memcpy, which doesn't use dcbz.  This
 * jump is replaced by a nop once the cache is active.  This is done in
 * machine_init().
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

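	/*
	 * The dcbz path below must not be used when the regions overlap.
	 * Overlap means dst < src + len && src < dst + len; the two cmplw
	 * set cr0.lt = (src < dst + len) and cr1.lt = (dst < src + len),
	 * and crand folds both into cr0.lt.
	 */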
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

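/*
 * generic_memcpy: forward copy without dcbz, used before the cache is up
 * and when source and destination overlap.  Behaviourally it is just a
 * forward byte copy; a minimal C sketch, for illustration only:
 *
 *	void *generic_memcpy(void *dst, const void *src, size_t n)
 *	{
 *		char *d = dst;
 *		const char *s = src;
 *
 *		while (n--)		// the asm does this word-at-a-time
 *			*d++ = *s++;	// after aligning the destination
 *		return dst;
 *	}
 */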
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

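/*
 * backwards_memcpy: copies from the top of the buffers downwards, so it is
 * safe for memmove() when dst > src and the regions overlap.  A hedged C
 * sketch, for illustration only:
 *
 *	void *backwards_memcpy(void *dst, const void *src, size_t n)
 *	{
 *		char *d = (char *)dst + n;
 *		const char *s = (const char *)src + n;
 *
 *		while (n--)		// the asm does this word-at-a-time
 *			*--d = *--s;	// after aligning the destination
 *		return dst;
 *	}
 */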
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

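/*
 * __copy_tofrom_user(to, from, size): copy size bytes in either direction
 * between kernel and user memory, with page-fault recovery via the
 * exception tables.  Returns the number of bytes NOT copied (0 on
 * success).  A typical caller, sketched for illustration only:
 *
 *	size_t left = __copy_tofrom_user(to, from, n);
 *	if (left)
 *		return -EFAULT;		// n - left bytes were copied
 */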
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
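	/* e.g. with MAX_COPY_PREFETCH = 4: a 10-line copy prefetches 4
	   lines ahead, a 3-line copy 1 line ahead, and a single-line
	   copy (r0 <= 1) skips the prefetch (r7 stays 0). */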
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * This code handles faults in the cacheline loop and branches to either
 * 104f (if in the read part) or 105f (if in the write part), after
 * updating r5.
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
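/*
 * Worked example, for illustration: with 32-byte cache lines
 * (LG_CACHELINE_BYTES = 5), a fault in the cacheline loop with 2 lines
 * still to go (ctr shifted into r0) and r5 = 7 tail bytes reports
 * 7 + (2 << 5) = 71 bytes not copied.
 */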
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* return the number of bytes still to copy; this is also the fault
   landing pad for the retry loop (ctr is 0 if the retry completed) */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)