xref: /openbmc/linux/arch/powerpc/lib/copy_32.S (revision 7dffb720)
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

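/*
 * COPY_16_BYTES moves 16 bytes per expansion: four word loads into
 * r7-r10 followed by four word stores, with the trailing lwzu/stwu
 * advancing r4 (source) and r6 (destination), both of which are
 * expected to point 4 bytes before the next word to transfer.
 */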
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

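/*
 * COPY_16_BYTES_WITHEX(n) is the same 16-byte copy, but every load and
 * store carries a local label (8n0 .. 8n7) so that user-access faults
 * on any of them can be fixed up via the exception table.
 */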
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

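/*
 * COPY_16_BYTES_EXCODE(n) emits the matching fixup code: label 9n0
 * handles a fault on one of the loads and branches to 104f, label 9n1
 * handles a fault on one of the stores and branches to 105f, each
 * after adjusting r5 by -(16 * n); the __ex_table entries pair each
 * 8n* instruction with the appropriate fixup.
 */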
#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
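/*
 * cacheable_memzero: r3 = destination, r4 = number of bytes to clear
 * (roughly void cacheable_memzero(void *p, unsigned int nb)).
 */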
_GLOBAL(cacheable_memzero)
	mr	r5,r4
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6
#else
10:	stw	r4, 4(r6)
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#if CACHE_LINE_SIZE >= 32
	stw	r4, 20(r6)
	stw	r4, 24(r6)
	stw	r4, 28(r6)
	stw	r4, 32(r6)
#endif /* CACHE_LINE_SIZE */
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

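/*
 * memset: r3 = destination, r4 = fill byte, r5 = count.  The fill byte
 * is first replicated into all four bytes of r4; the destination
 * pointer is returned unchanged in r3.
 */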
_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
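/*
 * cacheable_memcpy: r3 = destination, r4 = source, r5 = count.
 * Branches to plain memcpy when the two regions overlap.
 */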
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6
#endif
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

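/*
 * memmove: r3 = destination, r4 = source, r5 = count.  Copies
 * descending (via backwards_memcpy) when dest > src so overlapping
 * regions are handled; otherwise falls through to memcpy.
 */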
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

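/*
 * memcpy: r3 = destination, r4 = source, r5 = count.  The main loop
 * moves 8 bytes per iteration once the destination is word-aligned;
 * the destination pointer is returned unchanged in r3.
 */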
_GLOBAL(memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

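/*
 * backwards_memcpy: r3 = destination, r4 = source, r5 = count.
 * Same as memcpy, but copies from the end of both buffers towards
 * the start.
 */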
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

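/*
 * __copy_tofrom_user: r3 = to, r4 = from, r5 = count.  Returns in r3
 * the number of bytes that could not be copied (0 on success); the
 * __ex_table entries below divert faults on the labelled user accesses
 * to the fixup code at the end of this file.
 */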
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
	li	r0,0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

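/*
 * Fault fixups for __copy_tofrom_user.  Each handler sets r9 to 0 for
 * a fault on a load (read) or 1 for a fault on a store (write), and r3
 * to the log2 of the transfer size of the loop that faulted, so the
 * number of bytes not copied can be computed as r5 + (ctr << r3) in
 * the common code at label 99 below.
 */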
/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text