/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
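
/*
 * Note on the label scheme in the two macros above: COPY_16_BYTES_WITHEX(n)
 * tags its four loads 8n0:..8n3: and its four stores 8n4:..8n7:, and
 * COPY_16_BYTES_EXCODE(n) emits the matching fixups -- 9n0: for a
 * faulting load, 9n1: for a faulting store -- each of which backs the
 * 16*n bytes claimed by this unrolled group out of r5 before branching
 * to the common handlers at 104:/105:.  For example, with n = 1 a fault
 * on the load at 811: is routed by its __ex_table entry to 910:.
 */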

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6
#else
10:	stw	r4, 4(r6)
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#if CACHE_LINE_SIZE >= 32
	stw	r4, 20(r6)
	stw	r4, 24(r6)
	stw	r4, 28(r6)
	stw	r4, 32(r6)
#endif /* CACHE_LINE_SIZE */
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr
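
/*
 * Roughly equivalent C for cacheable_memzero above (an illustrative
 * sketch only; dcbz() stands for the dcbz instruction, which zeroes
 * one whole cache line in the data cache):
 *
 *	void cacheable_memzero(void *p, unsigned long n)
 *	{
 *		char *q = p;
 *
 *		while (n && ((unsigned long)q & CACHELINE_MASK)) {
 *			*q++ = 0;		// reach a line boundary
 *			n--;
 *		}
 *		for (; n >= CACHELINE_BYTES; n -= CACHELINE_BYTES) {
 *			dcbz(q);		// zero a full line at once
 *			q += CACHELINE_BYTES;
 *		}
 *		while (n--)			// byte tail
 *			*q++ = 0;
 *	}
 */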

_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23		/* replicate the fill byte ... */
	rlwimi	r4,r4,16,0,15		/* ... into all 4 bytes of r4 */
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6
#endif
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr
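
/*
 * The overlap test at the top of cacheable_memcpy is, in C terms,
 * approximately:
 *
 *	if (src < dst + n && dst < src + n)
 *		goto memcpy;	// regions overlap, fall back
 *
 * The first cmplw leaves src < dst + n in cr0, the second leaves
 * dst < src + n in cr1, and crand folds the two LT bits together so
 * a single blt decides.
 */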

_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL(memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b
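
/*
 * Note on the "rlwinm. r7,r5,32-3,3,31" idiom used above and in
 * backwards_memcpy below: rotating left by 29 and keeping only bits
 * 3..31 is an unsigned shift right by 3, i.e. r7 = r5 / 8, the number
 * of 8-byte chunks, and the record form sets cr0 so beq can test it
 * for zero.  For example, r5 = 27 gives r7 = 3 with 3 bytes left over.
 */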

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text
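
/*
 * Convention note (matching the fixup code further down): on return,
 * r3 holds the number of bytes NOT copied, so r3 = 0 means complete
 * success.  A rough C-level prototype would be
 *
 *	unsigned long __copy_tofrom_user(void *to, const void *from,
 *					 unsigned long size);
 *
 * with callers treating a nonzero result as a partial copy.
 */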

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
	li	r0,0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr
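
/*
 * Roughly, the non-8xx cacheline loop above is software-pipelined:
 * r7 lines are prefetched with dcbt before entering the loop at 53:,
 * each iteration then prefetches one line ahead while dcbz-ing and
 * copying the current one, and when ctr runs out the loop is
 * re-entered at 114: with r7 = 0 to drain the r0 lines already
 * prefetched.
 */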

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
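
/*
 * Worked example of the bookkeeping below (illustrative, assuming
 * 32-byte cache lines, i.e. LG_CACHELINE_BYTES = 5): a fault in the
 * cacheline loop arrives at 92: with r3 = 5; if r0 + ctr says 3 lines
 * are still outstanding, 106: computes 3 << 5 = 96 bytes, to which the
 * sub-cacheline remainder in r5 is added.
 */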

/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text
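
/*
 * Note that the recovery paths are themselves covered by the table
 * above: if the byte-at-a-time retry faults again on a load (130:),
 * we move on to clearing the rest of the destination at 132:, and a
 * further fault on any of the stores (131:, 112:, 114:) just gives
 * up via 120:.
 */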