/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0,r5,31,1,31
	addi	r6,r3,-4
	beq-	2f
	rlwimi	r4,r4,16,0,15
	mtctr	r0
1:	stwu	r4,4(r6)
	bdnz	1b
2:	andi.	r0,r5,1
	beqlr
	sth	r4,4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif
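
/*
 * For reference, memset16() above behaves roughly like the following C
 * (an illustrative sketch only, not the exact instruction-level code):
 *
 *	void *memset16(u16 *p, u16 v, size_t n)
 *	{
 *		u32 *q = (u32 *)p;
 *		u32 w = ((u32)v << 16) | v;	// halfword replicated into a word
 *		size_t i;
 *
 *		for (i = 0; i < n / 2; i++)	// two halfwords per word store
 *			*q++ = w;
 *		if (n & 1)			// odd trailing halfword
 *			*(u16 *)q = v;
 *		return p;
 *	}
 */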

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero. This requires that the destination
 * area is cacheable. -- paulus
 *
 * During early init, the cache might not be active yet, so dcbz cannot be
 * used. We therefore skip the optimised block that uses dcbz. The branch
 * below is replaced by a 'bne' once the cache is active, so the dcbz path
 * is only taken for zero fills. This is done in machine_init()
 */
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled. Replaced
	 * by 'bne' during boot so the normal procedure is used whenever
	 * r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)
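
/*
 * Summary of the fast path above: for a zero fill, once the data cache
 * is enabled (the patch_site above is patched from machine_init()),
 * every complete cache line inside the destination is cleared with
 * dcbz, which zeroes a whole line without reading it from memory; the
 * unaligned head and the tail are finished with ordinary word and byte
 * stores.  Non-zero fills, and calls made before the cache is enabled,
 * only use the plain store loops.
 */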

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic. This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, cache might not be active yet, so dcbz cannot be
 * used. We therefore jump to generic_memcpy which doesn't use dcbz. This
 * jump is replaced by a nop once cache is active. This is done in
 * machine_init()
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

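/*
 * Main loop: one cache line per iteration.  dcbz establishes the
 * destination line in the data cache and zeroes it without reading it
 * from memory; COPY_16_BYTES is then repeated enough times (depending
 * on L1_CACHE_BYTES) to refill the whole line from the source.
 */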
58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

/*
 * Plain forward copy, one word or byte at a time.  Used before the data
 * cache is enabled (see the patch_site in memcpy above) and when the
 * source and destination overlap, where dcbz on the destination could
 * clobber source data that has not been read yet.
 */
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b
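
/*
 * backwards_memcpy() copies from the end of the buffers towards the
 * start, so memmove() can use it when the destination overlaps the
 * source from above (dst > src).  Conceptually:
 *
 *	while (n--)
 *		dst[n] = src[n];
 *
 * The code below does this in 8-byte chunks where it can, with byte
 * loops for alignment and the remaining tail.
 */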

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

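/*
 * Cache-line loop with user-access fault handling: every load and
 * store emitted by COPY_16_BYTES_WITHEX(n) carries a numbered label,
 * and COPY_16_BYTES_EXCODE(n) supplies the matching EX_TABLE entries,
 * so a fault anywhere in the unrolled line copy adjusts r5 and jumps
 * to the common fixup code at 104:/105: below.  dcbt prefetches the
 * source ahead of the copy (see the MAX_COPY_PREFETCH heuristic) and
 * dcbz pre-establishes each destination line, as in memcpy above.
 */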
58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr
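
/*
 * Fault fixups for __copy_tofrom_user().  Each handler records whether
 * the fault was on a read (r9 = 0) or a write (r9 = 1) and arranges r3,
 * r5 and ctr so that the number of bytes not copied comes out as
 *
 *	not_copied = r5 + (ctr << r3);
 *
 * which is the value the routine must return (0 on success).  For a
 * read fault the copy is first retried one byte at a time before
 * giving up.
 */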

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * This code handles faults in the cacheline loop and branches to either
 * 104f (if in the read part) or 105f (if in the write part), after
 * updating r5.
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then return the number of bytes still not copied */
132:	mfctr	r3
120:	blr
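
/*
 * Exception table entries for the trailing word loop (30:/31:), the
 * trailing byte loop (40:/41:) and the byte-at-a-time read-fault retry
 * above (130:/131:).
 */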

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)