/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)
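
/*
 * COPY_16_BYTES_WITHEX(n) is the same 16-byte copy as COPY_16_BYTES, but
 * with each load and store carrying a numeric label (8n0-8n3 for the
 * loads, 8n4-8n7 for the stores) so that faults taken on user memory can
 * be fixed up.  COPY_16_BYTES_EXCODE(n) provides the matching fixup code:
 * a fault on any of the loads lands on 9n0, a fault on any of the stores
 * lands on 9n1.  Both subtract from r5 the 16 * n bytes already copied by
 * the earlier chunks of the current cache line and branch to the common
 * read (104f) or write (105f) fault handlers in __copy_tofrom_user.
 */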
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0,r5,31,1,31
	addi	r6,r3,-4
	beq-	2f
	rlwimi	r4,r4,16,0,15
	mtctr	r0
1:	stwu	r4,4(r6)
	bdnz	1b
2:	andi.	r0,r5,1
	beqlr
	sth	r4,4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init, the cache might not be active yet, so dcbz cannot be
 * used.  We therefore skip the optimised block that uses dcbz.  This branch
 * is replaced by a 'bne' once the cache is active.  This is done in
 * machine_init()
 */
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled.  Will be
	 * replaced by 'bne' during boot to use the normal procedure
	 * if r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

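	/*
	 * dcbz path: r6 is the word-aligned destination and r5 the length
	 * adjusted for the alignment.  Below, r7 is the offset of the
	 * destination within its cache line, r8 the offset of the end of
	 * the area, and r9 the number of complete cache lines that can be
	 * cleared with dcbz; the words up to the first cache line boundary
	 * and whatever is left after the last complete line are written
	 * with ordinary stores.
	 */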
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, the cache might not be active yet, so dcbz cannot be
 * used.  We therefore jump to generic_memcpy, which doesn't use dcbz.  This
 * jump is replaced by a nop once the cache is active.  This is done in
 * machine_init()
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * This stuff handles faults in the cacheline loop and branches to either
 * 104f (if in the read part) or 105f (if in the write part), after
 * updating r5.
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
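/*
 * Worked example: on a store fault in the final word loop, r3 (the shift)
 * is 2; with three words still to go in CTR and two trailing bytes in r5,
 * the routine returns (3 << 2) + 2 = 14 bytes not copied.
 */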
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* if that faults too (or when it completes), return the bytes not copied */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)
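
/*
 * Note on the return convention (illustrative): __copy_tofrom_user()
 * returns 0 when everything was copied, and otherwise the number of bytes
 * that could not be copied.  A C caller would typically do something like
 *
 *	left = __copy_tofrom_user(to, from, n);
 *	if (left)
 *		return -EFAULT;
 *
 * where 'to', 'from', 'n' and 'left' are purely illustrative names.
 */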