/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif

/*
 * Endian-dependent helpers for the unaligned VMX path: LVS builds the
 * permute control vector and VPERM merges two adjacent 16B source
 * vectors. Little endian uses lvsr and swaps the two vperm sources.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

/*
 * Fault fixup annotations. Each errN macro drops a local label in front
 * of the instruction that follows it and emits an EX_TABLE entry pairing
 * that label with the matching .Ldo_errN fixup below. Which macro to use
 * depends on what state has to be undone at that point:
 *
 *   err1 - no stack frame is live
 *   err2 - stack frame allocated, r14-r22 saved in it
 *   err3 - stack frame allocated, enter_vmx_usercopy has been called
 *   err4 - as err3, plus r14-r16 saved in the frame
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm


/*
 * Fixup ladder. Every entry point falls through or branches down to
 * .Ldo_err1, which reloads the original r3/r4/r5 that the function entry
 * stashed below the caller's stack pointer and branches to
 * __copy_tofrom_user_base with those original arguments.
 */
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	CFUNC(exit_vmx_usercopy)
	ld	r0,STACKFRAMESIZE+16(r1)	/* LR was clobbered by the bl */
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base


/*
 * __copy_tofrom_user_power7
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes to copy
 *
 * Out:	r3 = 0 when the non-VMX path runs to completion. The VMX paths
 *	finish with a tail call to exit_vmx_usercopy, so r3 is whatever
 *	that function returns (NOTE(review): presumably 0 - confirm).
 *	If an access faults, control leaves through the .Ldo_errN fixups
 *	above and the result is produced by __copy_tofrom_user_base.
 *
 * The original r3/r4/r5 are stored below the stack pointer at
 * -STACKFRAMESIZE+STK_REG(R31/R30/R29)(r1), i.e. in the slots they will
 * occupy once a frame of STACKFRAMESIZE has been pushed, so that both the
 * fixups and the VMX path (after its call out) can recover them.
 *
 * Path selection:
 *	r5 < 16				-> .Lshort_copy
 *	r5 > 3328 and CPU_FTR_ALTIVEC	-> .Lvmx_copy
 *	otherwise			-> .Lnonvmx_copy
 */
_GLOBAL(__copy_tofrom_user_power7)
	cmpldi	r5,16
	cmpldi	cr1,r5,3328

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6		/* low 4 bits of -src into cr7 */
	clrldi	r6,r6,(64-3)	/* r6 = bytes needed to reach 8B alignment */

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	/* The 128B loop needs r14-r21, so push a frame and save them */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7		/* number of whole 128B blocks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)	/* r5 = bytes left after the 128B blocks */

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6		/* cr7 = 64/32/16 byte bits of the length */

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5		/* cr7 = 8/4/2/1 byte bits of the length */
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

	/*
	 * enter_vmx_usercopy returned 0: drop the frame pushed in
	 * .Lvmx_copy and fall back to the integer copy.
	 */
.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_usercopy)
	cmpwi	cr1,r3,0	/* result is tested after the prefetch setup */
	ld	r0,STACKFRAMESIZE+16(r1)
	/* Reload the arguments stashed at function entry */
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	/* setup read stream 0 */
	dcbt	0,r6,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */

	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	/* r14-r16 are used as index registers in the loop below */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
err4;	lvx	v6,r4,r9
err4;	lvx	v5,r4,r10
err4;	lvx	v4,r4,r11
err4;	lvx	v3,r4,r12
err4;	lvx	v2,r4,r14
err4;	lvx	v1,r4,r15
err4;	lvx	v0,r4,r16
	addi	r4,r4,128
err4;	stvx	v7,0,r3
err4;	stvx	v6,r3,r9
err4;	stvx	v5,r3,r10
err4;	stvx	v4,r3,r11
err4;	stvx	v3,r3,r12
err4;	stvx	v2,r3,r14
err4;	stvx	v1,r3,r15
err4;	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */

.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	/*
	 * v0 always holds the most recently loaded source vector, so each
	 * VPERM combines it with the next one. r4 therefore runs 16B ahead
	 * of the data actually consumed until it is unwound at label 11.
	 */
	LVS(v16,0,r4)		/* Setup permute control vector */
err3;	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1	/* v0 = v1: carry the last vector forward */

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	/* r14-r16 are used as index registers in the loop below */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */