/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif

/*
 * Endian-dependent helpers for the unaligned VMX path: big-endian builds use
 * lvsl and pass the vperm sources in order; little-endian builds use lvsr and
 * swap the two vperm source vectors.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

/*
 * memcpy_power7 - copy r5 bytes from the buffer at r4 to the buffer at r3.
 *
 * In:	r3 = destination, r4 = source, r5 = byte count.
 * Out:	the incoming r3 is stashed in the R31 save slot on entry and reloaded
 *	on every exit path. The non-VMX paths return it with blr; the VMX
 *	paths hand it to exit_vmx_ops via a tail call.
 *	NOTE(review): exit_vmx_ops presumably returns r3 unchanged - confirm.
 *
 * Dispatch on the byte count:
 *	< 16	-> .Lshort_copy
 *	> 4096	-> .Lvmx_copy, only when CONFIG_ALTIVEC is set and the
 *		   CPU_FTR_ALTIVEC feature section is enabled
 *	else	-> .Lnonvmx_copy (GPR loads and stores)
 *
 * Idiom used throughout: "mtocrf 0x01,rX" copies the low four bits of rX into
 * cr7, after which cr7*4+3 / +2 / +1 / +0 test the bits worth 1 / 2 / 4 / 8.
 * Each "bf" skips the matching power-of-two sized piece when its bit is clear.
 *
 * Scratch registers used: r0, r6-r12, ctr, cr0, cr1, cr7; v0-v16 on the VMX
 * paths. r14-r22 are saved and restored around the loops that use them.
 */
_GLOBAL(memcpy_power7)
	cmpldi	r5,16
	cmpldi	cr1,r5,4096
	/*
	 * Stored below r1 before any frame exists, at the offset that becomes
	 * STK_REG(R31)(r1) once a STACKFRAMESIZE frame is pushed.
	 * NOTE(review): presumably relies on the ABI's protected area below
	 * the stack pointer - confirm against STACKFRAMESIZE.
	 */
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	blt	.Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1, .Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)	/* r6 = bytes needed to reach alignment */

	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	/*
	 * At least one full 128B block remains: push a frame and free up
	 * non-volatile GPRs for the unrolled loop. The loop below uses
	 * r14-r21; r22 is saved and restored alongside them. LR is stored in
	 * the caller's frame but not reloaded on this path (no call is made).
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7		/* ctr = number of 128B blocks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	ld	r9,32(r4)
	ld	r10,40(r4)
	ld	r11,48(r4)
	ld	r12,56(r4)
	ld	r14,64(r4)
	ld	r15,72(r4)
	ld	r16,80(r4)
	ld	r17,88(r4)
	ld	r18,96(r4)
	ld	r19,104(r4)
	ld	r20,112(r4)
	ld	r21,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	std	r9,32(r3)
	std	r10,40(r3)
	std	r11,48(r3)
	std	r12,56(r3)
	std	r14,64(r3)
	std	r15,72(r3)
	std	r16,80(r3)
	std	r17,88(r3)
	std	r18,96(r3)
	std	r19,104(r3)
	std	r20,112(r3)
	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)	/* r5 = bytes left after the 128B blocks */

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4		/* cr7 bits +1/+2/+3 now select 64/32/16B */
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	ld	r9,32(r4)
	ld	r10,40(r4)
	ld	r11,48(r4)
	ld	r12,56(r4)
	addi	r4,r4,64
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	std	r9,32(r3)
	std	r10,40(r3)
	std	r11,48(r3)
	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
	ld	r0,0(r4)
	ld	r6,8(r4)
	ld	r7,16(r4)
	ld	r8,24(r4)
	addi	r4,r4,32
	std	r0,0(r3)
	std	r6,8(r3)
	std	r7,16(r3)
	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
	ld	r0,0(r4)
	ld	r6,8(r4)
	addi	r4,r4,16
	std	r0,0(r3)
	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r6,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/* Reload the destination pointer saved on entry and return it. */
15:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	blr

	/*
	 * Reached from .Lvmx_copy when enter_vmx_ops returned 0: drop the
	 * frame pushed there and fall back to the GPR copy. r3/r4/r5 have
	 * already been reloaded with the original arguments at that point.
	 */
.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	/*
	 * Save src/len next to the already-saved dest, push a frame and call
	 * enter_vmx_ops. Its result is parked in cr1 so the arguments can be
	 * reloaded and the prefetch streams started before acting on it.
	 */
	mflr	r0
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_ops)
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	dcbt	0,r6,0b01000
	dcbt	0,r7,0b01010
	dcbtst	0,r9,0b01000
	dcbtst	0,r10,0b01010
	eieio
	dcbt	0,r8,0b01010	/* GO */

	/* enter_vmx_ops returned 0: fall back to the non-VMX copy. */
	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)	/* low 4 bits of (src ^ dst); sets cr0 */
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
	ld	r0,0(r4)
	addi	r4,r4,8
	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4		/* cr7 bits +3/+2/+1 select 16/32/64B */
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)	/* r6 = bytes needed to reach 128B */

	/* Index registers for the lvx/stvx offsets used below. */
	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
	lvx	v1,0,r4
	addi	r4,r4,16
	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
	lvx	v1,0,r4
	lvx	v0,r4,r9
	addi	r4,r4,32
	stvx	v1,0,r3
	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
	lvx	v3,0,r4
	lvx	v2,r4,r9
	lvx	v1,r4,r10
	lvx	v0,r4,r11
	addi	r4,r4,64
	stvx	v3,0,r3
	stvx	v2,r3,r9
	stvx	v1,r3,r10
	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7		/* r6 = number of 128B blocks */

	/* r14-r16 become offset registers; save them in the frame first. */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
	lvx	v7,0,r4
	lvx	v6,r4,r9
	lvx	v5,r4,r10
	lvx	v4,r4,r11
	lvx	v3,r4,r12
	lvx	v2,r4,r14
	lvx	v1,r4,r15
	lvx	v0,r4,r16
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r9
	stvx	v5,r3,r10
	stvx	v4,r3,r11
	stvx	v3,r3,r12
	stvx	v2,r3,r14
	stvx	v1,r3,r15
	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
	lvx	v3,0,r4
	lvx	v2,r4,r9
	lvx	v1,r4,r10
	lvx	v0,r4,r11
	addi	r4,r4,64
	stvx	v3,0,r3
	stvx	v2,r3,r9
	stvx	v1,r3,r10
	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
	lvx	v1,0,r4
	lvx	v0,r4,r9
	addi	r4,r4,32
	stvx	v1,0,r3
	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
	lvx	v1,0,r4
	addi	r4,r4,16
	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
	ld	r0,0(r4)
	addi	r4,r4,8
	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/* Pop the frame, reload the original destination, leave VMX mode. */
15:	addi	r1,r1,STACKFRAMESIZE
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	b	CFUNC(exit_vmx_ops)		/* tail call optimise */

.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	addi	r4,r4,1
	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r7,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4		/* cr7 bits +3/+2/+1 select 16/32/64B */
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)	/* r6 = bytes needed to reach 128B */

	li	r9,16
	li	r10,32
	li	r11,48

	/*
	 * The source is not 16B aligned relative to the destination. lvx
	 * ignores the low four address bits, so each load fetches the aligned
	 * quadword containing the address; VPERM then stitches two
	 * neighbouring quadwords into one destination-aligned result using
	 * the control vector in v16. v0 always carries the most recently
	 * loaded quadword into the next step, which is why r4 runs 16 bytes
	 * ahead until it is unwound at label 11.
	 */
	LVS(v16,0,r4)		/* Setup permute control vector */
	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1	/* v0 = v1: carry the last quadword forward */

5:	bf	cr7*4+2,6f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
	stvx	v8,0,r3
	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7		/* r6 = number of 128B blocks */

	/* r14-r16 become offset registers; save them in the frame first. */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	stvx	v12,r3,r12
	stvx	v13,r3,r14
	stvx	v14,r3,r15
	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
	stvx	v8,0,r3
	stvx	v9,r3,r9
	stvx	v10,r3,r10
	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
	stvx	v8,0,r3
	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
	lwz	r6,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
	lbz	r0,0(r4)
	stb	r0,0(r3)

	/* Pop the frame, reload the original destination, leave VMX mode. */
15:	addi	r1,r1,STACKFRAMESIZE
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	b	CFUNC(exit_vmx_ops)		/* tail call optimise */
#endif /* CONFIG_ALTIVEC */