/*
 * POWER7-optimised __copy_tofrom_user using scalar and VMX (Altivec)
 * load/store loops, with exception-table fixups for user-access faults.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

/*
 * Hide the endian difference in the unaligned vector copy: big-endian
 * builds the permute control with lvsl and permutes (VRA,VRB); the
 * little-endian variant uses lvsr and swaps the two vperm data operands.
 */
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

/*
 * Each errN macro tags the immediately following user access with an
 * exception table entry whose fixup is the matching .Ldo_errN label.
 * err1: no stack frame held; err2: scalar frame with r14-r22 saved;
 * err3: inside the VMX path; err4: VMX path with r14-r16 also saved.
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm


/* Fault in the inner VMX loop: restore the extra non-volatiles first. */
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
/* Fault in the VMX path: leave VMX context, then unwind via .Lexit. */
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

/* Fault in the scalar cacheline loop: restore saved non-volatiles. */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/*
	 * Reload the original (dest, src, len) arguments stashed below the
	 * stack pointer at entry and retry with the base (careful) copy
	 * routine, which handles the partial-copy return value.
	 */
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base


/*
 * __copy_tofrom_user_power7(dest=r3, src=r4, len=r5)
 * Returns 0 on success (r3); faults divert to the .Ldo_errN fixups.
 */
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,3328		/* VMX worthwhile only for large copies */

	/* Stash the arguments so a fault can restart via the base copy. */
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
	bge	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
#endif

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	/* Enough left for the unrolled loop: save non-volatiles r14-r22. */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)	/* r5 = remaining bytes mod 128 */

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0		/* success */
	blr

/* VMX setup failed: pop the frame and fall back to the scalar copy. */
.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0	/* r3 == 0 means VMX unavailable; cr1 tested below */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

.machine push
.machine "power4"
	/* setup read stream 0 */
	dcbt	r0,r6,0b01000   /* addr from */
	dcbt	r0,r7,0b01010   /* length and depth from */
	/* setup write stream 1 */
	dcbtst	r0,r9,0b01000   /* addr to */
	dcbtst	r0,r10,0b01010  /* length and depth to */
	eieio
	dcbt	r0,r8,0b01010	/* all streams GO */
.machine pop

	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	v1,r0,r4
	addi	r4,r4,16
err3;	stvx	v1,r0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	v1,r0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,r0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,r0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,r0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,r0,r4
err4;	lvx	v6,r4,r9
err4;	lvx	v5,r4,r10
err4;	lvx	v4,r4,r11
err4;	lvx	v3,r4,r12
err4;	lvx	v2,r4,r14
err4;	lvx	v1,r4,r15
err4;	lvx	v0,r4,r16
	addi	r4,r4,128
err4;	stvx	v7,r0,r3
err4;	stvx	v6,r3,r9
err4;	stvx	v5,r3,r10
err4;	stvx	v4,r3,r11
err4;	stvx	v3,r3,r12
err4;	stvx	v2,r3,r14
err4;	stvx	v1,r3,r15
err4;	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,r0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,r0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,r0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,r0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,r0,r4
	addi	r4,r4,16
err3;	stvx	v1,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */

/* Source and destination differ in low 4 bits: permute-based copy. */
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	/*
	 * v16 is the permute control built from the source misalignment;
	 * v0 always holds the previous 16B of source so each VPERM can
	 * splice two adjacent loads into one aligned store.
	 */
	LVS(v16,0,r4)		/* Setup permute control vector */
err3;	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	v1,r0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,r0,r3
	addi	r3,r3,16
	vor	v0,v1,v1	/* carry the last load forward */

5:	bf	cr7*4+2,6f
err3;	lvx	v1,r0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,r0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,r0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,r0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,r0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,r0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,r0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,r0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,r0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,r0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,r0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */