/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/errno.h>

/*
 * Fault annotations.
 *
 * Each macro is written as a prefix on the same source line as a load or
 * store ("err1;	lbz	r0,0(r4)"). It drops a numeric label at the current
 * location and hands that label plus a fixup label to EX_TABLE.
 * NOTE(review): EX_TABLE is defined outside this file; presumably it records
 * an exception-table entry so a fault at the labelled instruction resumes at
 * the fixup label - confirm against its definition.
 *
 *   err1 - fixup at .Ldo_err1 (no stack frame is live)
 *   err2 - fixup at .Ldo_err2 (stack frame holding r14-r22 is live)
 *   err3 - fixup at .Ldone    (used inside the byte-by-byte retry loop)
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm

/*
 * Fixup for err2 sites: reload r14-r22 from the stack frame, pop the frame,
 * then fall through into the common retry path.
 */
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
/*
 * Fixup for err1 sites (and tail of the err2 fixup).
 *
 * In:  r3 = current destination, r4 = current source, r7 = byte count that
 *      is loaded into CTR as the loop trip count.
 * Out: r3 = 0 when the loop runs to completion. A fault inside the loop is
 *      routed by err3 to .Ldone (defined later in this file).
 */
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0
	blr

/*
 * Fixup target of the err3 annotation: hand back the current CTR value in
 * r3. CTR is the down-counter of the byte-by-byte retry loop, so this is
 * the number of loop iterations that had not completed.
 */
.Ldone:
	mfctr	r3
	blr


/*
 * copy_mc_generic
 *
 * Register roles, as used by the code below:
 *   r3  destination pointer (every store goes through r3)
 *   r4  source pointer (every load goes through r4)
 *   r5  byte count on entry; reduced to the unaligned tail as the copy
 *       proceeds
 *   r7  running count of bytes not yet copied; consumed by the fixup path
 *       at .Ldo_err1 as the retry loop count
 *   r0, r6, r8-r12, r14-r22  data scratch
 *
 * Returns r3 = 0 when every byte was copied. Annotated accesses (err1/err2)
 * divert to the fixup labels instead; see the annotation macros.
 * NOTE(review): the C prototype is not visible in this file - confirm the
 * argument order and return type against the declaration.
 *
 * r14-r22 are saved in a STACKFRAMESIZE frame around the 64B/128B stages
 * and restored at label 7 (or by .Ldo_err2 on a fault).
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5
	cmpldi	r5,16
	blt	.Lshort_copy

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -src */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to reach 8B alignment */

	bf	cr7*4+3,1f		/* 1-byte step */
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* 2-byte step */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* 4-byte step */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* drop the alignment bytes from the count */
	cmpldi	r5,128			/* cr0 is consumed by the blt after the frame setup */

	/*
	 * Open a stack frame and save r14-r22. None of the instructions
	 * below up to the blt alter cr0. LR is stored at
	 * STACKFRAMESIZE+16(r1) but is never reloaded in this file; no call
	 * is made from here.
	 */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	blt	5f			/* fewer than 128 bytes left: skip the big loop */
	srdi	r6,r5,7			/* number of whole 128B chunks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* keep only the sub-128B remainder */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* cr7 bits now select the 64/32/16-byte steps */

6:	bf	cr7*4+1,7f		/* 64-byte step */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

	/* Restore r14-r22 and pop the frame; from here on sites use err1 */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32-byte step */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16-byte step */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* keep only the sub-16B remainder */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* cr7 bits select the 8/4/2/1-byte steps */
	bf	cr7*4+0,12f		/* 8-byte step */
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f		/* 4-byte step */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f		/* 2-byte step */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f		/* final byte; pointers and r7 are not advanced after it */
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* everything copied */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);