/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

/*
 * memcpy - copy r5 bytes from the buffer at r4 to the buffer at r3.
 *
 * In:   r3 = destination pointer
 *       r4 = source pointer
 *       r5 = number of bytes to copy
 * Out:  r3 = the destination pointer that was passed in
 *
 * Big-endian path:
 *   Writes r0, r3-r12, CTR and CR fields cr0, cr1, cr6, cr7.  The incoming
 *   r3 is parked in the doubleword at -STACKFRAMESIZE+STK_REG(R31)(r1) and
 *   reloaded just before every blr; r1 itself is never modified.
 *   cr7 holds the low four bits of the (remaining) length, so the bits
 *   tested with cr7*4+0 .. cr7*4+3 select an 8-, 4-, 2- and 1-byte piece
 *   respectively.
 *
 * Little-endian path:
 *   A plain byte loop that writes r4, r9, r10, CTR and cr7 and leaves r3
 *   untouched.
 *
 * NOTE(review): the BEGIN_FTR_SECTION / FTR_SECTION_ELSE / *_FTR_SECTION_END*
 * macros are defined outside this file.  Going by their names, each pair of
 * alternatives is selected at runtime from the CPU feature bits - confirm
 * the exact selection rules against the feature-fixup headers.
 */
	.align	7
_GLOBAL_TOC(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0	/* cr7 = (length == 0); consumed by beqlr below */
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
#ifndef SELFTEST
	/* Alternative body: hand the whole copy to memcpy_power7 (not in this file). */
	b	memcpy_power7
#endif
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	/*
	 * lbzu/stbu add 1 to the pointer before each access, so both pointers
	 * start one byte low.  The destination is walked through r9 so that
	 * r3 still holds the return value at blr.
	 */
	addi	r9,r3,-1
	addi	r4,r4,-1
	beqlr	cr7		/* nothing to copy: return before CTR is loaded */
	mtctr	r5		/* one loop iteration per byte */
1:	lbzu	r10,1(r4)
	stbu	r10,1(r9)
	bdnz	1b
	blr
#else
	PPC_MTOCRF(0x01,r5)	/* low bits of the length into cr7 */
	cmpldi	cr1,r5,16	/* cr1 = length vs 16 (unsigned) */
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7		/* also sets cr0: "ne" means dest is not 8-byte aligned */
	dcbt	0,r4		/* cache touch hint for the start of the source */
	blt	cr1,.Lshort_copy	/* fewer than 16 bytes: piecewise copy only */
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned		/* tests cr0 from the andi. above */
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	/*
	 * Reached directly, or from .Ldst_unaligned once the destination has
	 * been advanced to an 8-byte boundary.  r5 is the remaining length and
	 * cr1/cr7 describe that same value.  r3 is biased by -16 to suit the
	 * 8(r3)/16(r3) displacements used by the stores below.
	 */
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* r0 = source offset within its doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	/*
	 * Doubleword copy, 16 bytes per loop pass, with each load issued one
	 * doubleword ahead of the matching store.
	 */
	srdi	r7,r5,4		/* r7 = number of 16-byte chunks */
	ld	r9,0(r4)	/* first source doubleword */
	addi	r4,r4,-8	/* r4 now trails by 8, matching the ldu displacements */
	mtctr	r7
	andi.	r5,r5,7		/* cr0 "eq" = no bytes left after the doublewords */
	bf	cr7*4+0,2f	/* 8s bit clear: even doubleword count, enter at 2 */
	addi	r3,r3,8		/* odd count: shift both pointers up one doubleword */
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f		/* fewer than 16 bytes remain: just one doubleword to store */
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)	/* store the doubleword still held in r8 */
	beq	3f		/* cr0 from andi. r5,r5,7: no tail, go to the return below */
	addi	r3,r3,16	/* r3 = first destination byte not yet written */
.Ldo_tail:
	/* Remaining 0-7 bytes, read from 8(r4) because r4 trails by 8. */
	bf	cr7*4+1,1f	/* 4-byte piece */
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f	/* 2-byte piece */
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f	/* 1-byte piece */
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

.Lsrc_unaligned:
	/*
	 * Destination is 8-byte aligned, source is not (r0 = source & 7, which
	 * is non-zero here).  The source is read as aligned doublewords from
	 * r4 rounded down, and every stored doubleword is assembled from two
	 * neighbouring source doublewords:
	 *     d[i] = (s[i] << r10) | (s[i+1] >> r11)
	 * with r10 = 8 * r0 bits and r11 = 64 - r10 bits.
	 */
	srdi	r6,r5,3		/* r6 = number of whole doublewords to store */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round the source down to an 8-byte boundary */
	srdi	r7,r5,4		/* loop count = (length - 16) / 16 */
	sldi	r10,r0,3	/* left-shift amount in bits */
	cmpdi	cr6,r6,3	/* cr6 = doubleword count vs 3, picks the short exits */
	andi.	r5,r5,7		/* cr0 "eq" = no tail bytes; r5 = tail byte count */
	mtctr	r7
	subfic	r11,r10,64	/* right-shift amount in bits */
	add	r5,r5,r0	/* r5 = tail byte count + source offset (see cmpwi below) */

	bt	cr7*4+0,0f	/* 8s bit of the length set: odd doubleword count */

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f		/* fewer than 3 doublewords: skip the loop */
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f		/* fewer than 3 doublewords: only one to store */
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f		/* exactly 3 doublewords: skip the loop */

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

	/* Drain: store the doublewords that are still held in registers. */
3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f		/* cr0 from andi. r5,r5,7: no tail; the next "4:" is the return in .Lshort_copy */
	/*
	 * Tail of 1-7 bytes.  r9 is the last source doubleword loaded; after
	 * the shift its unused bytes sit at the most-significant end.  When
	 * tail count + source offset exceeds 8 the tail runs into one more
	 * source doubleword, which is loaded and merged in.
	 */
	cmpwi	cr1,r5,8
	addi	r3,r3,32	/* r3 = first destination byte not yet written */
	sld	r9,r9,r10
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/* Each rotate brings the next most-significant bytes of r9 to the low end for the store. */
	bf	cr7*4+1,1f	/* 4-byte piece */
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f	/* 2-byte piece */
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f	/* 1-byte piece */
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

.Ldst_unaligned:
	/*
	 * Destination is not 8-byte aligned.  Copy the r6 (1-7) leading bytes
	 * as 1-, 2- and 4-byte pieces, using r7 as the running offset, then
	 * rejoin the aligned path with the remaining length.
	 */
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5	/* r5 = length left after the leading bytes */
	li	r7,0
	cmpldi	cr1,r5,16	/* refresh cr1 for the reduced length */
	bf	cr7*4+3,1f	/* 1-byte piece */
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f	/* 2-byte piece */
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f	/* 4-byte piece */
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)	/* cr7 = low bits of the remaining length */
	add	r4,r6,r4	/* step both pointers past the leading bytes */
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	/*
	 * Fewer than 16 bytes in total: copy an 8-, 4-, 2- and 1-byte piece
	 * as selected by the length bits in cr7.  The 8-byte piece is moved
	 * as two words.
	 */
	bf	cr7*4+0,1f	/* 8-byte piece */
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f	/* 4-byte piece */
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f	/* 2-byte piece */
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f	/* 1-byte piece */
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)