/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

/*
 * memcpy - copy r5 bytes from the address in r4 to the address in r3.
 *
 * In:	r3 = destination, r4 = source, r5 = byte count.
 * Out:	r3 = the original destination pointer.
 *
 * Register use visible in this file:
 *   little-endian build: r9, r10, ctr and cr7 are overwritten; r3 is
 *     never written, so it still holds the destination at return.
 *   big-endian build: r0 and r4-r12, ctr, cr0, cr1, cr6 and cr7 are
 *     overwritten; r3 is used as the running store pointer, so the
 *     original value is parked in a stack slot addressed from r1 and
 *     reloaded before every blr.
 *
 * Big-endian layout:
 *   .Lshort_copy	count < 16
 *   .Ldst_unaligned	copy 1-7 bytes to bring r3 to an 8-byte boundary
 *   .Ldst_aligned	destination aligned, source aligned (or CPU handles
 *			unaligned ld/std): 16 bytes per loop pass
 *   .Lsrc_unaligned	destination aligned, source not: aligned loads that
 *			are shifted and merged into each stored doubleword
 *
 * Throughout the big-endian code cr7 holds the low four bits of the
 * remaining count: bit cr7*4+0 selects an 8-byte piece, +1 a 4-byte
 * piece, +2 a 2-byte piece and +3 a single byte.
 *
 * NOTE(review): the BEGIN_FTR_SECTION / FTR_SECTION_ELSE / *_END macros
 * come from <asm/feature-fixups.h>, which is not visible here.  Comments
 * below about which arm is kept on which CPU follow the macro names and
 * the existing in-file comment; confirm against that header.
 */
	.align	7		/* PowerPC gas: exponent, i.e. 2^7 = 128-byte alignment */
_GLOBAL_TOC(memcpy)
/*
 * Entry fixup keyed on CPU_FTR_VMX_COPY.  Presumably the first arm is kept
 * when the feature bit is clear and the "b memcpy_power7" arm is patched
 * in when it is set (memcpy_power7 is defined elsewhere).
 */
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0	/* cr7.eq = (count == 0); consumed by beqlr below */
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
	b	memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	addi	r9,r3,-1	/* r9 = dest - 1: stbu pre-increments by 1 */
	addi	r4,r4,-1	/* r4 = src - 1: lbzu pre-increments by 1 */
	beqlr	cr7		/* nothing to copy; r3 is still the destination */
	mtctr	r5		/* ctr = byte count */
1:	lbzu	r10,1(r4)	/* one byte per pass */
	stbu	r10,1(r9)
	bdnz	1b
	blr
#else
	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the count */
	cmpldi	cr1,r5,16	/* cr1 = count vs 16 (unsigned) */
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7		/* r6 = 0..7; cr0.eq set when dest is already aligned */
	dcbt	0,r4		/* cache touch hint on the source */
	blt	cr1,.Lshort_copy	/* fewer than 16 bytes */
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
/* NOTE(review): test_feature is presumably read by the fixup macros in selftest builds - confirm. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned	/* cr0 from "andi. r6,r6,7": dest not 8-byte aligned */
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
/*
 * Destination is 8-byte aligned (or the CPU does not care).
 * On entry: r5 = remaining count, cr7 = its low 4 bits, cr1 = r5 vs 16.
 */
.Ldst_aligned:
	addi	r3,r3,-16	/* bias dest so the loop can store at 8(r3) and stdu at 16(r3) */
test_feature = (SELFTEST_CASE == 0)
/* Source-alignment test; by its name the section is kept only when CPU_FTR_UNALIGNED_LD_STD is clear. */
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* r0 = source offset within its doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4		/* r7 = count / 16 = loop passes */
	ld	r9,0(r4)	/* first source doubleword */
	addi	r4,r4,-8	/* bias src so the loop can load at 8(r4) and ldu at 16(r4) */
	mtctr	r7
	andi.	r5,r5,7		/* r5 = trailing bytes (< 8); cr0.eq set when there are none */
	bf	cr7*4+0,2f	/* 8-byte bit clear: enter the loop at its second half */
	addi	r3,r3,8		/* 8-byte bit set: shift both pointers by one doubleword ... */
	addi	r4,r4,8
	mr	r8,r9		/* ... and treat the loaded doubleword as the pending store */
	blt	cr1,3f		/* count < 16: that doubleword is the only one */
	/* Each full pass moves 16 bytes; a loaded doubleword is stored one step later. */
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)	/* store the last pending doubleword */
	beq	3f		/* cr0 from "andi. r5,r5,7": no trailing bytes, return */
	addi	r3,r3,16	/* r3 = first destination byte not yet written */
/* Trailing 4/2/1 bytes selected by cr7; loads use 8(r4) because r4 trails the next source byte by 8. */
.Ldo_tail:
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination aligned, source not.  r0 = source offset (1..7) from the
 * "andi. r0,r4,7" above.  The source pointer is rounded down so every
 * load is aligned, and each stored doubleword is built as
 *	(s[i] << r10) | (s[i+1] >> r11)
 * with r10 = 8 * r0 and r11 = 64 - r10.
 */
.Lsrc_unaligned:
	srdi	r6,r5,3		/* r6 = count / 8 = whole doublewords to store */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* r4 = src rounded down to an 8-byte boundary */
	srdi	r7,r5,4		/* r7 = (count - 16) / 16 = loop passes */
	sldi	r10,r0,3	/* r10 = left shift in bits */
	cmpdi	cr6,r6,3	/* cr6 = doubleword count vs 3, used to skip the loop */
	andi.	r5,r5,7		/* r5 = trailing bytes; cr0.eq set when there are none */
	mtctr	r7
	subfic	r11,r10,64	/* r11 = right shift in bits */
	add	r5,r5,r0	/* r5 = trailing bytes + source offset, tested against 8 at the tail */

	bt	cr7*4+0,0f	/* 8-byte bit of the count set: take the second prologue */

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f		/* fewer than 3 doublewords: skip the loop */
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f		/* enter the loop at its second half */

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8	/* extra -8 bias so the shared stores below land correctly */
	blt	cr6,5f		/* fewer than 3 doublewords: only the final store is needed */
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f		/* exactly 3 doublewords: skip the loop */

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
	/* Each full pass loads two aligned doublewords and stores two merged ones. */
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

	/* Store the merged doublewords still held in registers. */
3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	/*
	 * cr0 is still the result of "andi. r5,r5,7" above.  With no trailing
	 * bytes, branch to the next "4:" in the file, which is the
	 * reload-r3-and-return sequence at the end of .Lshort_copy.
	 */
	beq	4f
	cmpwi	cr1,r5,8	/* trailing bytes + source offset vs 8 */
	addi	r3,r3,32	/* r3 = first destination byte not yet written */
	sld	r9,r9,r10	/* move the unused bytes of the last doubleword to the top */
	ble	cr1,6f		/* <= 8: the tail lies entirely in r9 */
	ld	r0,8(r4)	/* otherwise merge in the next source doubleword */
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/* Tail bytes sit at the top of r9; rotate each piece down to the low end, then store it. */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination not 8-byte aligned.  r6 = bytes needed to reach the next
 * 8-byte boundary (1..7).  Copy them as 1 + 2 + 4 byte pieces, using r7
 * as the running offset, then rejoin .Ldst_aligned with the count, cr1
 * and cr7 recomputed for what is left.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5	/* r5 = count left after the alignment bytes */
	li	r7,0		/* r7 = offset of the next piece */
	cmpldi	cr1,r5,16	/* refresh cr1 for the reduced count */
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the reduced count */
	add	r4,r6,r4	/* step both pointers past the alignment bytes */
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes: copy 8 (as two words), 4, 2 and 1 bytes as
 * selected by the cr7 bits.  The final "4:" is also the target of the
 * "beq 4f" in .Lsrc_unaligned.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)