/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

/*
 * memcpy: copy r5 bytes from the buffer at r4 to the buffer at r3 and
 * return the original destination pointer in r3.
 *
 * Register roles, as used by the code below:
 *   r3   destination pointer.  The big-endian paths advance it while
 *        copying and reload the entry value from a save slot before
 *        returning; the little-endian byte loop never writes it.
 *   r4   source pointer (advanced while copying).
 *   r5   byte count.
 *   r1   stack pointer; only used to address the r3 save slot at
 *        -STACKFRAMESIZE+STK_REG(R31)(r1).  r1 itself is never changed.
 *        STACKFRAMESIZE and STK_REG are not defined in this file.
 *   r0, r6-r12, ctr   scratch.
 *
 * Condition-register roles on the big-endian paths:
 *   cr7  low four bits of the current byte count, loaded with
 *        PPC_MTOCRF(0x01,...): bit 0 <=> 8, bit 1 <=> 4, bit 2 <=> 2,
 *        bit 3 <=> 1 (see how .Lshort_copy consumes them).
 *   cr1  unsigned compare of the byte count against 16.
 *   cr0  result of the most recent andi. (alignment / tail-length tests).
 *   cr6  used only in .Lsrc_unaligned (doubleword count against 3).
 *
 * NOTE(review): _GLOBAL_TOC_KASAN, the *_FTR_SECTION* macros, PPC_MTOCRF
 * and the EXPORT_SYMBOL* macros are defined in the included headers, not
 * here; the descriptions of them below are inferred from their names and
 * from the existing comments - confirm against those headers.
 */
	.align	7		/* on PowerPC gas the operand is an exponent: 2^7 = 128-byte alignment */
_GLOBAL_TOC_KASAN(memcpy)
/*
 * Entry alternative keyed on CPU_FTR_VMX_COPY.
 * NOTE(review): presumably the first arm stays in place when the feature
 * bit is clear and the second arm (branch to memcpy_power7) is patched in
 * when it is set - confirm in asm/feature-fixups.h.
 */
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
	cmpdi	cr7,r5,0	/* cr7.eq <=> zero-length copy; consumed by beqlr below */
#else
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
	b	memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
	/* dumb little-endian memcpy that will get replaced at runtime */
	/*
	 * Byte-at-a-time loop.  Both pointers are biased by -1 because
	 * lbzu/stbu pre-increment by 1.  The destination cursor lives in r9,
	 * so r3 still holds the return value at blr.
	 */
	addi r9,r3,-1
	addi r4,r4,-1
	beqlr cr7		/* nothing to copy: return immediately */
	mtctr r5		/* ctr = byte count */
1:	lbzu r10,1(r4)
	stbu r10,1(r9)
	bdnz 1b
	blr
#else
	/*
	 * Big-endian implementation.
	 * Classify the request: length bits into cr7, length vs 16 into cr1,
	 * destination misalignment into r6/cr0.
	 */
	PPC_MTOCRF(0x01,r5)
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7		/* r6 = bytes needed to 8-byte-align dest; cr0.eq <=> already aligned */
	dcbt	0,r4		/* data-cache touch hint for the start of the source */
	blt	cr1,.Lshort_copy	/* fewer than 16 bytes: straight-line copy */
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
/* NOTE(review): test_feature is presumably read by the feature-section
   macros when this file is built as a selftest - confirm. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	/*
	 * Reached by fall-through, or from .Ldst_unaligned once the
	 * destination has been brought to an 8-byte boundary (in which case
	 * r5, cr1 and cr7 have been refreshed for the remaining length).
	 * Bias r3 by -16 to suit the 8(r3) / stdu 16(r3) addressing below.
	 */
	addi	r3,r3,-16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7		/* r0 = source offset within its doubleword */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	/*
	 * Doubleword copy loop, two doublewords (16 bytes) per iteration,
	 * with each load issued one step ahead of the matching store.
	 */
	srdi	r7,r5,4		/* r7 = number of 16-byte iterations */
	ld	r9,0(r4)	/* preload first source doubleword */
	addi	r4,r4,-8	/* bias source for the 8(r4) / ldu 16(r4) addressing */
	mtctr	r7
	andi.	r5,r5,7		/* r5 = trailing bytes (< 8); cr0.eq <=> none */
	bf	cr7*4+0,2f	/* length bit 8 clear: enter the loop at its midpoint */
	/* Length bit 8 set: one extra doubleword, handled by entering at 1. */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f		/* fewer than 16 bytes left (ctr == 0): skip the loop */
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)	/* store the last pending doubleword */
	beq	3f		/* no trailing bytes: go to the epilogue at the next 3: */
	addi	r3,r3,16	/* r3 -> first unwritten destination byte */
.Ldo_tail:
	/* Copy the final 4/2/1 bytes as selected by cr7 bits 1..3; the
	   remaining source bytes start at 8(r4). */
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is 8-byte aligned but the source is not (r0 = source & 7,
 * non-zero).  Read aligned source doublewords and build every destination
 * doubleword as (s[i] << r10) | (s[i+1] >> r11).
 */
.Lsrc_unaligned:
	srdi	r6,r5,3		/* r6 = number of whole doublewords to copy */
	addi	r5,r5,-16
	subf	r4,r0,r4	/* round the source pointer down to an 8-byte boundary */
	srdi	r7,r5,4		/* r7 = (length - 16) / 16 = loop iterations */
	sldi	r10,r0,3	/* r10 = left-shift amount in bits = 8 * (source & 7) */
	cmpdi	cr6,r6,3	/* cr6: doubleword count against 3 (short cases below) */
	andi.	r5,r5,7		/* r5 = trailing bytes; cr0.eq <=> none */
	mtctr	r7
	subfic	r11,r10,64	/* r11 = complementary right-shift amount */
	add	r5,r5,r0	/* trailing bytes + source offset, tested against 8 later */

	bt	cr7*4+0,0f	/* length bit 8 set: take the second variant at 0: */

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f		/* fewer than 3 doublewords: straight to the final stores */
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f		/* fewer than 3 doublewords: a single merged store remains */
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f		/* exactly 3 doublewords: skip the loop */

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

	/* Drain the pipeline: store the doublewords still held in registers. */
3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	/* cr0 still comes from the andi. above.  The next 4: label in the file
	   is the return epilogue at the end of .Lshort_copy. */
	beq	4f
	cmpwi	cr1,r5,8
	addi	r3,r3,32	/* r3 -> first unwritten destination byte */
	sld	r9,r9,r10	/* left-justify the source bytes already loaded */
	ble	cr1,6f		/* those bytes already cover the whole tail */
	ld	r0,8(r4)	/* otherwise merge in one more source doubleword */
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/* r9 holds the tail bytes starting at its most-significant end; each
	   rotldi moves the next 4/2/1 of them to the low end for stw/sth/stb. */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

/*
 * Destination is not 8-byte aligned: copy r6 (1..7) leading bytes as
 * 1 + 2 + 4 byte pieces, using r7 as the running offset, then rejoin the
 * aligned path with the length state refreshed.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5	/* r5 = bytes left after the alignment prefix */
	li	r7,0
	cmpldi	cr1,r5,16	/* refresh cr1 for the reduced length */
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)	/* cr7 = low bits of the remaining length */
	add	r4,r6,r4	/* step both pointers past the prefix */
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes: copy 8 (as two words), 4, 2 and 1 bytes according
 * to cr7 bits 0..3, then return.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL_KASAN(memcpy)