/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

	.align	7
_GLOBAL(memcpy)
	std	r3,48(r1)	/* save destination pointer for return value */
	PPC_MTOCRF	0x01,r5
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7
	dcbt	0,r4
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned
.Ldst_aligned:			# dest is 8-byte aligned; check source alignment
	andi.	r0,r4,7
	addi	r3,r3,-16
	bne	.Lsrc_unaligned
	srdi	r7,r5,4
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,2f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f
	addi	r3,r3,16
	ld	r9,8(r4)
.Ldo_tail:			# store the final 1-7 bytes held in r9
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,48(r1)	/* return dest pointer */
	blr

.Lsrc_unaligned:		# dest aligned, src misaligned: merge shifted doublewords
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpdi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0

	bt	cr7*4+0,0f

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f
	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail

.Ldst_unaligned:		# copy 1-7 bytes to align dest, then rejoin the aligned path
	PPC_MTOCRF	0x01,r6		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:			# len < 16: move 8/4/2/1 bytes as the low bits of len (cr7) dictate
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,48(r1)	/* return dest pointer */
	blr
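
/*
 * The loops under .Lsrc_unaligned build each aligned destination doubleword
 * from two neighbouring aligned source doublewords, shifting them left by
 * r10 = 8*offset bits and right by r11 = 64 - 8*offset bits and OR-ing the
 * pieces together.  A rough, illustrative C sketch of that idea follows
 * (big-endian byte order assumed, as on the classic PPC64 targets this code
 * was written for; names are hypothetical, and the tail handling and 2-way
 * unrolling done above are omitted):
 *
 *	static void copy_when_src_unaligned(unsigned long *dst,
 *					    const unsigned char *src,
 *					    unsigned long ndoublewords)
 *	{
 *		unsigned long off = (unsigned long)src & 7;	// 1..7 on this path
 *		const unsigned long *s = (const unsigned long *)(src - off);
 *		unsigned int lsh = off * 8;			// role of r10
 *		unsigned int rsh = 64 - lsh;			// role of r11
 *		unsigned long prev = *s++;			// first aligned word
 *
 *		while (ndoublewords--) {
 *			unsigned long next = *s++;
 *			// big-endian: the high bytes of the result come from prev
 *			*dst++ = (prev << lsh) | (next >> rsh);	// sld/srd/or
 *			prev = next;
 *		}
 *	}
 */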