/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

/*
 * Endian-dependent building blocks for the unaligned-source paths.
 *
 * SHIFT_1 / SHIFT_2
 *	Split a freshly loaded source word: SHIFT_1 produces the part that
 *	completes the current output word, SHIFT_2 the part that is carried
 *	over (in r5) into the next output word.
 * MERGE_1 / MERGE_2
 *	Position the halfword and the byte read at the start of CASE 1 so
 *	that they can be OR-ed together into the carry register.
 * EXTRACT_1 / EXTRACT_2
 *	Pull the trailing halfword and the trailing byte out of the carry
 *	register once the CASE 1 word loop has finished.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Transfer unit of the aligned loop (CASE 0): 8 bytes per access when
 * CONFIG_ARC_HAS_LL64 is set, 4 bytes otherwise.  The loop body performs
 * four such accesses, so one iteration moves (1 << ZOLSHFT) bytes and
 * ZOLAND masks out the bytes left over for the byte-wise tail.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * memcpy
 *
 * In:   r0 = destination, r1 = source, r2 = number of bytes to copy
 * Out:  r0 is never written, so the caller gets the destination back
 * Uses: r1, r2, r3 (running destination pointer), r4-r10, lp_count and
 *       the flags.  NOTE(review): with CONFIG_ARC_HAS_LL64 the ldd/std
 *       forms presumably operate on register pairs, which would add r11
 *       to the clobber list -- confirm against the ISA manual.
 *
 * Strategy: copies of at most 8 bytes go byte by byte.  Larger copies
 * first byte-copy until the destination is 32bit aligned, then pick one
 * of four word loops depending on the source misalignment (0, 1, 2 or 3).
 * In the misaligned cases r5 carries the source bytes that have been read
 * but not yet stored.
 *
 * This routine intentionally issues no prefetch/prefetchw hints: a write
 * prefetch at a fixed offset ahead of the destination pointer can land on
 * a cache line beyond the end of the buffer and claim it for writing even
 * though memcpy does not own that line.
 */
ENTRY(memcpy)
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; delay slot: keep r0 intact as return value

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2	; delay slot: byte count for the small copy

;;; Copy single bytes until the destination is 32bit aligned
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to loop iterations, unfold x4
;;; (this lsr.f sits in the delay slot of the bnz.d above)
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; bytes left over after the unrolled loop
.Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1	; delay slot: executed for all three cases

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]	; delay slot: executed for CASE 1 and CASE 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6	; r5 = the three bytes read so far

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; up to 7 trailing bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
;;; Read one halfword to reach 32bit alignment of the source
	ldh.ab	r5, [r1, 2]
	sub	r2, r2, 1

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]	; write back the carried 16bits

	and.f	lp_count, r2, 0x07	; up to 7 trailing bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte to achieve the 32bit alignment
;;; (that byte was already loaded into r5 in the bhi.d delay slot)

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]	; write back the carried 8bits

	and.f	lp_count, r2, 0x07	; up to 7 trailing bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END(memcpy)