1322ae8ebSMichal Simek/* 2322ae8ebSMichal Simek * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu> 3322ae8ebSMichal Simek * Copyright (C) 2008-2009 PetaLogix 4322ae8ebSMichal Simek * Copyright (C) 2008 Jim Law - Iris LP All rights reserved. 5322ae8ebSMichal Simek * 6322ae8ebSMichal Simek * This file is subject to the terms and conditions of the GNU General 7322ae8ebSMichal Simek * Public License. See the file COPYING in the main directory of this 8322ae8ebSMichal Simek * archive for more details. 9322ae8ebSMichal Simek * 10322ae8ebSMichal Simek * Written by Jim Law <jlaw@irispower.com> 11322ae8ebSMichal Simek * 12322ae8ebSMichal Simek * intended to replace: 13322ae8ebSMichal Simek * memcpy in memcpy.c and 14322ae8ebSMichal Simek * memmove in memmove.c 15322ae8ebSMichal Simek * ... in arch/microblaze/lib 16322ae8ebSMichal Simek * 17322ae8ebSMichal Simek * 18322ae8ebSMichal Simek * assly_fastcopy.S 19322ae8ebSMichal Simek * 20322ae8ebSMichal Simek * Attempt at quicker memcpy and memmove for MicroBlaze 21322ae8ebSMichal Simek * Input : Operand1 in Reg r5 - destination address 22322ae8ebSMichal Simek * Operand2 in Reg r6 - source address 23322ae8ebSMichal Simek * Operand3 in Reg r7 - number of bytes to transfer 24322ae8ebSMichal Simek * Output: Result in Reg r3 - starting destinaition address 25322ae8ebSMichal Simek * 26322ae8ebSMichal Simek * 27322ae8ebSMichal Simek * Explanation: 28322ae8ebSMichal Simek * Perform (possibly unaligned) copy of a block of memory 29322ae8ebSMichal Simek * between mem locations with size of xfer spec'd in bytes 30322ae8ebSMichal Simek */ 31322ae8ebSMichal Simek 32322ae8ebSMichal Simek#include <linux/linkage.h> 33*13851966SMichal Simek .text 34322ae8ebSMichal Simek .globl memcpy 35*13851966SMichal Simek .type memcpy, @function 36322ae8ebSMichal Simek .ent memcpy 37322ae8ebSMichal Simek 38322ae8ebSMichal Simekmemcpy: 39322ae8ebSMichal Simekfast_memcpy_ascending: 40322ae8ebSMichal Simek /* move d to return register as value of 
function */ 41322ae8ebSMichal Simek addi r3, r5, 0 42322ae8ebSMichal Simek 43322ae8ebSMichal Simek addi r4, r0, 4 /* n = 4 */ 44322ae8ebSMichal Simek cmpu r4, r4, r7 /* n = c - n (unsigned) */ 45322ae8ebSMichal Simek blti r4, a_xfer_end /* if n < 0, less than one word to transfer */ 46322ae8ebSMichal Simek 47322ae8ebSMichal Simek /* transfer first 0~3 bytes to get aligned dest address */ 48322ae8ebSMichal Simek andi r4, r5, 3 /* n = d & 3 */ 49322ae8ebSMichal Simek /* if zero, destination already aligned */ 50322ae8ebSMichal Simek beqi r4, a_dalign_done 51322ae8ebSMichal Simek /* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */ 52322ae8ebSMichal Simek rsubi r4, r4, 4 53322ae8ebSMichal Simek rsub r7, r4, r7 /* c = c - n adjust c */ 54322ae8ebSMichal Simek 55322ae8ebSMichal Simeka_xfer_first_loop: 56322ae8ebSMichal Simek /* if no bytes left to transfer, transfer the bulk */ 57322ae8ebSMichal Simek beqi r4, a_dalign_done 58322ae8ebSMichal Simek lbui r11, r6, 0 /* h = *s */ 59322ae8ebSMichal Simek sbi r11, r5, 0 /* *d = h */ 60322ae8ebSMichal Simek addi r6, r6, 1 /* s++ */ 61322ae8ebSMichal Simek addi r5, r5, 1 /* d++ */ 62322ae8ebSMichal Simek brid a_xfer_first_loop /* loop */ 63322ae8ebSMichal Simek addi r4, r4, -1 /* n-- (IN DELAY SLOT) */ 64322ae8ebSMichal Simek 65322ae8ebSMichal Simeka_dalign_done: 66322ae8ebSMichal Simek addi r4, r0, 32 /* n = 32 */ 67322ae8ebSMichal Simek cmpu r4, r4, r7 /* n = c - n (unsigned) */ 68322ae8ebSMichal Simek /* if n < 0, less than one block to transfer */ 69322ae8ebSMichal Simek blti r4, a_block_done 70322ae8ebSMichal Simek 71322ae8ebSMichal Simeka_block_xfer: 72322ae8ebSMichal Simek andi r4, r7, 0xffffffe0 /* n = c & ~31 */ 73322ae8ebSMichal Simek rsub r7, r4, r7 /* c = c - n */ 74322ae8ebSMichal Simek 75322ae8ebSMichal Simek andi r9, r6, 3 /* t1 = s & 3 */ 76322ae8ebSMichal Simek /* if temp != 0, unaligned transfers needed */ 77322ae8ebSMichal Simek bnei r9, a_block_unaligned 78322ae8ebSMichal Simek 
79322ae8ebSMichal Simeka_block_aligned: 80322ae8ebSMichal Simek lwi r9, r6, 0 /* t1 = *(s + 0) */ 81322ae8ebSMichal Simek lwi r10, r6, 4 /* t2 = *(s + 4) */ 82322ae8ebSMichal Simek lwi r11, r6, 8 /* t3 = *(s + 8) */ 83322ae8ebSMichal Simek lwi r12, r6, 12 /* t4 = *(s + 12) */ 84322ae8ebSMichal Simek swi r9, r5, 0 /* *(d + 0) = t1 */ 85322ae8ebSMichal Simek swi r10, r5, 4 /* *(d + 4) = t2 */ 86322ae8ebSMichal Simek swi r11, r5, 8 /* *(d + 8) = t3 */ 87322ae8ebSMichal Simek swi r12, r5, 12 /* *(d + 12) = t4 */ 88322ae8ebSMichal Simek lwi r9, r6, 16 /* t1 = *(s + 16) */ 89322ae8ebSMichal Simek lwi r10, r6, 20 /* t2 = *(s + 20) */ 90322ae8ebSMichal Simek lwi r11, r6, 24 /* t3 = *(s + 24) */ 91322ae8ebSMichal Simek lwi r12, r6, 28 /* t4 = *(s + 28) */ 92322ae8ebSMichal Simek swi r9, r5, 16 /* *(d + 16) = t1 */ 93322ae8ebSMichal Simek swi r10, r5, 20 /* *(d + 20) = t2 */ 94322ae8ebSMichal Simek swi r11, r5, 24 /* *(d + 24) = t3 */ 95322ae8ebSMichal Simek swi r12, r5, 28 /* *(d + 28) = t4 */ 96322ae8ebSMichal Simek addi r6, r6, 32 /* s = s + 32 */ 97322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 98322ae8ebSMichal Simek bneid r4, a_block_aligned /* while (n) loop */ 99322ae8ebSMichal Simek addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */ 100322ae8ebSMichal Simek bri a_block_done 101322ae8ebSMichal Simek 102322ae8ebSMichal Simeka_block_unaligned: 103322ae8ebSMichal Simek andi r8, r6, 0xfffffffc /* as = s & ~3 */ 104322ae8ebSMichal Simek add r6, r6, r4 /* s = s + n */ 105322ae8ebSMichal Simek lwi r11, r8, 0 /* h = *(as + 0) */ 106322ae8ebSMichal Simek 107322ae8ebSMichal Simek addi r9, r9, -1 108322ae8ebSMichal Simek beqi r9, a_block_u1 /* t1 was 1 => 1 byte offset */ 109322ae8ebSMichal Simek addi r9, r9, -1 110322ae8ebSMichal Simek beqi r9, a_block_u2 /* t1 was 2 => 2 byte offset */ 111322ae8ebSMichal Simek 112322ae8ebSMichal Simeka_block_u3: 113322ae8ebSMichal Simek bslli r11, r11, 24 /* h = h << 24 */ 114322ae8ebSMichal Simeka_bu3_loop: 115322ae8ebSMichal Simek 
lwi r12, r8, 4 /* v = *(as + 4) */ 116322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 117322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 118322ae8ebSMichal Simek swi r9, r5, 0 /* *(d + 0) = t1 */ 119322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 120322ae8ebSMichal Simek lwi r12, r8, 8 /* v = *(as + 8) */ 121322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 122322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 123322ae8ebSMichal Simek swi r9, r5, 4 /* *(d + 4) = t1 */ 124322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 125322ae8ebSMichal Simek lwi r12, r8, 12 /* v = *(as + 12) */ 126322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 127322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 128322ae8ebSMichal Simek swi r9, r5, 8 /* *(d + 8) = t1 */ 129322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 130322ae8ebSMichal Simek lwi r12, r8, 16 /* v = *(as + 16) */ 131322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 132322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 133322ae8ebSMichal Simek swi r9, r5, 12 /* *(d + 12) = t1 */ 134322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 135322ae8ebSMichal Simek lwi r12, r8, 20 /* v = *(as + 20) */ 136322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 137322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 138322ae8ebSMichal Simek swi r9, r5, 16 /* *(d + 16) = t1 */ 139322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 140322ae8ebSMichal Simek lwi r12, r8, 24 /* v = *(as + 24) */ 141322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 142322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 143322ae8ebSMichal Simek swi r9, r5, 20 /* *(d + 20) = t1 */ 144322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 145322ae8ebSMichal Simek lwi r12, r8, 28 /* v = *(as + 28) */ 146322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 147322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 148322ae8ebSMichal Simek swi r9, r5, 
24 /* *(d + 24) = t1 */ 149322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 150322ae8ebSMichal Simek lwi r12, r8, 32 /* v = *(as + 32) */ 151322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 152322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 153322ae8ebSMichal Simek swi r9, r5, 28 /* *(d + 28) = t1 */ 154322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 155322ae8ebSMichal Simek addi r8, r8, 32 /* as = as + 32 */ 156322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 157322ae8ebSMichal Simek bneid r4, a_bu3_loop /* while (n) loop */ 158322ae8ebSMichal Simek addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */ 159322ae8ebSMichal Simek bri a_block_done 160322ae8ebSMichal Simek 161322ae8ebSMichal Simeka_block_u1: 162322ae8ebSMichal Simek bslli r11, r11, 8 /* h = h << 8 */ 163322ae8ebSMichal Simeka_bu1_loop: 164322ae8ebSMichal Simek lwi r12, r8, 4 /* v = *(as + 4) */ 165322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 166322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 167322ae8ebSMichal Simek swi r9, r5, 0 /* *(d + 0) = t1 */ 168322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 169322ae8ebSMichal Simek lwi r12, r8, 8 /* v = *(as + 8) */ 170322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 171322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 172322ae8ebSMichal Simek swi r9, r5, 4 /* *(d + 4) = t1 */ 173322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 174322ae8ebSMichal Simek lwi r12, r8, 12 /* v = *(as + 12) */ 175322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 176322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 177322ae8ebSMichal Simek swi r9, r5, 8 /* *(d + 8) = t1 */ 178322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 179322ae8ebSMichal Simek lwi r12, r8, 16 /* v = *(as + 16) */ 180322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 181322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 182322ae8ebSMichal Simek swi r9, r5, 12 /* *(d + 12) = t1 */ 
183322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 184322ae8ebSMichal Simek lwi r12, r8, 20 /* v = *(as + 20) */ 185322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 186322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 187322ae8ebSMichal Simek swi r9, r5, 16 /* *(d + 16) = t1 */ 188322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 189322ae8ebSMichal Simek lwi r12, r8, 24 /* v = *(as + 24) */ 190322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 191322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 192322ae8ebSMichal Simek swi r9, r5, 20 /* *(d + 20) = t1 */ 193322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 194322ae8ebSMichal Simek lwi r12, r8, 28 /* v = *(as + 28) */ 195322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 196322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 197322ae8ebSMichal Simek swi r9, r5, 24 /* *(d + 24) = t1 */ 198322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 199322ae8ebSMichal Simek lwi r12, r8, 32 /* v = *(as + 32) */ 200322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 201322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 202322ae8ebSMichal Simek swi r9, r5, 28 /* *(d + 28) = t1 */ 203322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 204322ae8ebSMichal Simek addi r8, r8, 32 /* as = as + 32 */ 205322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 206322ae8ebSMichal Simek bneid r4, a_bu1_loop /* while (n) loop */ 207322ae8ebSMichal Simek addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */ 208322ae8ebSMichal Simek bri a_block_done 209322ae8ebSMichal Simek 210322ae8ebSMichal Simeka_block_u2: 211322ae8ebSMichal Simek bslli r11, r11, 16 /* h = h << 16 */ 212322ae8ebSMichal Simeka_bu2_loop: 213322ae8ebSMichal Simek lwi r12, r8, 4 /* v = *(as + 4) */ 214322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 215322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 216322ae8ebSMichal Simek swi r9, r5, 0 /* *(d + 0) = t1 */ 217322ae8ebSMichal Simek 
bslli r11, r12, 16 /* h = v << 16 */ 218322ae8ebSMichal Simek lwi r12, r8, 8 /* v = *(as + 8) */ 219322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 220322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 221322ae8ebSMichal Simek swi r9, r5, 4 /* *(d + 4) = t1 */ 222322ae8ebSMichal Simek bslli r11, r12, 16 /* h = v << 16 */ 223322ae8ebSMichal Simek lwi r12, r8, 12 /* v = *(as + 12) */ 224322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 225322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 226322ae8ebSMichal Simek swi r9, r5, 8 /* *(d + 8) = t1 */ 227322ae8ebSMichal Simek bslli r11, r12, 16 /* h = v << 16 */ 228322ae8ebSMichal Simek lwi r12, r8, 16 /* v = *(as + 16) */ 229322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 230322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 231322ae8ebSMichal Simek swi r9, r5, 12 /* *(d + 12) = t1 */ 232322ae8ebSMichal Simek bslli r11, r12, 16 /* h = v << 16 */ 233322ae8ebSMichal Simek lwi r12, r8, 20 /* v = *(as + 20) */ 234322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 235322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 236322ae8ebSMichal Simek swi r9, r5, 16 /* *(d + 16) = t1 */ 237322ae8ebSMichal Simek bslli r11, r12, 16 /* h = v << 16 */ 238322ae8ebSMichal Simek lwi r12, r8, 24 /* v = *(as + 24) */ 239322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 240322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 241322ae8ebSMichal Simek swi r9, r5, 20 /* *(d + 20) = t1 */ 242322ae8ebSMichal Simek bslli r11, r12, 16 /* h = v << 16 */ 243322ae8ebSMichal Simek lwi r12, r8, 28 /* v = *(as + 28) */ 244322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 245322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 246322ae8ebSMichal Simek swi r9, r5, 24 /* *(d + 24) = t1 */ 247322ae8ebSMichal Simek bslli r11, r12, 16 /* h = v << 16 */ 248322ae8ebSMichal Simek lwi r12, r8, 32 /* v = *(as + 32) */ 249322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 
250322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 251322ae8ebSMichal Simek swi r9, r5, 28 /* *(d + 28) = t1 */ 252322ae8ebSMichal Simek bslli r11, r12, 16 /* h = v << 16 */ 253322ae8ebSMichal Simek addi r8, r8, 32 /* as = as + 32 */ 254322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 255322ae8ebSMichal Simek bneid r4, a_bu2_loop /* while (n) loop */ 256322ae8ebSMichal Simek addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */ 257322ae8ebSMichal Simek 258322ae8ebSMichal Simeka_block_done: 259322ae8ebSMichal Simek addi r4, r0, 4 /* n = 4 */ 260322ae8ebSMichal Simek cmpu r4, r4, r7 /* n = c - n (unsigned) */ 261322ae8ebSMichal Simek blti r4, a_xfer_end /* if n < 0, less than one word to transfer */ 262322ae8ebSMichal Simek 263322ae8ebSMichal Simeka_word_xfer: 264322ae8ebSMichal Simek andi r4, r7, 0xfffffffc /* n = c & ~3 */ 265322ae8ebSMichal Simek addi r10, r0, 0 /* offset = 0 */ 266322ae8ebSMichal Simek 267322ae8ebSMichal Simek andi r9, r6, 3 /* t1 = s & 3 */ 268322ae8ebSMichal Simek /* if temp != 0, unaligned transfers needed */ 269322ae8ebSMichal Simek bnei r9, a_word_unaligned 270322ae8ebSMichal Simek 271322ae8ebSMichal Simeka_word_aligned: 272322ae8ebSMichal Simek lw r9, r6, r10 /* t1 = *(s+offset) */ 273322ae8ebSMichal Simek sw r9, r5, r10 /* *(d+offset) = t1 */ 274322ae8ebSMichal Simek addi r4, r4,-4 /* n-- */ 275322ae8ebSMichal Simek bneid r4, a_word_aligned /* loop */ 276322ae8ebSMichal Simek addi r10, r10, 4 /* offset++ (IN DELAY SLOT) */ 277322ae8ebSMichal Simek 278322ae8ebSMichal Simek bri a_word_done 279322ae8ebSMichal Simek 280322ae8ebSMichal Simeka_word_unaligned: 281322ae8ebSMichal Simek andi r8, r6, 0xfffffffc /* as = s & ~3 */ 282322ae8ebSMichal Simek lwi r11, r8, 0 /* h = *(as + 0) */ 283322ae8ebSMichal Simek addi r8, r8, 4 /* as = as + 4 */ 284322ae8ebSMichal Simek 285322ae8ebSMichal Simek addi r9, r9, -1 286322ae8ebSMichal Simek beqi r9, a_word_u1 /* t1 was 1 => 1 byte offset */ 287322ae8ebSMichal Simek addi r9, r9, -1 
288322ae8ebSMichal Simek beqi r9, a_word_u2 /* t1 was 2 => 2 byte offset */ 289322ae8ebSMichal Simek 290322ae8ebSMichal Simeka_word_u3: 291322ae8ebSMichal Simek bslli r11, r11, 24 /* h = h << 24 */ 292322ae8ebSMichal Simeka_wu3_loop: 293322ae8ebSMichal Simek lw r12, r8, r10 /* v = *(as + offset) */ 294322ae8ebSMichal Simek bsrli r9, r12, 8 /* t1 = v >> 8 */ 295322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 296322ae8ebSMichal Simek sw r9, r5, r10 /* *(d + offset) = t1 */ 297322ae8ebSMichal Simek bslli r11, r12, 24 /* h = v << 24 */ 298322ae8ebSMichal Simek addi r4, r4,-4 /* n = n - 4 */ 299322ae8ebSMichal Simek bneid r4, a_wu3_loop /* while (n) loop */ 300322ae8ebSMichal Simek addi r10, r10, 4 /* offset = ofset + 4 (IN DELAY SLOT) */ 301322ae8ebSMichal Simek 302322ae8ebSMichal Simek bri a_word_done 303322ae8ebSMichal Simek 304322ae8ebSMichal Simeka_word_u1: 305322ae8ebSMichal Simek bslli r11, r11, 8 /* h = h << 8 */ 306322ae8ebSMichal Simeka_wu1_loop: 307322ae8ebSMichal Simek lw r12, r8, r10 /* v = *(as + offset) */ 308322ae8ebSMichal Simek bsrli r9, r12, 24 /* t1 = v >> 24 */ 309322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 310322ae8ebSMichal Simek sw r9, r5, r10 /* *(d + offset) = t1 */ 311322ae8ebSMichal Simek bslli r11, r12, 8 /* h = v << 8 */ 312322ae8ebSMichal Simek addi r4, r4,-4 /* n = n - 4 */ 313322ae8ebSMichal Simek bneid r4, a_wu1_loop /* while (n) loop */ 314322ae8ebSMichal Simek addi r10, r10, 4 /* offset = ofset + 4 (IN DELAY SLOT) */ 315322ae8ebSMichal Simek 316322ae8ebSMichal Simek bri a_word_done 317322ae8ebSMichal Simek 318322ae8ebSMichal Simeka_word_u2: 319322ae8ebSMichal Simek bslli r11, r11, 16 /* h = h << 16 */ 320322ae8ebSMichal Simeka_wu2_loop: 321322ae8ebSMichal Simek lw r12, r8, r10 /* v = *(as + offset) */ 322322ae8ebSMichal Simek bsrli r9, r12, 16 /* t1 = v >> 16 */ 323322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 324322ae8ebSMichal Simek sw r9, r5, r10 /* *(d + offset) = t1 */ 325322ae8ebSMichal Simek bslli 
r11, r12, 16 /* h = v << 16 */ 326322ae8ebSMichal Simek addi r4, r4,-4 /* n = n - 4 */ 327322ae8ebSMichal Simek bneid r4, a_wu2_loop /* while (n) loop */ 328322ae8ebSMichal Simek addi r10, r10, 4 /* offset = ofset + 4 (IN DELAY SLOT) */ 329322ae8ebSMichal Simek 330322ae8ebSMichal Simeka_word_done: 331322ae8ebSMichal Simek add r5, r5, r10 /* d = d + offset */ 332322ae8ebSMichal Simek add r6, r6, r10 /* s = s + offset */ 333322ae8ebSMichal Simek rsub r7, r10, r7 /* c = c - offset */ 334322ae8ebSMichal Simek 335322ae8ebSMichal Simeka_xfer_end: 336322ae8ebSMichal Simeka_xfer_end_loop: 337322ae8ebSMichal Simek beqi r7, a_done /* while (c) */ 338322ae8ebSMichal Simek lbui r9, r6, 0 /* t1 = *s */ 339322ae8ebSMichal Simek addi r6, r6, 1 /* s++ */ 340322ae8ebSMichal Simek sbi r9, r5, 0 /* *d = t1 */ 341322ae8ebSMichal Simek addi r7, r7, -1 /* c-- */ 342322ae8ebSMichal Simek brid a_xfer_end_loop /* loop */ 343322ae8ebSMichal Simek addi r5, r5, 1 /* d++ (IN DELAY SLOT) */ 344322ae8ebSMichal Simek 345322ae8ebSMichal Simeka_done: 346322ae8ebSMichal Simek rtsd r15, 8 347322ae8ebSMichal Simek nop 348322ae8ebSMichal Simek 349*13851966SMichal Simek.size memcpy, . 
- memcpy 350322ae8ebSMichal Simek.end memcpy 351322ae8ebSMichal Simek/*----------------------------------------------------------------------------*/ 352322ae8ebSMichal Simek .globl memmove 353*13851966SMichal Simek .type memmove, @function 354322ae8ebSMichal Simek .ent memmove 355322ae8ebSMichal Simek 356322ae8ebSMichal Simekmemmove: 357322ae8ebSMichal Simek cmpu r4, r5, r6 /* n = s - d */ 358322ae8ebSMichal Simek bgei r4,fast_memcpy_ascending 359322ae8ebSMichal Simek 360322ae8ebSMichal Simekfast_memcpy_descending: 361322ae8ebSMichal Simek /* move d to return register as value of function */ 362322ae8ebSMichal Simek addi r3, r5, 0 363322ae8ebSMichal Simek 364322ae8ebSMichal Simek add r5, r5, r7 /* d = d + c */ 365322ae8ebSMichal Simek add r6, r6, r7 /* s = s + c */ 366322ae8ebSMichal Simek 367322ae8ebSMichal Simek addi r4, r0, 4 /* n = 4 */ 368322ae8ebSMichal Simek cmpu r4, r4, r7 /* n = c - n (unsigned) */ 369322ae8ebSMichal Simek blti r4,d_xfer_end /* if n < 0, less than one word to transfer */ 370322ae8ebSMichal Simek 371322ae8ebSMichal Simek /* transfer first 0~3 bytes to get aligned dest address */ 372322ae8ebSMichal Simek andi r4, r5, 3 /* n = d & 3 */ 373322ae8ebSMichal Simek /* if zero, destination already aligned */ 374322ae8ebSMichal Simek beqi r4,d_dalign_done 375322ae8ebSMichal Simek rsub r7, r4, r7 /* c = c - n adjust c */ 376322ae8ebSMichal Simek 377322ae8ebSMichal Simekd_xfer_first_loop: 378322ae8ebSMichal Simek /* if no bytes left to transfer, transfer the bulk */ 379322ae8ebSMichal Simek beqi r4,d_dalign_done 380322ae8ebSMichal Simek addi r6, r6, -1 /* s-- */ 381322ae8ebSMichal Simek addi r5, r5, -1 /* d-- */ 382322ae8ebSMichal Simek lbui r11, r6, 0 /* h = *s */ 383322ae8ebSMichal Simek sbi r11, r5, 0 /* *d = h */ 384322ae8ebSMichal Simek brid d_xfer_first_loop /* loop */ 385322ae8ebSMichal Simek addi r4, r4, -1 /* n-- (IN DELAY SLOT) */ 386322ae8ebSMichal Simek 387322ae8ebSMichal Simekd_dalign_done: 388322ae8ebSMichal Simek addi r4, r0, 32 /* n = 
32 */ 389322ae8ebSMichal Simek cmpu r4, r4, r7 /* n = c - n (unsigned) */ 390322ae8ebSMichal Simek /* if n < 0, less than one block to transfer */ 391322ae8ebSMichal Simek blti r4, d_block_done 392322ae8ebSMichal Simek 393322ae8ebSMichal Simekd_block_xfer: 394322ae8ebSMichal Simek andi r4, r7, 0xffffffe0 /* n = c & ~31 */ 395322ae8ebSMichal Simek rsub r7, r4, r7 /* c = c - n */ 396322ae8ebSMichal Simek 397322ae8ebSMichal Simek andi r9, r6, 3 /* t1 = s & 3 */ 398322ae8ebSMichal Simek /* if temp != 0, unaligned transfers needed */ 399322ae8ebSMichal Simek bnei r9, d_block_unaligned 400322ae8ebSMichal Simek 401322ae8ebSMichal Simekd_block_aligned: 402322ae8ebSMichal Simek addi r6, r6, -32 /* s = s - 32 */ 403322ae8ebSMichal Simek addi r5, r5, -32 /* d = d - 32 */ 404322ae8ebSMichal Simek lwi r9, r6, 28 /* t1 = *(s + 28) */ 405322ae8ebSMichal Simek lwi r10, r6, 24 /* t2 = *(s + 24) */ 406322ae8ebSMichal Simek lwi r11, r6, 20 /* t3 = *(s + 20) */ 407322ae8ebSMichal Simek lwi r12, r6, 16 /* t4 = *(s + 16) */ 408322ae8ebSMichal Simek swi r9, r5, 28 /* *(d + 28) = t1 */ 409322ae8ebSMichal Simek swi r10, r5, 24 /* *(d + 24) = t2 */ 410322ae8ebSMichal Simek swi r11, r5, 20 /* *(d + 20) = t3 */ 411322ae8ebSMichal Simek swi r12, r5, 16 /* *(d + 16) = t4 */ 412322ae8ebSMichal Simek lwi r9, r6, 12 /* t1 = *(s + 12) */ 413322ae8ebSMichal Simek lwi r10, r6, 8 /* t2 = *(s + 8) */ 414322ae8ebSMichal Simek lwi r11, r6, 4 /* t3 = *(s + 4) */ 415322ae8ebSMichal Simek lwi r12, r6, 0 /* t4 = *(s + 0) */ 416322ae8ebSMichal Simek swi r9, r5, 12 /* *(d + 12) = t1 */ 417322ae8ebSMichal Simek swi r10, r5, 8 /* *(d + 8) = t2 */ 418322ae8ebSMichal Simek swi r11, r5, 4 /* *(d + 4) = t3 */ 419322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 420322ae8ebSMichal Simek bneid r4, d_block_aligned /* while (n) loop */ 421322ae8ebSMichal Simek swi r12, r5, 0 /* *(d + 0) = t4 (IN DELAY SLOT) */ 422322ae8ebSMichal Simek bri d_block_done 423322ae8ebSMichal Simek 424322ae8ebSMichal 
Simekd_block_unaligned: 425322ae8ebSMichal Simek andi r8, r6, 0xfffffffc /* as = s & ~3 */ 426322ae8ebSMichal Simek rsub r6, r4, r6 /* s = s - n */ 427322ae8ebSMichal Simek lwi r11, r8, 0 /* h = *(as + 0) */ 428322ae8ebSMichal Simek 429322ae8ebSMichal Simek addi r9, r9, -1 430322ae8ebSMichal Simek beqi r9,d_block_u1 /* t1 was 1 => 1 byte offset */ 431322ae8ebSMichal Simek addi r9, r9, -1 432322ae8ebSMichal Simek beqi r9,d_block_u2 /* t1 was 2 => 2 byte offset */ 433322ae8ebSMichal Simek 434322ae8ebSMichal Simekd_block_u3: 435322ae8ebSMichal Simek bsrli r11, r11, 8 /* h = h >> 8 */ 436322ae8ebSMichal Simekd_bu3_loop: 437322ae8ebSMichal Simek addi r8, r8, -32 /* as = as - 32 */ 438322ae8ebSMichal Simek addi r5, r5, -32 /* d = d - 32 */ 439322ae8ebSMichal Simek lwi r12, r8, 28 /* v = *(as + 28) */ 440322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 441322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 442322ae8ebSMichal Simek swi r9, r5, 28 /* *(d + 28) = t1 */ 443322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 */ 444322ae8ebSMichal Simek lwi r12, r8, 24 /* v = *(as + 24) */ 445322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 446322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 447322ae8ebSMichal Simek swi r9, r5, 24 /* *(d + 24) = t1 */ 448322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 */ 449322ae8ebSMichal Simek lwi r12, r8, 20 /* v = *(as + 20) */ 450322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 451322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 452322ae8ebSMichal Simek swi r9, r5, 20 /* *(d + 20) = t1 */ 453322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 */ 454322ae8ebSMichal Simek lwi r12, r8, 16 /* v = *(as + 16) */ 455322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 456322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 457322ae8ebSMichal Simek swi r9, r5, 16 /* *(d + 16) = t1 */ 458322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 */ 459322ae8ebSMichal Simek lwi r12, r8, 12 /* 
v = *(as + 12) */ 460322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 461322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 462322ae8ebSMichal Simek swi r9, r5, 12 /* *(d + 112) = t1 */ 463322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 */ 464322ae8ebSMichal Simek lwi r12, r8, 8 /* v = *(as + 8) */ 465322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 466322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 467322ae8ebSMichal Simek swi r9, r5, 8 /* *(d + 8) = t1 */ 468322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 */ 469322ae8ebSMichal Simek lwi r12, r8, 4 /* v = *(as + 4) */ 470322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 471322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 472322ae8ebSMichal Simek swi r9, r5, 4 /* *(d + 4) = t1 */ 473322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 */ 474322ae8ebSMichal Simek lwi r12, r8, 0 /* v = *(as + 0) */ 475322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 476322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 477322ae8ebSMichal Simek swi r9, r5, 0 /* *(d + 0) = t1 */ 478322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 479322ae8ebSMichal Simek bneid r4, d_bu3_loop /* while (n) loop */ 480322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */ 481322ae8ebSMichal Simek bri d_block_done 482322ae8ebSMichal Simek 483322ae8ebSMichal Simekd_block_u1: 484322ae8ebSMichal Simek bsrli r11, r11, 24 /* h = h >> 24 */ 485322ae8ebSMichal Simekd_bu1_loop: 486322ae8ebSMichal Simek addi r8, r8, -32 /* as = as - 32 */ 487322ae8ebSMichal Simek addi r5, r5, -32 /* d = d - 32 */ 488322ae8ebSMichal Simek lwi r12, r8, 28 /* v = *(as + 28) */ 489322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 490322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 491322ae8ebSMichal Simek swi r9, r5, 28 /* *(d + 28) = t1 */ 492322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 */ 493322ae8ebSMichal Simek lwi r12, r8, 24 /* v = *(as + 24) */ 
494322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 495322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 496322ae8ebSMichal Simek swi r9, r5, 24 /* *(d + 24) = t1 */ 497322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 */ 498322ae8ebSMichal Simek lwi r12, r8, 20 /* v = *(as + 20) */ 499322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 500322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 501322ae8ebSMichal Simek swi r9, r5, 20 /* *(d + 20) = t1 */ 502322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 */ 503322ae8ebSMichal Simek lwi r12, r8, 16 /* v = *(as + 16) */ 504322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 505322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 506322ae8ebSMichal Simek swi r9, r5, 16 /* *(d + 16) = t1 */ 507322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 */ 508322ae8ebSMichal Simek lwi r12, r8, 12 /* v = *(as + 12) */ 509322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 510322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 511322ae8ebSMichal Simek swi r9, r5, 12 /* *(d + 112) = t1 */ 512322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 */ 513322ae8ebSMichal Simek lwi r12, r8, 8 /* v = *(as + 8) */ 514322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 515322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 516322ae8ebSMichal Simek swi r9, r5, 8 /* *(d + 8) = t1 */ 517322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 */ 518322ae8ebSMichal Simek lwi r12, r8, 4 /* v = *(as + 4) */ 519322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 520322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 521322ae8ebSMichal Simek swi r9, r5, 4 /* *(d + 4) = t1 */ 522322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 */ 523322ae8ebSMichal Simek lwi r12, r8, 0 /* v = *(as + 0) */ 524322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 525322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 526322ae8ebSMichal Simek swi r9, r5, 0 /* *(d + 0) = t1 */ 
527322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 528322ae8ebSMichal Simek bneid r4, d_bu1_loop /* while (n) loop */ 529322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */ 530322ae8ebSMichal Simek bri d_block_done 531322ae8ebSMichal Simek 532322ae8ebSMichal Simekd_block_u2: 533322ae8ebSMichal Simek bsrli r11, r11, 16 /* h = h >> 16 */ 534322ae8ebSMichal Simekd_bu2_loop: 535322ae8ebSMichal Simek addi r8, r8, -32 /* as = as - 32 */ 536322ae8ebSMichal Simek addi r5, r5, -32 /* d = d - 32 */ 537322ae8ebSMichal Simek lwi r12, r8, 28 /* v = *(as + 28) */ 538322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 539322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 540322ae8ebSMichal Simek swi r9, r5, 28 /* *(d + 28) = t1 */ 541322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 */ 542322ae8ebSMichal Simek lwi r12, r8, 24 /* v = *(as + 24) */ 543322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 544322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 545322ae8ebSMichal Simek swi r9, r5, 24 /* *(d + 24) = t1 */ 546322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 */ 547322ae8ebSMichal Simek lwi r12, r8, 20 /* v = *(as + 20) */ 548322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 549322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 550322ae8ebSMichal Simek swi r9, r5, 20 /* *(d + 20) = t1 */ 551322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 */ 552322ae8ebSMichal Simek lwi r12, r8, 16 /* v = *(as + 16) */ 553322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 554322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 555322ae8ebSMichal Simek swi r9, r5, 16 /* *(d + 16) = t1 */ 556322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 */ 557322ae8ebSMichal Simek lwi r12, r8, 12 /* v = *(as + 12) */ 558322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 559322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 560322ae8ebSMichal Simek swi r9, r5, 12 /* *(d + 112) = t1 */ 
561322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 */ 562322ae8ebSMichal Simek lwi r12, r8, 8 /* v = *(as + 8) */ 563322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 564322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 565322ae8ebSMichal Simek swi r9, r5, 8 /* *(d + 8) = t1 */ 566322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 */ 567322ae8ebSMichal Simek lwi r12, r8, 4 /* v = *(as + 4) */ 568322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 569322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 570322ae8ebSMichal Simek swi r9, r5, 4 /* *(d + 4) = t1 */ 571322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 */ 572322ae8ebSMichal Simek lwi r12, r8, 0 /* v = *(as + 0) */ 573322ae8ebSMichal Simek bslli r9, r12, 16 /* t1 = v << 16 */ 574322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 575322ae8ebSMichal Simek swi r9, r5, 0 /* *(d + 0) = t1 */ 576322ae8ebSMichal Simek addi r4, r4, -32 /* n = n - 32 */ 577322ae8ebSMichal Simek bneid r4, d_bu2_loop /* while (n) loop */ 578322ae8ebSMichal Simek bsrli r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */ 579322ae8ebSMichal Simek 580322ae8ebSMichal Simekd_block_done: 581322ae8ebSMichal Simek addi r4, r0, 4 /* n = 4 */ 582322ae8ebSMichal Simek cmpu r4, r4, r7 /* n = c - n (unsigned) */ 583322ae8ebSMichal Simek blti r4,d_xfer_end /* if n < 0, less than one word to transfer */ 584322ae8ebSMichal Simek 585322ae8ebSMichal Simekd_word_xfer: 586322ae8ebSMichal Simek andi r4, r7, 0xfffffffc /* n = c & ~3 */ 587322ae8ebSMichal Simek rsub r5, r4, r5 /* d = d - n */ 588322ae8ebSMichal Simek rsub r6, r4, r6 /* s = s - n */ 589322ae8ebSMichal Simek rsub r7, r4, r7 /* c = c - n */ 590322ae8ebSMichal Simek 591322ae8ebSMichal Simek andi r9, r6, 3 /* t1 = s & 3 */ 592322ae8ebSMichal Simek /* if temp != 0, unaligned transfers needed */ 593322ae8ebSMichal Simek bnei r9, d_word_unaligned 594322ae8ebSMichal Simek 595322ae8ebSMichal Simekd_word_aligned: 596322ae8ebSMichal Simek addi r4, r4,-4 /* n-- */ 
597322ae8ebSMichal Simek lw r9, r6, r4 /* t1 = *(s+n) */ 598322ae8ebSMichal Simek bneid r4, d_word_aligned /* loop */ 599322ae8ebSMichal Simek sw r9, r5, r4 /* *(d+n) = t1 (IN DELAY SLOT) */ 600322ae8ebSMichal Simek 601322ae8ebSMichal Simek bri d_word_done 602322ae8ebSMichal Simek 603322ae8ebSMichal Simekd_word_unaligned: 604322ae8ebSMichal Simek andi r8, r6, 0xfffffffc /* as = s & ~3 */ 605322ae8ebSMichal Simek lw r11, r8, r4 /* h = *(as + n) */ 606322ae8ebSMichal Simek 607322ae8ebSMichal Simek addi r9, r9, -1 608322ae8ebSMichal Simek beqi r9,d_word_u1 /* t1 was 1 => 1 byte offset */ 609322ae8ebSMichal Simek addi r9, r9, -1 610322ae8ebSMichal Simek beqi r9,d_word_u2 /* t1 was 2 => 2 byte offset */ 611322ae8ebSMichal Simek 612322ae8ebSMichal Simekd_word_u3: 613322ae8ebSMichal Simek bsrli r11, r11, 8 /* h = h >> 8 */ 614322ae8ebSMichal Simekd_wu3_loop: 615322ae8ebSMichal Simek addi r4, r4,-4 /* n = n - 4 */ 616322ae8ebSMichal Simek lw r12, r8, r4 /* v = *(as + n) */ 617322ae8ebSMichal Simek bslli r9, r12, 24 /* t1 = v << 24 */ 618322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 619322ae8ebSMichal Simek sw r9, r5, r4 /* *(d + n) = t1 */ 620322ae8ebSMichal Simek bneid r4, d_wu3_loop /* while (n) loop */ 621322ae8ebSMichal Simek bsrli r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */ 622322ae8ebSMichal Simek 623322ae8ebSMichal Simek bri d_word_done 624322ae8ebSMichal Simek 625322ae8ebSMichal Simekd_word_u1: 626322ae8ebSMichal Simek bsrli r11, r11, 24 /* h = h >> 24 */ 627322ae8ebSMichal Simekd_wu1_loop: 628322ae8ebSMichal Simek addi r4, r4,-4 /* n = n - 4 */ 629322ae8ebSMichal Simek lw r12, r8, r4 /* v = *(as + n) */ 630322ae8ebSMichal Simek bslli r9, r12, 8 /* t1 = v << 8 */ 631322ae8ebSMichal Simek or r9, r11, r9 /* t1 = h | t1 */ 632322ae8ebSMichal Simek sw r9, r5, r4 /* *(d + n) = t1 */ 633322ae8ebSMichal Simek bneid r4, d_wu1_loop /* while (n) loop */ 634322ae8ebSMichal Simek bsrli r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */ 635322ae8ebSMichal Simek 
/*
 * memmove, descending copy: end of the word-at-a-time phase, the trailing
 * byte loop, and the function exit.
 *
 * Register roles, as named by the per-instruction comments:
 *   r5  = d   destination pointer
 *   r6  = s   source pointer
 *   r7  = c   bytes still to copy
 *   r4  = n   byte offset remaining in the word phase (multiple of 4)
 *   r8  = as  source pointer rounded down to a word boundary
 *   r11 = h   bytes carried over from the previously loaded word
 *   r12 = v   word just loaded
 *   r9  = t1  scratch / value to store
 */
	bri	d_word_done		/* 1-byte-offset loop done; skip the u2 variant */

/* Source is 2 bytes past a word boundary: merge two half words per store. */
d_word_u2:
	bsrli	r11, r11, 16		/* h = h >> 16 */
d_wu2_loop:
	addi	r4, r4,-4		/* n = n - 4 */
	lw	r12, r8, r4		/* v = *(as + n) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r4		/* *(d + n) = t1 */
	bneid	r4, d_wu2_loop		/* while (n) loop */
	bsrli	r11, r12, 16		/* h = v >> 16 (IN DELAY SLOT) */

d_word_done:

/* Copy the remaining c bytes one at a time, pre-decrementing s and d. */
d_xfer_end:
d_xfer_end_loop:
	/*
	 * Leave through memmove's own exit label.  The branch previously
	 * targeted a_done, which lies outside this routine; d_done keeps
	 * the control flow inside the .size/.end range declared below.
	 */
	beqi	r7, d_done		/* while (c) */
	addi	r6, r6, -1		/* s-- */
	lbui	r9, r6, 0		/* t1 = *s */
	addi	r5, r5, -1		/* d-- */
	sbi	r9, r5, 0		/* *d = t1 */
	brid	d_xfer_end_loop		/* loop */
	addi	r7, r7, -1		/* c-- (IN DELAY SLOT) */

d_done:
	rtsd	r15, 8			/* return to caller */
	nop				/* (IN DELAY SLOT) */

.size  memmove, . - memmove
.end memmove