/*
 * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
 * Copyright (C) 2008-2009 PetaLogix
 * Copyright (C) 2007 John Williams
 *
 * Reasonably optimised generic C-code for memmove on Microblaze
 * This is generic C code to do efficient, alignment-aware memmove.
 *
 * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
 * http://www.embedded.com/showArticle.jhtml?articleID=19205567
 *
 * Attempts were made, unsuccessfully, to contact the original
 * author of this code (Michael Morrow, Intel).  Below is the original
 * copyright notice.
 *
 * This software has been developed by Intel Corporation.
 * Intel specifically disclaims all warranties, express or
 * implied, and all liability, including consequential and
 * other indirect damages, for the use of this program, including
 * liability for infringement of any proprietary rights,
 * and including the warranties of merchantability and fitness
 * for a particular purpose. Intel does not assume any
 * responsibility for and errors which may appear in this program
 * not any responsibility to update it.
 */
26322ae8ebSMichal Simek
27d64af918SMichal Simek #include <linux/export.h>
28322ae8ebSMichal Simek #include <linux/types.h>
29322ae8ebSMichal Simek #include <linux/stddef.h>
30322ae8ebSMichal Simek #include <linux/compiler.h>
31322ae8ebSMichal Simek #include <linux/string.h>
32322ae8ebSMichal Simek
#ifdef CONFIG_OPT_LIB_FUNCTION
/**
 * memmove - copy a memory area whose source and destination may overlap
 * @v_dst: where to copy to
 * @v_src: where to copy from
 * @c: number of bytes to copy
 *
 * If the destination is at or below the source the call is forwarded to
 * memcpy().  Otherwise the copy runs downwards from the end of both buffers:
 * trailing bytes are copied one at a time until the destination is word
 * aligned, then whole 32-bit words are stored, then the remaining leading
 * bytes are copied one at a time.
 *
 * When the source is not word aligned at that point, every stored word is
 * assembled from two neighbouring aligned source words.  The first aligned
 * load covers the word that contains the end of the source, so it can read
 * up to three bytes past the source buffer (never past that aligned word).
 *
 * Return: @v_dst
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (!c)
		return v_dst;

	/*
	 * Destination at or below the source: hand over to memcpy().
	 * NOTE(review): this relies on memcpy() copying in ascending address
	 * order; its implementation is not part of this file.
	 */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/*
	 * The word-wise path below is cheap when source and destination share
	 * the same alignment.  If they do not, every word has to be built with
	 * shifts, which might result in bad performance on MicroBlaze systems
	 * without a barrel shifter.
	 */
	/* FIXME this part needs more test */

	/* Descending copy: start one past the last byte of each buffer. */
	dst += c;
	src += c;

	if (c >= 4) {
		uint32_t value, buf_hold;

		/*
		 * Copy 0-3 trailing bytes so that dst becomes word aligned.
		 * Done bytewise, hence independent of endianness.  Since
		 * c >= 4 on entry, at least one byte is left afterwards.
		 */
		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
			fallthrough;
		case 2:
			*--dst = *--src;
			--c;
			fallthrough;
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * dst is aligned now; pick the word copy scheme from the
		 * alignment that src has relative to it.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Source is word aligned as well */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src = (const void *)i_src;
			break;
		case 0x1:	/* One source byte left in the top word */
			/* Round up so that *--i_src is the word holding src - 1 */
			i_src = (const void *)(((unsigned long)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
			/* Keep the byte at the lowest address (MSB) */
			buf_hold = *--i_src >> 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/*
				 * Lower word supplies the three bytes at the
				 * low addresses, the held byte goes last.
				 */
				*--i_dst = (value << 8) | buf_hold;
				buf_hold = value >> 24;
			}
#else
			/* Keep the byte at the lowest address (LSB) */
			buf_hold = (*--i_src & 0xFF) << 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
					   ((value & 0xFFFFFF00) >> 8);
				buf_hold = (value & 0xFF) << 24;
			}
#endif
			/* The held byte is still pending: step src past it */
			src = (const char *)i_src + 1;
			break;
		case 0x2:	/* Two source bytes left in the top word */
			/* Round up so that *--i_src is the word holding src - 1 */
			i_src = (const void *)(((unsigned long)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
			/* Keep the two bytes at the lowest addresses */
			buf_hold = *--i_src >> 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = (value << 16) | buf_hold;
				buf_hold = value >> 16;
			}
#else
			/* Keep the two bytes at the lowest addresses */
			buf_hold = (*--i_src & 0xFFFF) << 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
					   ((value & 0xFFFF0000) >> 16);
				buf_hold = (value & 0xFFFF) << 16;
			}
#endif
			/* Two held bytes are still pending: step src past them */
			src = (const char *)i_src + 2;
			break;
		case 0x3:	/* Three source bytes left in the top word */
			/* Round up so that *--i_src is the word holding src - 1 */
			i_src = (const void *)(((unsigned long)src + 4) & ~3);
#ifndef __MICROBLAZEEL__
			/* Keep the three bytes at the lowest addresses */
			buf_hold = *--i_src >> 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = (value << 24) | buf_hold;
				buf_hold = value >> 8;
			}
#else
			/* Keep the three bytes at the lowest addresses */
			buf_hold = (*--i_src & 0xFFFFFF) << 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
					   ((value & 0xFF000000) >> 24);
				buf_hold = (value & 0xFFFFFF) << 8;
			}
#endif
			/* Three held bytes are still pending: step src past them */
			src = (const char *)i_src + 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off the remaining bytes.  c is below 4 here: either it was
	 * on entry, or one of the word loops above ran until it dropped
	 * below 4.
	 */
	switch (c) {
	case 3:
		*--dst = *--src;
		fallthrough;
	case 2:
		*--dst = *--src;
		fallthrough;
	case 1:
		*--dst = *--src;
	}
	return v_dst;
}
EXPORT_SYMBOL(memmove);
#endif /* CONFIG_OPT_LIB_FUNCTION */
196