xref: /openbmc/linux/arch/microblaze/lib/memmove.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
1322ae8ebSMichal Simek /*
2322ae8ebSMichal Simek  * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
3322ae8ebSMichal Simek  * Copyright (C) 2008-2009 PetaLogix
4322ae8ebSMichal Simek  * Copyright (C) 2007 John Williams
5322ae8ebSMichal Simek  *
 * Reasonably optimised generic C-code for memmove on Microblaze
7322ae8ebSMichal Simek  * This is generic C code to do efficient, alignment-aware memmove.
8322ae8ebSMichal Simek  *
9322ae8ebSMichal Simek  * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
10322ae8ebSMichal Simek  * http://www.embedded.com/showArticle.jhtml?articleID=19205567
11322ae8ebSMichal Simek  *
12af901ca1SAndré Goddard Rosa  * Attempts were made, unsuccessfully, to contact the original
13322ae8ebSMichal Simek  * author of this code (Michael Morrow, Intel).  Below is the original
14322ae8ebSMichal Simek  * copyright notice.
15322ae8ebSMichal Simek  *
16322ae8ebSMichal Simek  * This software has been developed by Intel Corporation.
17322ae8ebSMichal Simek  * Intel specifically disclaims all warranties, express or
18322ae8ebSMichal Simek  * implied, and all liability, including consequential and
19322ae8ebSMichal Simek  * other indirect damages, for the use of this program, including
20322ae8ebSMichal Simek  * liability for infringement of any proprietary rights,
21322ae8ebSMichal Simek  * and including the warranties of merchantability and fitness
22322ae8ebSMichal Simek  * for a particular purpose. Intel does not assume any
23322ae8ebSMichal Simek  * responsibility for and errors which may appear in this program
24322ae8ebSMichal Simek  * not any responsibility to update it.
25322ae8ebSMichal Simek  */
26322ae8ebSMichal Simek 
27d64af918SMichal Simek #include <linux/export.h>
28322ae8ebSMichal Simek #include <linux/types.h>
29322ae8ebSMichal Simek #include <linux/stddef.h>
30322ae8ebSMichal Simek #include <linux/compiler.h>
31322ae8ebSMichal Simek #include <linux/string.h>
32322ae8ebSMichal Simek 
3361a4e653SMichal Simek #ifdef CONFIG_OPT_LIB_FUNCTION
/**
 * memmove - copy a memory area; source and destination may overlap
 * @v_dst: start of the destination area
 * @v_src: start of the source area
 * @c: number of bytes to copy
 *
 * A destination at or below the source is handed to memcpy().  A destination
 * above the source is copied here from the last byte down to the first, so
 * that source bytes are read before an overlapping store can overwrite them.
 *
 * The descending copy first moves single bytes until the destination is word
 * aligned, then moves whole 32-bit words, then finishes the last 0-3 bytes
 * one at a time.  When the source is not word aligned at that point, each
 * destination word is assembled from two neighbouring aligned source words
 * with shifts; as the original authors noted, this may be slow on MicroBlaze
 * systems without a barrel shifter.
 *
 * Return: @v_dst
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (!c)
		return v_dst;

	/*
	 * Destination at or below the source: use memcpy().
	 * NOTE(review): this relies on memcpy() copying in ascending address
	 * order so that an overlapping source is read before it is
	 * overwritten - confirm against the memcpy() in use.
	 */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/* Descending copy: both pointers start one past the last byte. */
	dst += c;
	src += c;

	if (c >= 4) {
		uint32_t value, buf_hold;

		/*
		 * Copy single bytes until dst sits on a word boundary.  At
		 * most three bytes are moved and c >= 4 here, so c cannot
		 * wrap.  Byte copies are endian independent.
		 */
		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
			fallthrough;
		case 2:
			*--dst = *--src;
			--c;
			fallthrough;
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Pick the word-copy scheme from the source alignment
		 * relative to the (now aligned) destination.
		 *
		 * In the unaligned cases src lies k = 1..3 bytes above a word
		 * boundary.  i_src is rounded up to the next boundary so that
		 * the first "*--i_src" loads the aligned word holding the k
		 * bytes just below src; the bytes of that word at and above
		 * src are loaded but discarded.  buf_hold carries those k
		 * bytes, already shifted to the highest-address end of the
		 * destination word.  Each iteration loads the next lower
		 * source word, merges its 4 - k highest-address bytes with
		 * buf_hold into one destination word, and keeps its k
		 * lowest-address bytes as the new buf_hold.
		 *
		 * On big-endian the highest-address bytes of a word are its
		 * low-order bits, on little-endian its high-order bits, hence
		 * the two mirrored variants.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Source and destination are both aligned */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 1 byte, low-order end */
			buf_hold = *--i_src >> 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/* 3 bytes of value above the carried byte */
				*--i_dst = value << 8 | buf_hold;
				buf_hold = value >> 24;
			}
#else
			/* Load the holding buffer: 1 byte, high-order end */
			buf_hold = (*--i_src & 0xFF) << 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFFFF00) >> 8);
				buf_hold = (value & 0xFF) << 24;
			}
#endif
			/* The carried byte has not been stored yet */
			src = (const void *)i_src;
			src += 1;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 2 bytes, low-order end */
			buf_hold = *--i_src >> 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/* 2 bytes of value above the carried bytes */
				*--i_dst = value << 16 | buf_hold;
				buf_hold = value >> 16;
			}
#else
			/* Load the holding buffer: 2 bytes, high-order end */
			buf_hold = (*--i_src & 0xFFFF) << 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFF0000) >> 16);
				buf_hold = (value & 0xFFFF) << 16;
			}
#endif
			/* The two carried bytes have not been stored yet */
			src = (const void *)i_src;
			src += 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: 3 bytes, low-order end */
			buf_hold = *--i_src >> 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/* 1 byte of value above the carried bytes */
				*--i_dst = value << 24 | buf_hold;
				buf_hold = value >> 8;
			}
#else
			/* Load the holding buffer: 3 bytes, high-order end */
			buf_hold = (*--i_src & 0xFFFFFF) << 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFF000000) >> 24);
				buf_hold = (value & 0xFFFFFF) << 8;
			}
#endif
			/* The three carried bytes have not been stored yet */
			src = (const void *)i_src;
			src += 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off any remaining bytes.  c is at most 3 here: it was below
	 * 4 on entry, or one of the word loops above ran until it dropped
	 * below 4.
	 */
	switch (c) {
	case 3:
		*--dst = *--src;
		fallthrough;
	case 2:
		*--dst = *--src;
		fallthrough;
	case 1:
		*--dst = *--src;
	}
	return v_dst;
}
EXPORT_SYMBOL(memmove);
19561a4e653SMichal Simek #endif /* CONFIG_OPT_LIB_FUNCTION */
196