xref: /openbmc/linux/arch/microblaze/lib/memcpy.c (revision 61a4e653)
1322ae8ebSMichal Simek /*
2322ae8ebSMichal Simek  * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
3322ae8ebSMichal Simek  * Copyright (C) 2008-2009 PetaLogix
4322ae8ebSMichal Simek  * Copyright (C) 2007 John Williams
5322ae8ebSMichal Simek  *
6322ae8ebSMichal Simek  * Reasonably optimised generic C-code for memcpy on Microblaze
7322ae8ebSMichal Simek  * This is generic C code to do efficient, alignment-aware memcpy.
8322ae8ebSMichal Simek  *
9322ae8ebSMichal Simek  * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
10322ae8ebSMichal Simek  * http://www.embedded.com/showArticle.jhtml?articleID=19205567
11322ae8ebSMichal Simek  *
12af901ca1SAndré Goddard Rosa  * Attempts were made, unsuccessfully, to contact the original
13322ae8ebSMichal Simek  * author of this code (Michael Morrow, Intel).  Below is the original
14322ae8ebSMichal Simek  * copyright notice.
15322ae8ebSMichal Simek  *
16322ae8ebSMichal Simek  * This software has been developed by Intel Corporation.
17322ae8ebSMichal Simek  * Intel specifically disclaims all warranties, express or
18322ae8ebSMichal Simek  * implied, and all liability, including consequential and
19322ae8ebSMichal Simek  * other indirect damages, for the use of this program, including
20322ae8ebSMichal Simek  * liability for infringement of any proprietary rights,
21322ae8ebSMichal Simek  * and including the warranties of merchantability and fitness
22322ae8ebSMichal Simek  * for a particular purpose. Intel does not assume any
23322ae8ebSMichal Simek  * responsibility for and errors which may appear in this program
24322ae8ebSMichal Simek  * not any responsibility to update it.
25322ae8ebSMichal Simek  */
26322ae8ebSMichal Simek 
27d64af918SMichal Simek #include <linux/export.h>
28322ae8ebSMichal Simek #include <linux/types.h>
29322ae8ebSMichal Simek #include <linux/stddef.h>
30322ae8ebSMichal Simek #include <linux/compiler.h>
31322ae8ebSMichal Simek 
32322ae8ebSMichal Simek #include <linux/string.h>
33322ae8ebSMichal Simek 
34*61a4e653SMichal Simek #ifdef CONFIG_OPT_LIB_FUNCTION
/**
 * memcpy - copy a block of memory, a 32-bit word at a time where possible
 * @v_dst: start of the destination buffer
 * @v_src: start of the source buffer
 * @c: number of bytes to copy
 *
 * Bytes are copied in ascending address order.  For copies of at least four
 * bytes the destination is first advanced to a 4-byte boundary, then whole
 * words are stored.  If the source is not word aligned at that point, each
 * stored word is assembled from two consecutive aligned source words using
 * constant shifts; the shift directions depend on __MICROBLAZEEL__.
 *
 * Return: @v_dst, unchanged.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;

	/*
	 * The word-copy path below is cheapest when source and destination
	 * share the same alignment within a word.  When they do not, every
	 * stored word has to be built with shifts, which might result in bad
	 * performance on MicroBlaze systems without a barrel shifter.
	 */
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (likely(c >= 4)) {
		/* Same width as the words loaded through i_src. */
		uint32_t value, buf_hold;

		/*
		 * Align the destination to a word boundary, one byte at a
		 * time so the result does not depend on endianness.  At most
		 * three bytes are copied here, and c >= 4, so c cannot
		 * underflow.
		 */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
			fallthrough;
		case 2:
			*dst++ = *src++;
			--c;
			fallthrough;
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Choose a copy scheme based on the source alignment now that
		 * the destination is word aligned.
		 *
		 * In the unaligned cases the source is only ever read as
		 * whole aligned words: the word containing *src is loaded
		 * into buf_hold, and each loop iteration loads the following
		 * word and merges the two.  The bytes of those words that lie
		 * outside the requested range are loaded but never stored.
		 * The pointer is masked as unsigned long, matching the
		 * alignment tests, so no address bits are dropped.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both pointers are word aligned */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/* Word align the source */
			i_src = (const void *)((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 24;
				buf_hold = value << 8;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
				buf_hold = (value & 0xFFFFFF00) >> 8;
			}
#endif
			/*
			 * Realign the source: three bytes of the last word
			 * loaded are still pending in buf_hold.
			 */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *)((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 16;
				buf_hold = value << 16;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFF0000) >> 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
				buf_hold = (value & 0xFFFF0000) >> 16;
			}
#endif
			/*
			 * Realign the source: two bytes of the last word
			 * loaded are still pending in buf_hold.
			 */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *)((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 8;
				buf_hold = value << 24;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFF000000) >> 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
				buf_hold = (value & 0xFF000000) >> 24;
			}
#endif
			/*
			 * Realign the source: one byte of the last word
			 * loaded is still pending in buf_hold.
			 */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off any remaining bytes.  Either c was below 4 on entry or
	 * the word loops above ran until it was, so 0-3 bytes are left.
	 */
	switch (c) {
	case 3:
		*dst++ = *src++;
		fallthrough;
	case 2:
		*dst++ = *src++;
		fallthrough;
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
}
EXPORT_SYMBOL(memcpy);
179*61a4e653SMichal Simek #endif /* CONFIG_OPT_LIB_FUNCTION */
180