1 /* 2 * arch/openrisc/lib/memcpy.c 3 * 4 * Optimized memory copy routines for openrisc. These are mostly copied 5 * from ohter sources but slightly entended based on ideas discuassed in 6 * #openrisc. 7 * 8 * The word unroll implementation is an extension to the arm byte 9 * unrolled implementation, but using word copies (if things are 10 * properly aligned) 11 * 12 * The great arm loop unroll algorithm can be found at: 13 * arch/arm/boot/compressed/string.c 14 */ 15 16 #include <linux/export.h> 17 18 #include <linux/string.h> 19 20 #ifdef CONFIG_OR1K_1200 21 /* 22 * Do memcpy with word copies and loop unrolling. This gives the 23 * best performance on the OR1200 and MOR1KX archirectures 24 */ 25 void *memcpy(void *dest, __const void *src, __kernel_size_t n) 26 { 27 int i = 0; 28 unsigned char *d, *s; 29 uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; 30 31 /* If both source and dest are word aligned copy words */ 32 if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { 33 /* Copy 32 bytes per loop */ 34 for (i = n >> 5; i > 0; i--) { 35 *dest_w++ = *src_w++; 36 *dest_w++ = *src_w++; 37 *dest_w++ = *src_w++; 38 *dest_w++ = *src_w++; 39 *dest_w++ = *src_w++; 40 *dest_w++ = *src_w++; 41 *dest_w++ = *src_w++; 42 *dest_w++ = *src_w++; 43 } 44 45 if (n & 1 << 4) { 46 *dest_w++ = *src_w++; 47 *dest_w++ = *src_w++; 48 *dest_w++ = *src_w++; 49 *dest_w++ = *src_w++; 50 } 51 52 if (n & 1 << 3) { 53 *dest_w++ = *src_w++; 54 *dest_w++ = *src_w++; 55 } 56 57 if (n & 1 << 2) 58 *dest_w++ = *src_w++; 59 60 d = (unsigned char *)dest_w; 61 s = (unsigned char *)src_w; 62 63 } else { 64 d = (unsigned char *)dest_w; 65 s = (unsigned char *)src_w; 66 67 for (i = n >> 3; i > 0; i--) { 68 *d++ = *s++; 69 *d++ = *s++; 70 *d++ = *s++; 71 *d++ = *s++; 72 *d++ = *s++; 73 *d++ = *s++; 74 *d++ = *s++; 75 *d++ = *s++; 76 } 77 78 if (n & 1 << 2) { 79 *d++ = *s++; 80 *d++ = *s++; 81 *d++ = *s++; 82 *d++ = *s++; 83 } 84 } 85 86 if (n & 1 << 1) { 87 *d++ = *s++; 88 *d++ = *s++; 89 } 90 91 if (n & 1) 92 *d++ = *s++; 93 94 return dest; 95 } 96 #else 97 /* 98 * Use word copies but no loop unrolling as we cannot assume there 99 * will be benefits on the archirecture 100 */ 101 void *memcpy(void *dest, __const void *src, __kernel_size_t n) 102 { 103 unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src; 104 uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; 105 106 /* If both source and dest are word aligned copy words */ 107 if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { 108 for (; n >= 4; n -= 4) 109 *dest_w++ = *src_w++; 110 } 111 112 d = (unsigned char *)dest_w; 113 s = (unsigned char *)src_w; 114 115 /* For remaining or if not aligned, copy bytes */ 116 for (; n >= 1; n -= 1) 117 *d++ = *s++; 118 119 return dest; 120 121 } 122 #endif 123 124 EXPORT_SYMBOL(memcpy); 125