/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
#include <linux/export.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word.
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word.
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid reading any source word twice.
 * (A portable C sketch of the shift-and-merge trick follows
 * __memcpy_unaligned_dn below.)
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					  long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			/* shift the tail of low_word down ... */
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			/* ... and the head of high_word up to meet it */
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;	/* carry over, don't re-read */
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					  long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}
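
/*
 * Illustrative sketch (not part of the build): roughly what the
 * ldq_u/extql/extqh sequence in __memcpy_unaligned_up computes, written
 * in portable C. It assumes a little-endian machine with 8-byte longs,
 * a destination already 8-byte aligned (as after ALIGN_DEST_TO8_UP),
 * and a non-zero source misalignment; the aligned case never takes this
 * path, and a shift by 64 would be undefined in C, unlike extqh. The
 * function name is made up for the example.
 */
#if 0
static void example_shift_merge_copy(unsigned long d, unsigned long s, long n)
{
	unsigned long off = s & 7;		/* 1..7 by assumption */
	unsigned long *src = (unsigned long *) (s - off);
	unsigned long low = src[0];		/* aligned load covering s */

	while (n >= 8) {
		unsigned long high = src[1];	/* next aligned quadword */
		/* like extql/extqh: pull the tail of "low" down, lift the
		   head of "high" up, and OR them together to rebuild the
		   unaligned quadword starting at s */
		*(unsigned long *) d = (low >> (8 * off))
				     | (high << (8 * (8 - off)));
		low = high;			/* reuse, don't re-read */
		src++;
		d += 8;
		n -= 8;
	}
}
#endif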

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but using a floating point register
 * for the move seems to slow things down (a very small difference, though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}

static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}

void * memcpy(void * dest, const void *src, size_t n)
{
	/* same misalignment of dest and src: aligning the destination
	   byte-by-byte aligns the source at the same time */
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);
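
/*
 * Illustrative sketch (not part of the build): the dispatch predicate in
 * memcpy() above. When the low three bits of dest and src agree, the two
 * pointers share a misalignment, so once ALIGN_DEST_TO8_UP has byte-copied
 * dest up to an 8-byte boundary the source is 8-byte aligned as well and
 * whole-quadword ldq/stq copies are safe. A hypothetical userspace check:
 */
#if 0
#include <assert.h>

int main(void)
{
	unsigned long dest = 0x1003, src = 0x2003;

	/* same misalignment (3): the aligned path is taken */
	assert(((dest ^ src) & 7) == 0);

	/* different misalignment (3 vs 4): the unaligned path is taken */
	src = 0x2004;
	assert(((dest ^ src) & 7) != 0);
	return 0;
}
#endif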