/*
 *  linux/arch/arm/lib/memcpy.S
 *
 *  Copyright (C) 1995-1999 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

		.text

/*
 * ENTER: build the stack frame used by this routine.
 * Copies sp to ip, pushes r0, r4-r9, fp, the entry sp (via ip), lr and pc,
 * then points fp at (entry sp - 4).  r0 is part of the saved set, so the
 * original destination pointer is kept in the frame while r0 is used as
 * the running store pointer.
 */
#define ENTER	\
		mov	ip,sp	;\
		stmfd	sp!,{r0,r4-r9,fp,ip,lr,pc}	;\
		sub	fp,ip,#4

/*
 * EXIT: return to the caller by reloading r0, r4-r9, fp, sp and pc
 * relative to fp.
 * NOTE(review): LOADREGS is defined outside this file (presumably in
 * <asm/assembler.h>, expanding to an ldm with the given condition and
 * addressing-mode suffix) -- confirm.
 */
#define EXIT	\
		LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})

/*
 * EXITEQ: same register list as EXIT, but with an "eq" condition prefix,
 * so it is intended to return only when the Z flag is set and otherwise
 * fall through.
 */
#define EXITEQ	\
		LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})

/*
 * Prototype: void memcpy(void *to,const void *from,unsigned long n);
 *
 * memcpy and memmove share this one body.
 *
 * Register usage as seen in the code below:
 *   r0      running store (destination) pointer
 *   r1      running load (source) pointer
 *   r2      byte count, kept with a negative bias (see each label) so
 *           that the flags from subs/adds directly answer "is there
 *           still a full unit left to copy?"
 *   r3-r9   data registers
 *   ip      alignment scratch (pointer & 3), also used as a data register
 *
 * r0 is saved by ENTER and is in the register list reloaded by EXIT, so
 * the value left in r0 on return is presumably the original "to"
 * pointer -- confirm against LOADREGS.
 *
 * Direction: if from >= to (unsigned) the copy runs upwards from the
 * start of the buffers (labels 1-22); if from < to it runs downwards
 * from the end (labels 23-45), so a destination that overlaps the tail
 * of the source is read before it is overwritten.
 *
 * NOTE(review): ENTRY, PLD, pull and push are macros defined outside this
 * file.  PLD(...) presumably emits its argument only when the preload
 * instruction is available; pull/push are presumably the endian-dependent
 * shift operators (lsr/lsl on little-endian) -- confirm in
 * <asm/assembler.h>.  Instructions wrapped in PLD() below change the r2
 * bias only in builds where they are emitted.
 */
ENTRY(memcpy)
ENTRY(memmove)
		ENTER
		cmp	r1, r0
		bcc	23f			/* from < to: copy downwards */
		/* ---- upward copy ---- */
		subs	r2, r2, #4		/* r2 = n - 4 */
		blt	6f			/* fewer than 4 bytes: byte tail only */
	PLD(	pld	[r1, #0]		)
		ands	ip, r0, #3
		bne	7f			/* destination not word aligned */
		ands	ip, r1, #3
		bne	8f			/* destination aligned, source not */

		/*
		 * 1: both pointers word aligned, r2 = remaining - 4.
		 * After the two subs below r2 = remaining - 32.
		 */
1:		subs	r2, r2, #8
		blt	5f			/* fewer than 12 bytes remain */
		subs	r2, r2, #20
		blt	4f			/* fewer than 32 bytes remain */
	PLD(	pld	[r1, #28]		)
	PLD(	subs	r2, r2, #64		)
	PLD(	blt	3f			)
		/*
		 * 2: main loop.  Each pass moves one 32-byte block through
		 * r3-r9/ip, and a second one if the subs shows another full
		 * block is available.  The unconditional stmia stores whichever
		 * block was loaded last.
		 */
2:	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)
		ldmia	r1!, {r3 - r9, ip}
		subs	r2, r2, #32
		stmgeia	r0!, {r3 - r9, ip}
		ldmgeia	r1!, {r3 - r9, ip}
		subges	r2, r2, #32
		stmia	r0!, {r3 - r9, ip}
		bge	2b
		/*
		 * 3: only present in PLD builds.  Undoes the extra bias taken
		 * before the loop and copies the one or two 32-byte blocks that
		 * the preloading loop left behind.
		 */
3:	PLD(	ldmia	r1!, {r3 - r9, ip}	)
	PLD(	adds	r2, r2, #32		)
	PLD(	stmgeia	r0!, {r3 - r9, ip}	)
	PLD(	ldmgeia	r1!, {r3 - r9, ip}	)
	PLD(	subges	r2, r2, #32		)
	PLD(	stmia	r0!, {r3 - r9, ip}	)
		/*
		 * 4: r2 = remaining - 32.  Copy 16 bytes if at least 16 remain,
		 * then (after re-biasing to remaining - 12) 12 bytes if at least
		 * 12 remain.  The subge instructions do not touch the flags.
		 */
4:		cmn	r2, #16
		ldmgeia	r1!, {r3 - r6}
		subge	r2, r2, #16
		stmgeia	r0!, {r3 - r6}
		adds	r2, r2, #20
		ldmgeia	r1!, {r3 - r5}
		subge	r2, r2, #12
		stmgeia	r0!, {r3 - r5}
		/*
		 * 5: r2 = remaining - 12.  Re-bias to remaining - 4; if at
		 * least one word is left copy one word (4..7 bytes left) or two
		 * words (8..11 bytes left).
		 */
5:		adds	r2, r2, #8
		blt	6f
		subs	r2, r2, #4
		ldrlt	r3, [r1], #4
		ldmgeia	r1!, {r4, r5}
		subge	r2, r2, #4
		strlt	r3, [r0], #4
		stmgeia	r0!, {r4, r5}

		/*
		 * 6: byte tail, r2 = remaining - 4 on entry.  After the adds,
		 * r2 is the true remaining count (0..3): return if zero,
		 * otherwise copy 1, 2 (ge) or 3 (gt) bytes and return.
		 */
6:		adds	r2, r2, #4
		EXITEQ
		cmp	r2, #2
		ldrb	r3, [r1], #1
		ldrgeb	r4, [r1], #1
		ldrgtb	r5, [r1], #1
		strb	r3, [r0], #1
		strgeb	r4, [r0], #1
		strgtb	r5, [r0], #1
		EXIT

		/*
		 * 7: destination not word aligned; ip = r0 & 3 on entry.
		 * ip becomes the number of bytes (1..3) needed to reach the next
		 * word boundary; copy that many bytes and charge them to r2.
		 * If that leaves less than a word, finish in the byte tail.
		 * Otherwise continue at 1 when the source is now aligned too,
		 * or fall through to 8 when it is not.
		 */
7:		rsb	ip, ip, #4
		cmp	ip, #2
		ldrb	r3, [r1], #1
		ldrgeb	r4, [r1], #1
		ldrgtb	r5, [r1], #1
		strb	r3, [r0], #1
		strgeb	r4, [r0], #1
		strgtb	r5, [r0], #1
		subs	r2, r2, ip
		blt	6b
		ands	ip, r1, #3
		beq	1b

		/*
		 * 8: destination aligned, source misaligned by ip (1..3) bytes,
		 * r2 = remaining - 4.  Round r1 down to a word boundary and
		 * load the first (partial) source word into r7.  Each output
		 * word is then assembled from two neighbouring source words with
		 * a pull/push shift pair.  ip selects the variant:
		 *   ip == 1: fall through (pull #8  / push #24)
		 *   ip == 2: label 13     (pull #16 / push #16)
		 *   ip == 3: label 18     (pull #24 / push #8)
		 * In every variant r7 carries the last source word loaded, whose
		 * remaining bytes have not been stored yet.
		 */
8:		bic	r1, r1, #3
		ldr	r7, [r1], #4
		cmp	ip, #2
		bgt	18f
		beq	13f
		cmp	r2, #12
		blt	11f			/* fewer than 16 bytes: word loop */
	PLD(	pld	[r1, #12]		)
		sub	r2, r2, #12		/* r2 = remaining - 16 */
	PLD(	subs	r2, r2, #32		)
	PLD(	blt	10f			)
	PLD(	pld	[r1, #28]		)
9:	PLD(	pld	[r1, #44]		)
		/* 10: 16 bytes per pass, source offset 1 */
10:		mov	r3, r7, pull #8
		ldmia	r1!, {r4 - r7}
		subs	r2, r2, #16
		orr	r3, r3, r4, push #24
		mov	r4, r4, pull #8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		stmia	r0!, {r3 - r6}
		bge	9b
	PLD(	cmn	r2, #32			)
	PLD(	bge	10b			)
	PLD(	add	r2, r2, #32		)
		adds	r2, r2, #12		/* back to r2 = remaining - 4 */
		blt	12f
		/* 11: one word per pass, source offset 1 */
11:		mov	r3, r7, pull #8
		ldr	r7, [r1], #4
		subs	r2, r2, #4
		orr	r3, r3, r7, push #24
		str	r3, [r0], #4
		bge	11b
		/* 12: move r1 back to the true (unaligned) source position */
12:		sub	r1, r1, #3
		b	6b

		/* 13: same scheme as above for source offset 2 */
13:		cmp	r2, #12
		blt	16f
	PLD(	pld	[r1, #12]		)
		sub	r2, r2, #12
	PLD(	subs	r2, r2, #32		)
	PLD(	blt	15f			)
	PLD(	pld	[r1, #28]		)
14:	PLD(	pld	[r1, #44]		)
		/* 15: 16 bytes per pass, source offset 2 */
15:		mov	r3, r7, pull #16
		ldmia	r1!, {r4 - r7}
		subs	r2, r2, #16
		orr	r3, r3, r4, push #16
		mov	r4, r4, pull #16
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		stmia	r0!, {r3 - r6}
		bge	14b
	PLD(	cmn	r2, #32			)
	PLD(	bge	15b			)
	PLD(	add	r2, r2, #32		)
		adds	r2, r2, #12
		blt	17f
		/* 16: one word per pass, source offset 2 */
16:		mov	r3, r7, pull #16
		ldr	r7, [r1], #4
		subs	r2, r2, #4
		orr	r3, r3, r7, push #16
		str	r3, [r0], #4
		bge	16b
		/* 17: move r1 back to the true (unaligned) source position */
17:		sub	r1, r1, #2
		b	6b

		/* 18: same scheme as above for source offset 3 */
18:		cmp	r2, #12
		blt	21f
	PLD(	pld	[r1, #12]		)
		sub	r2, r2, #12
	PLD(	subs	r2, r2, #32		)
	PLD(	blt	20f			)
	PLD(	pld	[r1, #28]		)
19:	PLD(	pld	[r1, #44]		)
		/* 20: 16 bytes per pass, source offset 3 */
20:		mov	r3, r7, pull #24
		ldmia	r1!, {r4 - r7}
		subs	r2, r2, #16
		orr	r3, r3, r4, push #8
		mov	r4, r4, pull #24
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		stmia	r0!, {r3 - r6}
		bge	19b
	PLD(	cmn	r2, #32			)
	PLD(	bge	20b			)
	PLD(	add	r2, r2, #32		)
		adds	r2, r2, #12
		blt	22f
		/* 21: one word per pass, source offset 3 */
21:		mov	r3, r7, pull #24
		ldr	r7, [r1], #4
		subs	r2, r2, #4
		orr	r3, r3, r7, push #8
		str	r3, [r0], #4
		bge	21b
		/* 22: move r1 back to the true (unaligned) source position */
22:		sub	r1, r1, #1
		b	6b


		/*
		 * ---- downward copy (from < to) ----
		 * 23: advance both pointers to one past the end of their
		 * buffers; everything below uses pre-decrement addressing
		 * (ldmdb/stmdb and [rX, #-k]!).  The structure mirrors the
		 * upward copy: 24-28 aligned, 29 byte tail, 30 destination
		 * alignment, 31-45 misaligned source.
		 */
23:		add	r1, r1, r2
		add	r0, r0, r2
		subs	r2, r2, #4		/* r2 = n - 4 */
		blt	29f			/* fewer than 4 bytes: byte tail only */
	PLD(	pld	[r1, #-4]		)
		ands	ip, r0, #3
		bne	30f			/* destination end not word aligned */
		ands	ip, r1, #3
		bne	31f			/* destination aligned, source not */

		/* 24: both end pointers aligned, r2 = remaining - 4 */
24:		subs	r2, r2, #8
		blt	28f			/* fewer than 12 bytes remain */
		subs	r2, r2, #20
		blt	27f			/* fewer than 32 bytes remain */
	PLD(	pld	[r1, #-32]		)
	PLD(	subs	r2, r2, #64		)
	PLD(	blt	26f			)
		/* 25: main loop, one or two 32-byte blocks per pass */
25:	PLD(	pld	[r1, #-64]		)
	PLD(	pld	[r1, #-96]		)
		ldmdb	r1!, {r3 - r9, ip}
		subs	r2, r2, #32
		stmgedb	r0!, {r3 - r9, ip}
		ldmgedb	r1!, {r3 - r9, ip}
		subges	r2, r2, #32
		stmdb	r0!, {r3 - r9, ip}
		bge	25b
		/* 26: PLD builds only, drains what the preloading loop left */
26:	PLD(	ldmdb	r1!, {r3 - r9, ip}	)
	PLD(	adds	r2, r2, #32		)
	PLD(	stmgedb	r0!, {r3 - r9, ip}	)
	PLD(	ldmgedb	r1!, {r3 - r9, ip}	)
	PLD(	subges	r2, r2, #32		)
	PLD(	stmdb	r0!, {r3 - r9, ip}	)
		/* 27: r2 = remaining - 32; optional 16-byte then 12-byte step */
27:		cmn	r2, #16
		ldmgedb	r1!, {r3 - r6}
		subge	r2, r2, #16
		stmgedb	r0!, {r3 - r6}
		adds	r2, r2, #20
		ldmgedb	r1!, {r3 - r5}
		subge	r2, r2, #12
		stmgedb	r0!, {r3 - r5}
		/* 28: r2 = remaining - 12; copy one or two words if available */
28:		adds	r2, r2, #8
		blt	29f
		subs	r2, r2, #4
		ldrlt	r3, [r1, #-4]!
		ldmgedb	r1!, {r4, r5}
		subge	r2, r2, #4
		strlt	r3, [r0, #-4]!
		stmgedb	r0!, {r4, r5}

		/*
		 * 29: byte tail, r2 = remaining - 4 on entry.  Return if
		 * nothing is left, otherwise copy the last 1..3 bytes downwards
		 * and return.
		 */
29:		adds	r2, r2, #4
		EXITEQ
		cmp	r2, #2
		ldrb	r3, [r1, #-1]!
		ldrgeb	r4, [r1, #-1]!
		ldrgtb	r5, [r1, #-1]!
		strb	r3, [r0, #-1]!
		strgeb	r4, [r0, #-1]!
		strgtb	r5, [r0, #-1]!
		EXIT

		/*
		 * 30: destination end not word aligned; ip = r0 & 3 is already
		 * the number of bytes to copy downwards to reach a word
		 * boundary, so no rsb is needed here (unlike label 7).
		 */
30:		cmp	ip, #2
		ldrb	r3, [r1, #-1]!
		ldrgeb	r4, [r1, #-1]!
		ldrgtb	r5, [r1, #-1]!
		strb	r3, [r0, #-1]!
		strgeb	r4, [r0, #-1]!
		strgtb	r5, [r0, #-1]!
		subs	r2, r2, ip
		blt	29b
		ands	ip, r1, #3
		beq	24b

		/*
		 * 31: destination aligned, source end misaligned by ip (1..3)
		 * bytes, r2 = remaining - 4.  Round r1 down and load the word
		 * that holds the last source bytes into r3 (post-index #0 leaves
		 * r1 unchanged).  ip selects the variant:
		 *   ip == 3: fall through (push #8  / pull #24)
		 *   ip == 2: label 36     (push #16 / pull #16)
		 *   ip == 1: label 41     (push #24 / pull #8)
		 * In every variant r3 carries the last source word loaded.
		 */
31:		bic	r1, r1, #3
		ldr	r3, [r1], #0
		cmp	ip, #2
		blt	41f
		beq	36f
		cmp	r2, #12
		blt	34f			/* fewer than 16 bytes: word loop */
	PLD(	pld	[r1, #-16]		)
		sub	r2, r2, #12		/* r2 = remaining - 16 */
	PLD(	subs	r2, r2, #32		)
	PLD(	blt	33f			)
	PLD(	pld	[r1, #-32]		)
32:	PLD(	pld	[r1, #-48]		)
		/* 33: 16 bytes per pass, source offset 3 */
33:		mov	r7, r3, push #8
		ldmdb	r1!, {r3, r4, r5, r6}
		subs	r2, r2, #16
		orr	r7, r7, r6, pull #24
		mov	r6, r6, push #8
		orr	r6, r6, r5, pull #24
		mov	r5, r5, push #8
		orr	r5, r5, r4, pull #24
		mov	r4, r4, push #8
		orr	r4, r4, r3, pull #24
		stmdb	r0!, {r4, r5, r6, r7}
		bge	32b
	PLD(	cmn	r2, #32			)
	PLD(	bge	33b			)
	PLD(	add	r2, r2, #32		)
		adds	r2, r2, #12		/* back to r2 = remaining - 4 */
		blt	35f
		/* 34: one word per pass, source offset 3 */
34:		mov	ip, r3, push #8
		ldr	r3, [r1, #-4]!
		subs	r2, r2, #4
		orr	ip, ip, r3, pull #24
		str	ip, [r0, #-4]!
		bge	34b
		/* 35: move r1 forward to the true (unaligned) source position */
35:		add	r1, r1, #3
		b	29b

		/* 36: same scheme as above for source offset 2 */
36:		cmp	r2, #12
		blt	39f
	PLD(	pld	[r1, #-16]		)
		sub	r2, r2, #12
	PLD(	subs	r2, r2, #32		)
	PLD(	blt	38f			)
	PLD(	pld	[r1, #-32]		)
37:	PLD(	pld	[r1, #-48]		)
		/* 38: 16 bytes per pass, source offset 2 */
38:		mov	r7, r3, push #16
		ldmdb	r1!, {r3, r4, r5, r6}
		subs	r2, r2, #16
		orr	r7, r7, r6, pull #16
		mov	r6, r6, push #16
		orr	r6, r6, r5, pull #16
		mov	r5, r5, push #16
		orr	r5, r5, r4, pull #16
		mov	r4, r4, push #16
		orr	r4, r4, r3, pull #16
		stmdb	r0!, {r4, r5, r6, r7}
		bge	37b
	PLD(	cmn	r2, #32			)
	PLD(	bge	38b			)
	PLD(	add	r2, r2, #32		)
		adds	r2, r2, #12
		blt	40f
		/* 39: one word per pass, source offset 2 */
39:		mov	ip, r3, push #16
		ldr	r3, [r1, #-4]!
		subs	r2, r2, #4
		orr	ip, ip, r3, pull #16
		str	ip, [r0, #-4]!
		bge	39b
		/* 40: move r1 forward to the true (unaligned) source position */
40:		add	r1, r1, #2
		b	29b

		/* 41: same scheme as above for source offset 1 */
41:		cmp	r2, #12
		blt	44f
	PLD(	pld	[r1, #-16]		)
		sub	r2, r2, #12
	PLD(	subs	r2, r2, #32		)
	PLD(	blt	43f			)
	PLD(	pld	[r1, #-32]		)
42:	PLD(	pld	[r1, #-48]		)
		/* 43: 16 bytes per pass, source offset 1 */
43:		mov	r7, r3, push #24
		ldmdb	r1!, {r3, r4, r5, r6}
		subs	r2, r2, #16
		orr	r7, r7, r6, pull #8
		mov	r6, r6, push #24
		orr	r6, r6, r5, pull #8
		mov	r5, r5, push #24
		orr	r5, r5, r4, pull #8
		mov	r4, r4, push #24
		orr	r4, r4, r3, pull #8
		stmdb	r0!, {r4, r5, r6, r7}
		bge	42b
	PLD(	cmn	r2, #32			)
	PLD(	bge	43b			)
	PLD(	add	r2, r2, #32		)
		adds	r2, r2, #12
		blt	45f
		/* 44: one word per pass, source offset 1 */
44:		mov	ip, r3, push #24
		ldr	r3, [r1, #-4]!
		subs	r2, r2, #4
		orr	ip, ip, r3, pull #8
		str	ip, [r0, #-4]!
		bge	44b
		/* 45: move r1 forward to the true (unaligned) source position */
45:		add	r1, r1, #1
		b	29b
