/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

	.text
	.align	5

ENTRY(mmioset)
ENTRY(memset)
UNWIND( .fnstart )
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
7:	cmp	r2, #16
	blt	4f

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
	mov	r8, r1
	mov	lr, r3

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}
UNWIND( .fnend )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmcsia	ip!, {r4, r5, r6, r7}
	stmmiia	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmneia	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )

#endif

UNWIND( .fnstart )
4:	tst	r2, #8
	stmneia	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	ret	lr

6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1
	strleb	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)

ENTRY(__memset32)
UNWIND( .fnstart )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend )
ENDPROC(__memset32)
ENTRY(__memset64)
UNWIND( .fnstart )
	mov	ip, r0			@ preserve r0 as return value
	b	7b			@ jump into the middle of memset
UNWIND( .fnend )
ENDPROC(__memset64)
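
/*
 * A rough C-level sketch of the word-fill strategy used above, kept here
 * purely as a comment for readers; it is never assembled.  It assumes the
 * simple (non-CALGN) path and glosses over the conditional stm tricks.
 * The name memset_sketch is hypothetical.
 *
 *	void *memset_sketch(void *dst, int c, size_t n)
 *	{
 *		unsigned char *p = dst;
 *		unsigned long w;
 *
 *		// store single bytes until p is word-aligned (label 6 above)
 *		while (((unsigned long)p & 3) && n) {
 *			*p++ = (unsigned char)c;
 *			n--;
 *		}
 *
 *		// replicate the byte into all four lanes (label 1 above)
 *		w = (unsigned char)c;
 *		w |= w << 8;
 *		w |= w << 16;
 *
 *		// bulk fill, 64 bytes per pass, mirroring the stm bursts
 *		while (n >= 64) {
 *			unsigned long *q = (unsigned long *)p;
 *			int i;
 *
 *			for (i = 0; i < 16; i++)
 *				q[i] = w;
 *			p += 64;
 *			n -= 64;
 *		}
 *
 *		// tail: whole words first, then the last few bytes
 *		while (n >= 4) {
 *			*(unsigned long *)p = w;
 *			p += 4;
 *			n -= 4;
 *		}
 *		while (n--)
 *			*p++ = (unsigned char)c;
 *
 *		return dst;
 *	}
 */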