xref: /openbmc/linux/arch/arm/lib/memset.S (revision f97cee494dc92395a668445bcd24d34c89f4ff8c)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 *  ASM optimised string functions
 */
9#include <linux/linkage.h>
10#include <asm/assembler.h>
11#include <asm/unwind.h>
12
13	.text
14	.align	5
15
/*
 * memset / mmioset - fill a region of memory with a byte value.
 *
 * In:    r0 = destination pointer (any alignment)
 *        r1 = fill value; only the low 8 bits are used
 *        r2 = number of bytes to fill
 * Out:   r0 = destination pointer, unchanged (all stores go through ip)
 * Clobb: r1, r2, r3, ip, flags
 *        r8 and lr (plus r4-r7 in the CALGN variant) are used as extra
 *        store registers for fills of 16 bytes or more; they are pushed
 *        on the stack first and restored before returning.
 *
 * Local label 7 is also the entry point used by __memset32/__memset64,
 * which arrive with ip = destination and the store pattern already in
 * r1/r3.  Nothing between label 7 and the final "ret lr" may assume that
 * r1 == r3 or that r1 is a replicated byte.
 */
ENTRY(mmioset)
ENTRY(memset)
UNWIND( .fnstart         )
	and	r1, r1, #255		@ cast to unsigned char, so stray high
					@ bits cannot leak into the word pattern
	ands	r3, r0, #3		@ 1 unaligned?  r3 = misalignment (0..3)
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * We know that the pointer in ip is aligned to a word boundary.
 * Replicate the fill byte into all four byte lanes of r1, and copy it to
 * r3 so that {r1, r3} can be stored as a pair.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
7:	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: tail code only

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
UNWIND( .fnend              )
UNWIND( .fnstart            )
UNWIND( .save {r8, lr}      )
	mov	r8, r1
	mov	lr, r3

	@ The stores are conditional on r2 - 64 >= 0, so the final iteration
	@ (r2 went negative) stores nothing and simply falls out of the loop.
2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
					@ (exactly 0 left: restore and return)
/*
 * No need to correct the count; we're only testing bits from now on.
 * (Subtracting 64 leaves bits 0-5 of r2 unchanged.)
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}
UNWIND( .fnend              )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
UNWIND( .fnend                 )
UNWIND( .fnstart               )
UNWIND( .save {r4-r8, lr}      )
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3

	@ Only bother aligning to 32 bytes when more than 96 bytes remain
	@ and ip is not already 32-byte aligned.
	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	@ r8 = bytes needed to reach the next 32-byte boundary.  ip is word
	@ aligned here, so r8 is a multiple of 4.  Shifting left by 28 puts
	@ bit 4 in C, bit 3 in N and bit 2 in bit 30, selecting a 16-, 8-
	@ and 4-byte store respectively.
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1			@ r8 goes back to being a store register
	strne	r1, [ip], #4

	@ Same scheme as the non-CALGN loop above, 32 bytes per store.
3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}	@ exactly 0 left: restore and return

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend                 )

#endif

/*
 * Fewer than 16 bytes remain.  Only bits 0-3 of r2 are examined from
 * here on.
 */
UNWIND( .fnstart            )
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

/*
 * Unaligned destination: r3 = ip & 3 (1..3).  Store 4 - r3 bytes to reach
 * a word boundary, then rejoin the aligned path at label 1.  If fewer
 * than 4 bytes were requested, go straight to the byte tail instead; the
 * subtraction of 4 leaves bits 0-1 of r2 intact, which is all label 5
 * looks at.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1 stored only when r3 == 1
	strble	r1, [ip], #1		@ 1 stored when r3 <= 2
	strb	r1, [ip], #1		@ 1 always stored
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend   )
ENDPROC(memset)
ENDPROC(mmioset)
135
/*
 * __memset32 / __memset64 - fill memory with a 32-bit or 64-bit pattern.
 *
 * Both are thin entry points that reuse memset's store loops by jumping
 * to its local label 7.  From there r1 and r3 are written out as a pair,
 * r1 at the lower address and r3 at the higher one, and r2 is treated as
 * a byte count.
 *
 * In:    r0 = destination pointer
 *        r1 = first 32-bit word of the pattern
 *        r2 = number of bytes to fill
 *        r3 = second 32-bit word of the pattern (__memset64 only;
 *             __memset32 overwrites it with r1)
 * Out:   r0 = destination pointer, unchanged
 *
 * This path skips the alignment check and the byte replication done at
 * memset's entry.
 * NOTE(review): callers presumably pass a suitably aligned pointer and a
 * byte count that is a multiple of the element size - confirm against
 * the C wrappers.
 */
ENTRY(__memset32)
UNWIND( .fnstart         )
	mov	r3, r1			@ both pattern words are the same value;
					@ fall through into __memset64
UNWIND( .fnend   )
ENDPROC(__memset32)
ENTRY(__memset64)
UNWIND( .fnstart         )
	mov	ip, r0			@ work on a copy so r0 survives as the
					@ return value
	b	7b			@ join memset after its alignment and
					@ byte-replication prologue
UNWIND( .fnend   )
ENDPROC(__memset64)
147