/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

	.syntax unified
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(MEMSET_NO_THUMB_BUILD)
	.thumb
	.thumb_func
#endif
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
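/*
 * ip is the working copy of the destination from here on, so that r0
 * survives untouched as the return value.  Unaligned buffers branch to
 * 6: first, which stores up to three leading bytes and then rejoins
 * the word-aligned path at 1:.
 */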
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
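/*
 * The two ORRs below replicate the fill byte across a full word so
 * that every 32-bit store writes four copies of it.  A rough C
 * equivalent of this setup (an illustrative sketch only, not what the
 * assembler emits):
 *
 *	#include <stdint.h>
 *
 *	static uint32_t replicate(uint8_t byte)
 *	{
 *		uint32_t c = byte;
 *		c |= c << 8;	// 0x000000AB -> 0x0000ABAB
 *		c |= c << 16;	// 0x0000ABAB -> 0xABABABAB
 *		return c;
 *	}
 *
 * Fills shorter than 16 bytes skip the bulk loops and go straight to
 * the tail at 4:.
 */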
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

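/*
 * Each stmia below stores four registers, i.e. 16 bytes, so one pass
 * of the loop writes 64 bytes.  64 has only bit 6 set, so the subs
 * that drives the loop never disturbs bits 0..5 of r2; that is why
 * the leftover count can be bit-tested afterwards without adding 64
 * back.  In outline, with hypothetical storeN() helpers that also
 * advance the pointer (a sketch of the control flow, not a drop-in
 * replacement):
 *
 *	while ((int32_t)(n -= 64) >= 0)	// stmiage x4
 *		store64(p);
 *	if (n & 32)			// bits 0..5 still valid
 *		store32(p);
 *	if (n & 16)
 *		store16(p);
 */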
2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

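/*
 * Alignment is only attempted when more than 96 bytes remain and ip
 * is not already on a 32-byte boundary (the cmp/tstgt/ble above).
 * The block below computes the pad up to the next cache line, then
 * tests three of its bits with a single MOVS: after "lsl #(32 - 4)",
 * bit 4 of r8 ends up in the carry flag, bit 3 in the sign flag, and
 * bit 2 in bit 30 of the result.  Roughly, with hypothetical storeN()
 * helpers:
 *
 *	pad = 32 - (addr & 31);		// 4..28, a multiple of 4 here
 *	n -= pad;
 *	if (pad & 16) store16(p);	// stmiacs: C = old bit 4
 *	if (pad & 8)  store8(p);	// stmiami: N = old bit 3
 *	if (pad & 4)  store4(p);	// tst #(1 << 30): old bit 2
 */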
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

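/*
 * Common small-count tail: fewer than 16 bytes remain, so each
 * power-of-two bit of r2 gets one conditional store.  Sketch, with p
 * a hypothetical uint32_t pointer holding the destination:
 *
 *	if (n & 8) { *p++ = c; *p++ = c; }	// stmiane: two words
 *	if (n & 4) *p++ = c;			// strne: one word
 *	// label 5: then finishes the last 0-3 bytes one at a time
 */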
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got fewer than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

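/*
 * Unaligned entry point.  r3 = r0 & 3 is 1, 2 or 3, so 4 - r3 bytes
 * are needed to reach a word boundary; the cmp/strblt/strble/strb
 * ladder stores exactly that many (three when r3 == 1, two when
 * r3 == 2, one when r3 == 3).  Subtracting 4 up front leaves bits
 * 0..1 of r2 intact, so fills shorter than 4 bytes can still branch
 * back to the byte tail at 5:.  As a sketch (hypothetical labels):
 *
 *	n -= 4;				// subs: bits 0..1 survive
 *	if ((int32_t)n < 0)
 *		goto tail_5;		// blt 5b
 *	for (head = 4 - (addr & 3); head; head--)
 *		*b++ = c;		// the conditional strb ladder
 *	n += addr & 3;			// net: n -= 4 - (addr & 3)
 *	goto word_fill_1;		// b 1b
 */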
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)