/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
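
/*
 * void *memset(void *s, int c, size_t n)
 *
 * Entry: r0 = buffer, r1 = fill byte, r2 = byte count.
 * r0 must survive as the return value, so ip carries the running
 * destination pointer instead.  __memset and mmioset share this
 * body.
 */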

	.text
	.align	5

ENTRY(__memset)
ENTRY(mmioset)
WEAK(memset)
UNWIND( .fnstart         )
	and	r1, r1, #255		@ cast to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
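	@ an unaligned start takes the detour through 6f, which
	@ word-aligns ip before rejoining at 1b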
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
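/*
 * Broadcast the byte across the word (v |= v << 8; v |= v << 16)
 * and keep a second copy in r3 so stmia can store pairs.
 * __memset64 re-enters at 7: below with r1/r3 already loaded.
 */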
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
7:	cmp	r2, #16
	blt	4f
UNWIND( .fnend              )

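/*
 * Two bulk-store variants follow.  CALGN() (see asm/assembler.h)
 * expands its argument only on CPUs where cache-line aligning the
 * destination is a win (currently Feroceon); everywhere else the
 * plain version below is assembled.
 */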
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
UNWIND( .fnstart            )
UNWIND( .save {r8, lr}      )
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r3

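/*
 * Bulk loop: four quad-register stmia per pass store 64 bytes.
 * subs sets the flags, so the "ge" stores only execute while at
 * least 64 bytes remained, and ldmfdeq pops straight back to the
 * caller once the count hits exactly zero.
 */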
2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count: subtracting 64 left the low six
 * bits of r2 intact, and we only test bits from now on.
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}
UNWIND( .fnend              )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

UNWIND( .fnstart               )
UNWIND( .save {r4-r8, lr}      )
	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3
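	@ r1, r3-r8 and lr now all hold the pattern: one 8-register
	@ stmia writes 32 bytes at a time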

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4
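	@ r8 held the distance to the next 32-byte boundary; lsl #28
	@ parked its bit 4 in C, bit 3 in N and bit 2 in bit 30, so
	@ the cs/mi stores and the strne flushed 16, 8 and 4 bytes as
	@ needed (mov r8, r1 restores the fill pattern afterwards).
	@ ip is now 32-byte aligned: each 8-register stmia below
	@ writes one whole line, 64 bytes per pass.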

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend                 )

#endif

UNWIND( .fnstart            )
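/*
 * Fewer than 16 bytes remain and ip is word aligned; drain the
 * 8- and 4-byte chunks, then finish byte by byte at 5:.
 */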
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

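/*
 * Unaligned entry: r3 holds the low two bits of the original
 * pointer.  Exactly 4 - r3 bytes are stored (the lt/le/always
 * strb trio), the count is fixed up, and we rejoin the aligned
 * path at 1b; if fewer than 4 bytes were asked for, we finish at
 * the byte loop 5b instead.
 */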
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend   )
ENDPROC(memset)
ENDPROC(mmioset)
ENDPROC(__memset)

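/*
 * 32- and 64-bit pattern fills reuse the body above.  Per the
 * wrappers in arch/arm/include/asm/string.h, __memset32 gets
 * ptr/pattern/byte count in r0-r2, while __memset64 gets the low
 * word in r1, the byte count in r2 and the high word in r3, so
 * the 64-bit pattern arrives already split across two registers.
 */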
ENTRY(__memset32)
UNWIND( .fnstart         )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend   )
ENDPROC(__memset32)
ENTRY(__memset64)
UNWIND( .fnstart         )
	mov	ip, r0			@ preserve r0 as return value
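	@ r1/r3 already hold the 64-bit pattern and r2 the byte
	@ count, so entering at 7b skips the byte replication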
	b	7b			@ jump into the middle of memset
UNWIND( .fnend   )
ENDPROC(__memset64)