xref: /openbmc/linux/arch/arc/lib/memset-archs.S (revision 1918693f)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */
51f7e3dc0SClaudiu Zissulescu
61f7e3dc0SClaudiu Zissulescu#include <linux/linkage.h>
7e6a72b7dSEugeniy Paltsev#include <asm/cache.h>
81f7e3dc0SClaudiu Zissulescu
/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6). If you want to implement an optimized memset for the
 * other possible L1 data cache line lengths (32B and 128B), you should rewrite
 * the code carefully, checking that we don't issue any prefetchw/prealloc
 * instruction for L1 cache lines which don't belong to the memset area.
 */
17e6a72b7dSEugeniy Paltsev
/*
 * Cache-hint wrappers used by memset.
 *
 * With a 64-byte L1 data cache line (L1_CACHE_SHIFT == 6) each macro emits
 * the corresponding instruction for the address [reg + off]:
 *   PREALLOC_INSTR  -> prealloc  (allocate the line without fetching it)
 *   PREFETCHW_INSTR -> prefetchw (prefetch the line for an upcoming write)
 * For any other line size both macros expand to nothing, so the code that
 * uses them needs no conditional assembly of its own.
 */
#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif
371f7e3dc0SClaudiu Zissulescu
3886effd0dSVineet GuptaENTRY_CFI(memset)
391f7e3dc0SClaudiu Zissulescu	mov.f	0, r2
401f7e3dc0SClaudiu Zissulescu;;; if size is zero
411f7e3dc0SClaudiu Zissulescu	jz.d	[blink]
421f7e3dc0SClaudiu Zissulescu	mov	r3, r0		; don't clobber ret val
431f7e3dc0SClaudiu Zissulescu
44*1918693fSVineet Gupta	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
45*1918693fSVineet Gupta
461f7e3dc0SClaudiu Zissulescu;;; if length < 8
471f7e3dc0SClaudiu Zissulescu	brls.d.nt	r2, 8, .Lsmallchunk
481f7e3dc0SClaudiu Zissulescu	mov.f	lp_count,r2
491f7e3dc0SClaudiu Zissulescu
501f7e3dc0SClaudiu Zissulescu	and.f	r4, r0, 0x03
511f7e3dc0SClaudiu Zissulescu	rsub	lp_count, r4, 4
521f7e3dc0SClaudiu Zissulescu	lpnz	@.Laligndestination
531f7e3dc0SClaudiu Zissulescu	;; LOOP BEGIN
541f7e3dc0SClaudiu Zissulescu	stb.ab	r1, [r3,1]
551f7e3dc0SClaudiu Zissulescu	sub	r2, r2, 1
561f7e3dc0SClaudiu Zissulescu.Laligndestination:
571f7e3dc0SClaudiu Zissulescu
581f7e3dc0SClaudiu Zissulescu;;; Destination is aligned
591f7e3dc0SClaudiu Zissulescu	and	r1, r1, 0xFF
601f7e3dc0SClaudiu Zissulescu	asl	r4, r1, 8
611f7e3dc0SClaudiu Zissulescu	or	r4, r4, r1
621f7e3dc0SClaudiu Zissulescu	asl	r5, r4, 16
631f7e3dc0SClaudiu Zissulescu	or	r5, r5, r4
641f7e3dc0SClaudiu Zissulescu	mov	r4, r5
651f7e3dc0SClaudiu Zissulescu
661f7e3dc0SClaudiu Zissulescu	sub3	lp_count, r2, 8
671f7e3dc0SClaudiu Zissulescu	cmp     r2, 64
681f7e3dc0SClaudiu Zissulescu	bmsk.hi	r2, r2, 5
691f7e3dc0SClaudiu Zissulescu	mov.ls	lp_count, 0
701f7e3dc0SClaudiu Zissulescu	add3.hi	r2, r2, 8
711f7e3dc0SClaudiu Zissulescu
721f7e3dc0SClaudiu Zissulescu;;; Convert len to Dwords, unfold x8
731f7e3dc0SClaudiu Zissulescu	lsr.f	lp_count, lp_count, 6
74262137bcSVineet Gupta
751f7e3dc0SClaudiu Zissulescu	lpnz	@.Lset64bytes
761f7e3dc0SClaudiu Zissulescu	;; LOOP START
77e6a72b7dSEugeniy Paltsev	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching
78e6a72b7dSEugeniy Paltsev
79262137bcSVineet Gupta#ifdef CONFIG_ARC_HAS_LL64
801f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
811f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
821f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
831f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
841f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
851f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
861f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
871f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
88262137bcSVineet Gupta#else
89262137bcSVineet Gupta	st.ab	r4, [r3, 4]
90262137bcSVineet Gupta	st.ab	r4, [r3, 4]
91262137bcSVineet Gupta	st.ab	r4, [r3, 4]
92262137bcSVineet Gupta	st.ab	r4, [r3, 4]
93262137bcSVineet Gupta	st.ab	r4, [r3, 4]
94262137bcSVineet Gupta	st.ab	r4, [r3, 4]
95262137bcSVineet Gupta	st.ab	r4, [r3, 4]
96262137bcSVineet Gupta	st.ab	r4, [r3, 4]
97262137bcSVineet Gupta	st.ab	r4, [r3, 4]
98262137bcSVineet Gupta	st.ab	r4, [r3, 4]
99262137bcSVineet Gupta	st.ab	r4, [r3, 4]
100262137bcSVineet Gupta	st.ab	r4, [r3, 4]
101262137bcSVineet Gupta	st.ab	r4, [r3, 4]
102262137bcSVineet Gupta	st.ab	r4, [r3, 4]
103262137bcSVineet Gupta	st.ab	r4, [r3, 4]
104262137bcSVineet Gupta	st.ab	r4, [r3, 4]
105262137bcSVineet Gupta#endif
1061f7e3dc0SClaudiu Zissulescu.Lset64bytes:
1071f7e3dc0SClaudiu Zissulescu
1081f7e3dc0SClaudiu Zissulescu	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
1091f7e3dc0SClaudiu Zissulescu	lpnz	.Lset32bytes
1101f7e3dc0SClaudiu Zissulescu	;; LOOP START
111262137bcSVineet Gupta#ifdef CONFIG_ARC_HAS_LL64
1121f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
1131f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
1141f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
1151f7e3dc0SClaudiu Zissulescu	std.ab	r4, [r3, 8]
116262137bcSVineet Gupta#else
117262137bcSVineet Gupta	st.ab	r4, [r3, 4]
118262137bcSVineet Gupta	st.ab	r4, [r3, 4]
119262137bcSVineet Gupta	st.ab	r4, [r3, 4]
120262137bcSVineet Gupta	st.ab	r4, [r3, 4]
121262137bcSVineet Gupta	st.ab	r4, [r3, 4]
122262137bcSVineet Gupta	st.ab	r4, [r3, 4]
123262137bcSVineet Gupta	st.ab	r4, [r3, 4]
124262137bcSVineet Gupta	st.ab	r4, [r3, 4]
125262137bcSVineet Gupta#endif
1261f7e3dc0SClaudiu Zissulescu.Lset32bytes:
1271f7e3dc0SClaudiu Zissulescu
1281f7e3dc0SClaudiu Zissulescu	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes
1291f7e3dc0SClaudiu Zissulescu.Lsmallchunk:
1301f7e3dc0SClaudiu Zissulescu	lpnz	.Lcopy3bytes
1311f7e3dc0SClaudiu Zissulescu	;; LOOP START
1321f7e3dc0SClaudiu Zissulescu	stb.ab	r1, [r3, 1]
1331f7e3dc0SClaudiu Zissulescu.Lcopy3bytes:
1341f7e3dc0SClaudiu Zissulescu
1351f7e3dc0SClaudiu Zissulescu	j	[blink]
1361f7e3dc0SClaudiu Zissulescu
13786effd0dSVineet GuptaEND_CFI(memset)
1381f7e3dc0SClaudiu Zissulescu
/*
 * memzero: zero r1 bytes starting at r0.
 *
 * In:   r0 = destination address, r1 = number of bytes
 *
 * Rearranges the arguments into the layout memset expects (r1 = fill value,
 * r2 = byte count) and branches to memset. The branch is a plain "b", not a
 * call, so blink still holds the address of the caller of memzero and the
 * "j [blink]" in memset returns directly to that caller.
 */
ENTRY_CFI(memzero)
    ; adjust bzero args to memset args
    mov r2, r1     ; r2 = byte count
    b.d  memset    ; tail call, so no need to tinker with blink
    mov r1, 0      ; delay slot: fill value = 0
END_CFI(memzero)
145