xref: /openbmc/linux/arch/arc/lib/memcpy-archs.S (revision 8f762fe5)
1/*
2 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/linkage.h>
10
/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 * Each macro takes (destination register, source register, shift amount)
 * and expands to one instruction, or to nothing.
 *
 * SHIFT_1 / SHIFT_2: used as a pair inside the word loops. SHIFT_1 moves a
 *   freshly loaded word so that it can be OR-ed with the bytes carried over
 *   in r5; SHIFT_2 turns the bytes of that word which did not fit into the
 *   next carry.
 * MERGE_1 / MERGE_2: position the initial halfword / byte before they are
 *   OR-ed together into the first carry (source off by 1).
 * EXTRACT_1 / EXTRACT_2: pull the final halfword / byte out of the carry
 *   for the trailing sth / stb (source off by 1).
 *
 * The "; <<" and "; >>" tails are part of the macro bodies; they end up as
 * assembler comments after expansion.
 */
11#ifdef __LITTLE_ENDIAN__
/* Little endian: the carry lives in the low bits, new data is shifted up. */
12# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
13# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
14# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
/* Expands to nothing: the first byte is already in the low 8 bits. */
15# define MERGE_2(RX,RY,IMM)
/* IMM is ignored here: the halfword is simply the low 16 bits. */
16# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
17# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
18#else
/* Big endian: the carry lives in the high bits, new data is shifted down. */
19# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
20# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
21# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
22# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
23# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
/* IMM is ignored here: a fixed shift by 8 brings the last byte into bits 7..0. */
24# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
25#endif
26
/*
 * Building blocks of the aligned bulk-copy loop in memcpy, which issues
 * four LOADX followed by four STOREX per iteration.
 *
 * With CONFIG_ARC_HAS_LL64 each access uses ldd / std and steps the address
 * register by 8, so one iteration copies 4 * 8 = 32 bytes. Otherwise ld / st
 * step by 4 and one iteration copies 4 * 4 = 16 bytes. The .ab form updates
 * the address register after the access (post-increment).
 *
 * ZOLSHFT: log2 of the bytes copied per iteration; memcpy uses
 *          (length >> ZOLSHFT) as the loop count.
 * ZOLAND:  matching mask; memcpy uses (length & ZOLAND) as the number of
 *          bytes left for the trailing byte loop.
 */
27#ifdef CONFIG_ARC_HAS_LL64
28# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
29# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
30# define ZOLSHFT		5
31# define ZOLAND			0x1F
32#else
33# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
34# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
35# define ZOLSHFT		4
36# define ZOLAND			0xF
37#endif
38
/*
 * memcpy - copy r2 bytes from [r1] to [r0].
 *
 * In:      r0 = destination, r1 = source, r2 = length in bytes.
 *          (NOTE(review): presumably the C memcpy(dst, src, n) arguments as
 *          passed by the ARC calling convention - confirm.)
 * Out:     r0 is never written here, so the caller gets the destination
 *          pointer back unchanged.
 * Writes:  r1, r2, r3 (running destination), r4-r10, lp_count and the
 *          status flags. With CONFIG_ARC_HAS_LL64 the 64-bit loads
 *          presumably also fill the odd partner registers, up to r11.
 *
 * Flow:
 *   1. length == 0  -> return.
 *   2. length <= 8  -> plain byte loop (.Lsmallchunk).
 *   3. otherwise byte-copy until the destination is 32-bit aligned, then
 *      choose a path from (source & 3):
 *        0       -> CASE 0: unrolled aligned loop plus a byte tail;
 *        1, 2, 3 -> CASE 1/2/3: aligned word loads are shifted and merged
 *                   with source bytes carried in r5, so that every word
 *                   store is aligned as well.
 *
 * Several instructions sit in branch delay slots (.d suffix) and several
 * conditional instructions rely on flags set a few lines earlier, so the
 * statement order matters throughout.
 */
39ENTRY_CFI(memcpy)
40	mov.f	0, r2		; only sets the flags from the length
41;;; Zero length: return right away
42	jz.d	[blink]
43	mov	r3, r0		; delay slot: work on a copy so r0 (ret val) is not clobbered
44
45;;; Lengths up to 8 bytes are handled by the byte loop at .Lsmallchunk
46	cmp	r2, 8
47	bls.d	@.Lsmallchunk
48	mov.f	lp_count, r2	; delay slot: byte count for that loop, leaves NZ for its lpnz
49
;;; Byte-copy 4 - (dst & 3) bytes so that the destination r3 becomes 32bit
;;; aligned. The loop is skipped (Z set) when dst is already aligned.
50	and.f	r4, r0, 0x03
51	rsub	lp_count, r4, 4	; lp_count = 4 - r4
52	lpnz	@.Laligndestination
53	;; LOOP BEGIN
54	ldb.ab	r5, [r1,1]
55	sub	r2, r2, 1	; one byte less left to copy
56	stb.ab	r5, [r3,1]
57.Laligndestination:
58
59;;; Check the alignment of the source
60	and.f	r4, r1, 0x03	; r4 = src & 3, selects CASE 0..3
61	bnz.d	@.Lsourceunaligned
62
63;;; CASE 0: Both source and destination are 32bit aligned
64;;; Convert len to loop iterations (len >> ZOLSHFT), loop body is unfolded x4
;;; NOTE: the lsr.f below is the delay slot of the bnz.d above, so it also
;;; executes when that branch is taken. The unaligned paths redo the compare
;;; and reload lp_count before they depend on either of them.
65	lsr.f	lp_count, r2, ZOLSHFT
66	lpnz	@.Lcopy32_64bytes
67	;; LOOP START
68	LOADX (r6, r1)
69	LOADX (r8, r1)
70	LOADX (r10, r1)
71	LOADX (r4, r1)
72	STOREX (r6, r3)
73	STOREX (r8, r3)
74	STOREX (r10, r3)
75	STOREX (r4, r3)
76.Lcopy32_64bytes:
77
78	and.f	lp_count, r2, ZOLAND ;Bytes left over by the loop above (len & ZOLAND)
79.Lsmallchunk:
;;; Also the entry point for lengths <= 8 (lp_count = length, flags NZ)
80	lpnz	@.Lcopyremainingbytes	; skipped when nothing is left
81	;; LOOP START
82	ldb.ab	r5, [r1,1]
83	stb.ab	r5, [r3,1]
84.Lcopyremainingbytes:
85
86	j	[blink]
87;;; END CASE 0
88
89.Lsourceunaligned:
;;; r4 = src & 3 is 1, 2 or 3 here. The compare below feeds both branches.
90	cmp	r4, 2
91	beq.d	@.LunalignedOffby2
92	sub	r2, r2, 1	; delay slot, runs for all three cases, keeps the cmp flags
93
94	bhi.d	@.LunalignedOffby3
95	ldb.ab	r5, [r1, 1]	; delay slot, runs for CASE 1 and CASE 3: first source byte
96
97;;; CASE 1: The source is unaligned, off by 1
98	;; Hence 1 byte was read above to reach 16bit alignment
99	;; and 2 more bytes are read here to reach 32bit alignment
100	ldh.ab	r6, [r1, 2]
101	sub	r2, r2, 2	; 3 source bytes are now held in registers
102	;; Convert to words, unfold x2 (8 bytes per iteration)
103	lsr.f	lp_count, r2, 3
104	MERGE_1 (r6, r6, 8)
105	MERGE_2 (r5, r5, 24)
106	or	r5, r5, r6	; r5 = carry holding the 3 pending bytes
107
108	;; From here on both the loads from r1 and the stores to r3 are aligned
109	lpnz	@.Lcopy8bytes_1
110	;; LOOP START
111	ld.ab	r6, [r1, 4]
112	ld.ab	r8, [r1,4]
113
114	SHIFT_1	(r7, r6, 24)
115	or	r7, r7, r5	; carry + first byte of r6 (memory order) = output word
116	SHIFT_2	(r5, r6, 8)
117
118	SHIFT_1	(r9, r8, 24)
119	or	r9, r9, r5	; same again for the second word
120	SHIFT_2	(r5, r8, 8)
121
122	st.ab	r7, [r3, 4]
123	st.ab	r9, [r3, 4]
124.Lcopy8bytes_1:
125
;;; r5 still carries 3 bytes that have been read but not yet stored
126	;; Write back the remaining 16bits
127	EXTRACT_1 (r6, r5, 16)
128	sth.ab	r6, [r3, 2]
129	;; Write back the remaining 8bits
130	EXTRACT_2 (r5, r5, 16)
131	stb.ab	r5, [r3, 1]
132
133	and.f	lp_count, r2, 0x07 ;Up to 7 trailing bytes
134	lpnz	@.Lcopybytewise_1
135	;; LOOP START
136	ldb.ab	r6, [r1,1]
137	stb.ab	r6, [r3,1]
138.Lcopybytewise_1:
139	j	[blink]
140
141.LunalignedOffby2:
142;;; CASE 2: The source is unaligned, off by 2
;;; One halfword read brings the source to 32bit alignment; it is carried in r5
143	ldh.ab	r5, [r1, 2]
144	sub	r2, r2, 1	; second decrement, the first was in the beq.d delay slot
145
146	;; Both src and dst are aligned
147	;; Convert to words, unfold x2 (8 bytes per iteration)
148	lsr.f	lp_count, r2, 3
149#ifdef __BIG_ENDIAN__
	;; Big endian keeps the carry in the upper half while the loop runs.
	;; The shift is applied only when the loop executes (NZ from the lsr.f
	;; above) and is undone after the loop under the same, untouched flags.
150	asl.nz	r5, r5, 16
151#endif
152	lpnz	@.Lcopy8bytes_2
153	;; LOOP START
154	ld.ab	r6, [r1, 4]
155	ld.ab	r8, [r1,4]
156
157	SHIFT_1	(r7, r6, 16)
158	or	r7, r7, r5	; carry + first 2 bytes of r6 (memory order) = output word
159	SHIFT_2	(r5, r6, 16)
160
161	SHIFT_1	(r9, r8, 16)
162	or	r9, r9, r5	; same again for the second word
163	SHIFT_2	(r5, r8, 16)
164
165	st.ab	r7, [r3, 4]
166	st.ab	r9, [r3, 4]
167.Lcopy8bytes_2:
168
169#ifdef __BIG_ENDIAN__
170	lsr.nz	r5, r5, 16	; bring the carry back into the low half for sth
171#endif
172	sth.ab	r5, [r3, 2]	; store the 2 carried bytes
173
174	and.f	lp_count, r2, 0x07 ;Up to 7 trailing bytes
175	lpnz	@.Lcopybytewise_2
176	;; LOOP START
177	ldb.ab	r6, [r1,1]
178	stb.ab	r6, [r3,1]
179.Lcopybytewise_2:
180	j	[blink]
181
182.LunalignedOffby3:
183;;; CASE 3: The source is unaligned, off by 3
184;;; Hence a single byte read is enough to reach 32bit alignment. That byte
;;; was already loaded into r5 (and r2 decremented) in the delay slots
;;; at .Lsourceunaligned.
185
186	;; Both src and dst are aligned
187	;; Convert to words, unfold x2 (8 bytes per iteration)
188	lsr.f	lp_count, r2, 3
189#ifdef __BIG_ENDIAN__
	;; Same scheme as in CASE 2: shift the carry up only when the loop
	;; runs, undo it afterwards (ne is the same condition as nz).
190	asl.ne	r5, r5, 24
191#endif
192	lpnz	@.Lcopy8bytes_3
193	;; LOOP START
194	ld.ab	r6, [r1, 4]
195	ld.ab	r8, [r1,4]
196
197	SHIFT_1	(r7, r6, 8)
198	or	r7, r7, r5	; carry + first 3 bytes of r6 (memory order) = output word
199	SHIFT_2	(r5, r6, 24)
200
201	SHIFT_1	(r9, r8, 8)
202	or	r9, r9, r5	; same again for the second word
203	SHIFT_2	(r5, r8, 24)
204
205	st.ab	r7, [r3, 4]
206	st.ab	r9, [r3, 4]
207.Lcopy8bytes_3:
208
209#ifdef __BIG_ENDIAN__
210	lsr.nz	r5, r5, 24	; bring the carry back into the low byte for stb
211#endif
212	stb.ab	r5, [r3, 1]	; store the 1 carried byte
213
214	and.f	lp_count, r2, 0x07 ;Up to 7 trailing bytes
215	lpnz	@.Lcopybytewise_3
216	;; LOOP START
217	ldb.ab	r6, [r1,1]
218	stb.ab	r6, [r3,1]
219.Lcopybytewise_3:
220	j	[blink]
221
222END_CFI(memcpy)
223