/*
 *  arch/xtensa/lib/usercopy.S
 *
 *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 *  It needs to remain separate and distinct.  The hal files are part
 *  of the Xtensa link-time HAL, and those files may differ per
 *  processor configuration.  Patching the kernel for another
 *  processor configuration includes replacing the hal files, and we
 *  could lose the special functionality for accessing user-space
 *  memory during such a patch.  We sacrifice a little code space here
 *  in favor of simpler code maintenance.
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The return value is the number of bytes not copied, so a return
 * value of zero means success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 *	a12/ tmp (only in the !XCHAL_HAVE_LOOPS unaligned-source loop)
 */
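/*
 * For reference, a minimal sketch of the calling contract in C
 * (illustrative only; the actual implementation is the assembly
 * routine below):
 *
 *	size_t left = __xtensa_copy_user(dst, src, len);
 *	if (left == 0) {
 *		// success: all len bytes were copied
 *	} else {
 *		// a user access faulted: only the first (len - left)
 *		// bytes of dst are valid
 *	}
 */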

#include <variant/core.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src	R, W0, W1
#define SSA8(R)	ssa8b R
#else
#define ALIGN(R, W0, W1) src	R, W1, W0
#define SSA8(R)	ssa8l R
#endif
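/*
 * SSA8 sets the shift-amount register SAR from the low two bits of an
 * address, and SRC is a funnel shift that extracts 32 bits from the
 * concatenation of two registers.  A C sketch of the little-endian
 * case (on the unaligned-source path the shift is always 8, 16, or
 * 24, so the C shifts below stay well defined):
 *
 *	unsigned shift = 8 * ((uintptr_t)src & 3);	// SSA8(src)
 *
 *	uint32_t align(uint32_t w0, uint32_t w1)	// ALIGN(R, W0, W1)
 *	{
 *		return (w0 >> shift) | (w1 << (32 - shift));
 *	}
 */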

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
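/*
 * For example, EX(l8ui, a6, a3, 0, fixup) expands to:
 *
 *	9:	l8ui	a6, a3, 0
 *		.section __ex_table, "a"
 *		.word	9b, fixup
 *		.previous
 *
 * i.e. the address of the potentially faulting access is paired with
 * the fixup handler in the __ex_table section, which the kernel's
 * fault handler searches so the copy can resume at 'fixup' instead of
 * taking an oops.
 */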


	.text
	.align	4
	.global	__xtensa_copy_user
	.type	__xtensa_copy_user,@function
__xtensa_copy_user:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2 # if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4 # if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		  # if source is also aligned,
	bnone	a3, a8, .Laligned # then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw

/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3,  1
	EX(s8i, a6, a5,  0, fixup)
	addi	a5, a5,  1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3,  2
	EX(s8i, a6, a5,  0, fixup)
	EX(s8i, a7, a5,  1, fixup)
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm
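/*
 * Sketch (C) of the destination priming above: at most one 1-byte and
 * one 2-byte copy bring dst up to word alignment before control
 * rejoins .Ldstaligned (short requests go to .Lbytecopy instead):
 *
 *	if (dst & 1) { copy 1 byte;  dst += 1; src += 1; len -= 1; }
 *	if (dst & 2) { copy 2 bytes; dst += 2; src += 2; len -= 2; }
 *	// dst is now 4-byte aligned
 */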

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	retw
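/*
 * Both variants above are equivalent to this C loop (sketch); with
 * XCHAL_HAVE_LOOPS, the loopnez zero-overhead loop hardware removes
 * the per-iteration branch:
 *
 *	while (len--)
 *		*dst++ = *src++;	// each access may fault -> fixup
 */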

/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	EX(l32i, a6, a3,  0, fixup)
	EX(l32i, a7, a3,  4, fixup)
	EX(s32i, a6, a5,  0, fixup)
	EX(l32i, a6, a3,  8, fixup)
	EX(s32i, a7, a5,  4, fixup)
	EX(l32i, a7, a3, 12, fixup)
	EX(s32i, a6, a5,  8, fixup)
	addi	a3, a3, 16
	EX(s32i, a7, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	EX(l32i, a6, a3,  0, fixup)
	EX(l32i, a7, a3,  4, fixup)
	addi	a3, a3,  8
	EX(s32i, a6, a5,  0, fixup)
	EX(s32i, a7, a5,  4, fixup)
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
	EX(l32i, a6, a3,  0, fixup)
	addi	a3, a3,  4
	EX(s32i, a6, a5,  0, fixup)
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
	EX(l16ui, a6, a3,  0, fixup)
	addi	a3, a3,  2
	EX(s16i,  a6, a5,  0, fixup)
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
	EX(l8ui, a6, a3,  0, fixup)
	EX(s8i,  a6, a5,  0, fixup)
.L5:
	movi	a2, 0		# return success for len bytes copied
	retw
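/*
 * Sketch (C) of the tail handling above: after the 16-byte loop, the
 * low four bits of len select the remaining copies, largest first:
 *
 *	if (len & 8) copy_8_bytes();
 *	if (len & 4) copy_4_bytes();
 *	if (len & 2) copy_2_bytes();
 *	if (len & 1) copy_1_byte();
 */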

/*
 * Destination is aligned, source is unaligned
 */

	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(l32i, a6, a3, 0, fixup)	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	EX(l32i, a7, a3,  4, fixup)
	EX(l32i, a8, a3,  8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5,  0, fixup)
	EX(l32i, a9, a3, 12, fixup)
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5,  4, fixup)
	EX(l32i, a6, a3, 16, fixup)
	ALIGN(	a8, a8, a9)
	EX(s32i, a8, a5,  8, fixup)
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	EX(s32i, a9, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a12, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	EX(l32i, a7, a3,  4, fixup)
	EX(l32i, a8, a3,  8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5,  0, fixup)
	addi	a3, a3,  8
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5,  4, fixup)
	addi	a5, a5,  8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	EX(l32i, a7, a3,  4, fixup)
	addi	a3, a3,  4
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5,  0, fixup)
	addi	a5, a5,  4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
	EX(l8ui, a6, a3,  0, fixup)
	EX(l8ui, a7, a3,  1, fixup)
	addi	a3, a3,  2
	EX(s8i, a6, a5,  0, fixup)
	EX(s8i, a7, a5,  1, fixup)
	addi	a5, a5,  2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
	EX(l8ui, a6, a3,  0, fixup)
	EX(s8i,  a6, a5,  0, fixup)
.L15:
	movi	a2, 0		# return success for len bytes copied
	retw
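/*
 * Sketch (C, little-endian) of the software pipelining above: one
 * aligned source word is carried across iterations in a6, and each
 * store merges the carried word with the next one using the SRC
 * funnel shift (shift = 8 * (original src & 3), nonzero on this
 * path):
 *
 *	w0 = load32(src);			// primed before the loop
 *	while (words--) {
 *		w1 = load32(src + 4);
 *		store32(dst, (w0 >> shift) | (w1 << (32 - shift)));
 *		w0 = w1;			// the loop above does 4
 *		src += 4; dst += 4;		// of these per iteration
 *	}
 */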


	.section .fixup, "ax"
	.align	4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 */


fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw
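/*
 * Worked example: if a 100-byte copy faults after 37 bytes have been
 * stored, a5 - a2 == 37 and the routine returns a11 - 37 == 63, the
 * number of bytes not copied.
 */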
301