xref: /openbmc/linux/arch/xtensa/lib/strncpy_user.S (revision 0013aceb)
1249ac17eSChris Zankel/*
2249ac17eSChris Zankel *  arch/xtensa/lib/strncpy_user.S
3249ac17eSChris Zankel *
4249ac17eSChris Zankel *  This file is subject to the terms and conditions of the GNU General
5249ac17eSChris Zankel *  Public License.  See the file "COPYING" in the main directory of
6249ac17eSChris Zankel *  this archive for more details.
7249ac17eSChris Zankel *
8249ac17eSChris Zankel *  Returns: -EFAULT if exception before terminator, N if the entire
9249ac17eSChris Zankel *  buffer filled, else strlen.
10249ac17eSChris Zankel *
11249ac17eSChris Zankel *  Copyright (C) 2002 Tensilica Inc.
12249ac17eSChris Zankel */
13249ac17eSChris Zankel
14249ac17eSChris Zankel#include <linux/errno.h>
150013acebSMax Filippov#include <variant/core.h>
160013acebSMax Filippov#include <asm/asmmacro.h>
17249ac17eSChris Zankel
18249ac17eSChris Zankel/*
19249ac17eSChris Zankel * char *__strncpy_user(char *dst, const char *src, size_t len)
20249ac17eSChris Zankel */
21a0bb46baSChris Zankel
# MASKn selects the byte lane that holds the character loaded from byte
# offset n of a word, so the .Lz0-.Lz3 zero tests below work unchanged
# on both big- and little-endian cores.
22a0bb46baSChris Zankel#ifdef __XTENSA_EB__
23a0bb46baSChris Zankel# define MASK0 0xff000000
24a0bb46baSChris Zankel# define MASK1 0x00ff0000
25a0bb46baSChris Zankel# define MASK2 0x0000ff00
26a0bb46baSChris Zankel# define MASK3 0x000000ff
27a0bb46baSChris Zankel#else
28a0bb46baSChris Zankel# define MASK0 0x000000ff
29a0bb46baSChris Zankel# define MASK1 0x0000ff00
30a0bb46baSChris Zankel# define MASK2 0x00ff0000
31a0bb46baSChris Zankel# define MASK3 0xff000000
32a0bb46baSChris Zankel#endif
33249ac17eSChris Zankel
34249ac17eSChris Zankel# Register use
35249ac17eSChris Zankel#   a0/ return address
36249ac17eSChris Zankel#   a1/ stack pointer
37249ac17eSChris Zankel#   a2/ return value
38249ac17eSChris Zankel#   a3/ src
39249ac17eSChris Zankel#   a4/ len
40249ac17eSChris Zankel#   a5/ mask0
41249ac17eSChris Zankel#   a6/ mask1
42249ac17eSChris Zankel#   a7/ mask2
43249ac17eSChris Zankel#   a8/ mask3
44249ac17eSChris Zankel#   a9/ tmp
45249ac17eSChris Zankel#   a10/ tmp
46249ac17eSChris Zankel#   a11/ dst
47249ac17eSChris Zankel#   a12/ tmp
48249ac17eSChris Zankel
49a0bb46baSChris Zankel.text
50249ac17eSChris Zankel.align	4
51249ac17eSChris Zankel.global	__strncpy_user
52249ac17eSChris Zankel.type	__strncpy_user,@function
# __strncpy_user(dst=a2, src=a3, len=a4)
# Copies a NUL-terminated string from user memory, at most len bytes.
# Returns in a2: strlen of the copied string if the terminator was
# reached, len if the buffer filled first, or -EFAULT if a faulting
# load/store was diverted to the .fixup handlers 10:/11: below.
# Strategy: align src to a word boundary byte-by-byte, then copy a word
# at a time, testing each byte lane against the MASKn constants.
53249ac17eSChris Zankel__strncpy_user:
54249ac17eSChris Zankel	entry	sp, 16		# minimal stack frame (windowed ABI)
55249ac17eSChris Zankel	# a2/ dst, a3/ src, a4/ len
56249ac17eSChris Zankel	mov	a11, a2		# leave dst in return value register
57249ac17eSChris Zankel	beqz	a4, .Lret	# if len is zero
58a0bb46baSChris Zankel	movi	a5, MASK0	# mask for byte 0
59a0bb46baSChris Zankel	movi	a6, MASK1	# mask for byte 1
60a0bb46baSChris Zankel	movi	a7, MASK2	# mask for byte 2
61a0bb46baSChris Zankel	movi	a8, MASK3	# mask for byte 3
62249ac17eSChris Zankel	bbsi.l	a3, 0, .Lsrc1mod2 # if only  8-bit aligned
63249ac17eSChris Zankel	bbsi.l	a3, 1, .Lsrc2mod4 # if only 16-bit aligned
64249ac17eSChris Zankel.Lsrcaligned:	# return here when src is word-aligned
65249ac17eSChris Zankel	srli	a12, a4, 2	# number of loop iterations with 4B per loop
66249ac17eSChris Zankel	movi	a9, 3		# dst alignment-test mask
67249ac17eSChris Zankel	bnone	a11, a9, .Laligned	# fast path: dst is word-aligned too
68249ac17eSChris Zankel	j	.Ldstunaligned	# otherwise fall back to byte copy
69249ac17eSChris Zankel
70249ac17eSChris Zankel.Lsrc1mod2:	# src address is odd
710013acebSMax FilippovEX(11f)	l8ui	a9, a3, 0		# get byte 0
72249ac17eSChris Zankel	addi	a3, a3, 1		# advance src pointer
730013acebSMax FilippovEX(10f)	s8i	a9, a11, 0		# store byte 0
74249ac17eSChris Zankel	beqz	a9, .Lret		# if byte 0 is zero
75249ac17eSChris Zankel	addi	a11, a11, 1		# advance dst pointer
76249ac17eSChris Zankel	addi	a4, a4, -1		# decrement len
77249ac17eSChris Zankel	beqz	a4, .Lret		# if len is zero
78249ac17eSChris Zankel	bbci.l	a3, 1, .Lsrcaligned	# if src is now word-aligned
79249ac17eSChris Zankel
80249ac17eSChris Zankel.Lsrc2mod4:	# src address is 2 mod 4
810013acebSMax FilippovEX(11f)	l8ui	a9, a3, 0		# get byte 0
82249ac17eSChris Zankel	/* 1-cycle interlock */
830013acebSMax FilippovEX(10f)	s8i	a9, a11, 0		# store byte 0
84249ac17eSChris Zankel	beqz	a9, .Lret		# if byte 0 is zero
85249ac17eSChris Zankel	addi	a11, a11, 1		# advance dst pointer
86249ac17eSChris Zankel	addi	a4, a4, -1		# decrement len
87249ac17eSChris Zankel	beqz	a4, .Lret		# if len is zero
880013acebSMax FilippovEX(11f)	l8ui	a9, a3, 1		# get byte 1
89249ac17eSChris Zankel	addi	a3, a3, 2		# advance src pointer
900013acebSMax FilippovEX(10f)	s8i	a9, a11, 0		# store byte 1
91249ac17eSChris Zankel	beqz	a9, .Lret		# if byte 1 is zero
92249ac17eSChris Zankel	addi	a11, a11, 1		# advance dst pointer
93249ac17eSChris Zankel	addi	a4, a4, -1		# decrement len
94249ac17eSChris Zankel	bnez	a4, .Lsrcaligned	# if len is nonzero
95249ac17eSChris Zankel.Lret:
96249ac17eSChris Zankel	sub	a2, a11, a2		# compute strlen
97249ac17eSChris Zankel	retw
98249ac17eSChris Zankel
99249ac17eSChris Zankel/*
100249ac17eSChris Zankel * dst is word-aligned, src is word-aligned
101249ac17eSChris Zankel */
102249ac17eSChris Zankel	.align	4		# 1 mod 4 alignment for LOOPNEZ
103249ac17eSChris Zankel	.byte	0		# (0 mod 4 alignment for LBEG)
104249ac17eSChris Zankel.Laligned:
105249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS
106249ac17eSChris Zankel	loopnez	a12, .Loop1done
107249ac17eSChris Zankel#else
108249ac17eSChris Zankel	beqz	a12, .Loop1done
109249ac17eSChris Zankel	slli	a12, a12, 2
110249ac17eSChris Zankel	add	a12, a12, a11	# a12 = end of last 4B chunk
111249ac17eSChris Zankel#endif
# Word loop: test bytes 0-2 BEFORE the store (a NUL there means the
# whole word must not be written), but byte 3 AFTER it (the word incl.
# its NUL has already been stored correctly by then).
112249ac17eSChris Zankel.Loop1:
1130013acebSMax FilippovEX(11f)	l32i	a9, a3, 0		# get word from src
114249ac17eSChris Zankel	addi	a3, a3, 4		# advance src pointer
115249ac17eSChris Zankel	bnone	a9, a5, .Lz0		# if byte 0 is zero
116249ac17eSChris Zankel	bnone	a9, a6, .Lz1		# if byte 1 is zero
117249ac17eSChris Zankel	bnone	a9, a7, .Lz2		# if byte 2 is zero
1180013acebSMax FilippovEX(10f)	s32i	a9, a11, 0		# store word to dst
119249ac17eSChris Zankel	bnone	a9, a8, .Lz3		# if byte 3 is zero
120249ac17eSChris Zankel	addi	a11, a11, 4		# advance dst pointer
121249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS
122249ac17eSChris Zankel	blt	a11, a12, .Loop1
123249ac17eSChris Zankel#endif
124249ac17eSChris Zankel
125249ac17eSChris Zankel.Loop1done:
126249ac17eSChris Zankel	bbci.l	a4, 1, .L100		# skip if len bit 1 clear (no halfword left)
127249ac17eSChris Zankel	# copy 2 bytes
1280013acebSMax FilippovEX(11f)	l16ui	a9, a3, 0
129249ac17eSChris Zankel	addi	a3, a3, 2		# advance src pointer
130249ac17eSChris Zankel#ifdef __XTENSA_EB__
131249ac17eSChris Zankel	bnone	a9, a7, .Lz0		# if first halfword char (MASK2 lane) is zero
132249ac17eSChris Zankel	bnone	a9, a8, .Lz1		# if second halfword char (MASK3 lane) is zero
# NOTE(review): l16ui zero-extends, so here the chars sit in bits 15:0,
# but .Lz1's EB extui expects them in bits 31:16 (word-path layout) —
# verify the big-endian halfword-terminator path stores the last char.
133249ac17eSChris Zankel#else
134249ac17eSChris Zankel	bnone	a9, a5, .Lz0		# if byte 0 is zero
135249ac17eSChris Zankel	bnone	a9, a6, .Lz1		# if byte 1 is zero
136249ac17eSChris Zankel#endif
1370013acebSMax FilippovEX(10f)	s16i	a9, a11, 0
138249ac17eSChris Zankel	addi	a11, a11, 2		# advance dst pointer
139249ac17eSChris Zankel.L100:
140249ac17eSChris Zankel	bbci.l	a4, 0, .Lret		# done if len bit 0 clear (no odd byte left)
1410013acebSMax FilippovEX(11f)	l8ui	a9, a3, 0
142249ac17eSChris Zankel	/* slot */
1430013acebSMax FilippovEX(10f)	s8i	a9, a11, 0
144249ac17eSChris Zankel	beqz	a9, .Lret		# if byte is zero
145249ac17eSChris Zankel	addi	a11, a11, 1-3		# advance dst ptr 1, but also cancel
146249ac17eSChris Zankel					# the effect of adding 3 in .Lz3 code
147249ac17eSChris Zankel	/* fall thru to .Lz3 and "retw" */
148249ac17eSChris Zankel
149249ac17eSChris Zankel.Lz3:	# byte 3 is zero
150249ac17eSChris Zankel	addi	a11, a11, 3		# advance dst pointer
151249ac17eSChris Zankel	sub	a2, a11, a2		# compute strlen
152249ac17eSChris Zankel	retw
153249ac17eSChris Zankel.Lz0:	# byte 0 is zero
154249ac17eSChris Zankel#ifdef __XTENSA_EB__
155249ac17eSChris Zankel	movi	a9, 0		# EB: low byte of a9 isn't the NUL; store explicit 0
156249ac17eSChris Zankel#endif /* __XTENSA_EB__ */
1570013acebSMax FilippovEX(10f)	s8i	a9, a11, 0
158249ac17eSChris Zankel	sub	a2, a11, a2		# compute strlen
159249ac17eSChris Zankel	retw
160249ac17eSChris Zankel.Lz1:	# byte 1 is zero
161249ac17eSChris Zankel#ifdef __XTENSA_EB__
162249ac17eSChris Zankel	extui   a9, a9, 16, 16	# EB: move word bytes 0-1 into the low halfword
163249ac17eSChris Zankel#endif /* __XTENSA_EB__ */
1640013acebSMax FilippovEX(10f)	s16i	a9, a11, 0	# store last char + NUL in one halfword
165249ac17eSChris Zankel	addi	a11, a11, 1		# advance dst pointer
166249ac17eSChris Zankel	sub	a2, a11, a2		# compute strlen
167249ac17eSChris Zankel	retw
168249ac17eSChris Zankel.Lz2:	# byte 2 is zero
169249ac17eSChris Zankel#ifdef __XTENSA_EB__
170249ac17eSChris Zankel	extui   a9, a9, 16, 16	# EB: move word bytes 0-1 into the low halfword
171249ac17eSChris Zankel#endif /* __XTENSA_EB__ */
1720013acebSMax FilippovEX(10f)	s16i	a9, a11, 0
173249ac17eSChris Zankel	movi	a9, 0
1740013acebSMax FilippovEX(10f)	s8i	a9, a11, 2	# terminating NUL at byte 2
175249ac17eSChris Zankel	addi	a11, a11, 2		# advance dst pointer
176249ac17eSChris Zankel	sub	a2, a11, a2		# compute strlen
177249ac17eSChris Zankel	retw
178249ac17eSChris Zankel
179249ac17eSChris Zankel	.align	4		# 1 mod 4 alignment for LOOPNEZ
180249ac17eSChris Zankel	.byte	0		# (0 mod 4 alignment for LBEG)
181249ac17eSChris Zankel.Ldstunaligned:
182249ac17eSChris Zankel/*
183249ac17eSChris Zankel * for now just use byte copy loop
184249ac17eSChris Zankel */
185249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS
186249ac17eSChris Zankel	loopnez	a4, .Lunalignedend
187249ac17eSChris Zankel#else
188249ac17eSChris Zankel	beqz	a4, .Lunalignedend
189249ac17eSChris Zankel	add	a12, a11, a4		# a12 = ending address
190249ac17eSChris Zankel#endif /* XCHAL_HAVE_LOOPS */
191249ac17eSChris Zankel.Lnextbyte:
1920013acebSMax FilippovEX(11f)	l8ui	a9, a3, 0
193249ac17eSChris Zankel	addi	a3, a3, 1
1940013acebSMax FilippovEX(10f)	s8i	a9, a11, 0
195249ac17eSChris Zankel	beqz	a9, .Lunalignedend	# stop after storing the NUL
196249ac17eSChris Zankel	addi	a11, a11, 1
197249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS
198249ac17eSChris Zankel	blt	a11, a12, .Lnextbyte
199249ac17eSChris Zankel#endif
200249ac17eSChris Zankel
201249ac17eSChris Zankel.Lunalignedend:
202249ac17eSChris Zankel	sub	a2, a11, a2		# compute strlen
203249ac17eSChris Zankel	retw
204249ac17eSChris Zankel
205249ac17eSChris Zankel
206249ac17eSChris Zankel	.section .fixup, "ax"
207249ac17eSChris Zankel	.align	4
208249ac17eSChris Zankel
209249ac17eSChris Zankel	/* For now, just return -EFAULT.  Future implementations might
210249ac17eSChris Zankel	 * like to clear remaining kernel space, like the fixup
211249ac17eSChris Zankel	 * implementation in memset().  Thus, we differentiate between
212249ac17eSChris Zankel	 * load/store fixups. */
213249ac17eSChris Zankel
2140013acebSMax Filippov10:	# store-fault target (all EX(10f) sites)
2150013acebSMax Filippov11:	# load-fault target (all EX(11f) sites)
216249ac17eSChris Zankel	movi	a2, -EFAULT
217249ac17eSChris Zankel	retw
218