xref: /openbmc/u-boot/arch/arm/lib/lib1funcs.S (revision b9553986)
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
/*
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 */
12
13#include <linux/linkage.h>
14#include <asm/assembler.h>
15
/*
 * U-Boot compatibility bits: define UNWIND() as an empty macro, since we
 * do not support stack unwinding, and define CONFIG_AEABI to make all
 * of the functions available without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif
25
/*
 * ARM_DIV_BODY - unsigned shift-and-subtract division core.
 *
 *   \dividend  in: value to divide; consumed by the subtractions
 *   \divisor   in: value to divide by; shifted in place
 *   \result    out: quotient
 *   \curbit    scratch: quotient bit matching the current divisor position
 *
 * None of the argument registers is preserved and the condition flags
 * are clobbered.  Both users in this file (__udivsi3 and __divsi3) deal
 * with a zero or unit divisor, with dividend <= divisor and with
 * power-of-two divisors before expanding the macro, and only read
 * \result afterwards.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the divisor with the dividend in one step: the shift is
	@ the difference between the leading-zero counts of both values.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result	@ result = shift amount
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result	@ curbit = 1 << shift
	mov	\result, #0			@ quotient accumulates from zero

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled four times: each step tries the divisor
	@ one bit position lower and records the matching quotient bit.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm
90
91
/*
 * ARM_DIV2_ORDER - compute log2 of a power-of-two divisor.
 *
 *   \divisor  in: divisor; shifted right in place on pre-ARMv5 builds
 *   \order    out: log2(\divisor) for a power-of-two \divisor
 *
 * __udivsi3 and __divsi3 expand this only after "divisor & (divisor - 1)"
 * tested zero, so that the quotient can then be produced with a single
 * right shift by \order.  The pre-ARMv5 path clobbers the flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - leading zeros

#else

	@ Binary search for the set bit: test the upper half of the
	@ remaining range, shift it down when occupied and add its
	@ width to the running order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ A power-of-two divisor is now 1, 2, 4 or 8: 8 adds 3, while
	@ 1, 2 and 4 add divisor >> 1, that is 0, 1 and 2.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
121
122
/*
 * ARM_MOD_BODY - unsigned shift-and-subtract remainder core.
 *
 *   \dividend  in: value to divide; out: remainder
 *   \divisor   in: value to divide by; shifted in place
 *   \order     scratch: number of bit positions the divisor was shifted up
 *   \spare     scratch: only written on the ARMv5+ path
 *
 * The condition flags are clobbered.  Both users in this file
 * (__umodsi3 and __modsi3) deal with a zero or unit divisor, with
 * dividend <= divisor and with power-of-two divisors before expanding
 * the macro.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the divisor with the dividend in one step; order is the
	@ difference between the leading-zero counts of both values.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f				@ fewer than 4 steps in total

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ anything left to reduce?
	mov	\divisor, \divisor, lsr #4	@ (leaves the flags alone)
	subsge	\order, \order, #4		@ if so, account for the batch
	bge	1b				@ loop while a full batch remains

	tst	\order, #3			@ no leftover steps ...
	teqne	\dividend, #0			@ ... or dividend already zero?
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2			@ compare order with -2
	blt	4f				@ one step left
	beq	3f				@ two steps left
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
192
193
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   r0  in: dividend; out: quotient
 *   r1  in: divisor
 *
 * A zero divisor is handed to Ldiv0.  r1, r2, r3 and the condition
 * flags are used as scratch.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the quotient
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor; flags still come from "cmp r0, r1":
	@ equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: the quotient is a plain right shift.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection
225
/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 *   r0  in: dividend; out: remainder
 *   r1  in: divisor
 *
 * A zero divisor is handed to Ldiv0.  r1, r2, r3 and the condition
 * flags are used as scratch.  The conditional sequence below resolves
 * the easy cases without branching:
 *   - divisor == 1 or dividend == divisor: remainder is 0
 *   - dividend <  divisor: remainder is the dividend itself
 *   - power-of-two divisor: remainder is dividend & (divisor - 1)
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0				@ borrow: divisor was 0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr				@ one of the easy cases applied

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection
245
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   r0  in: dividend; out: quotient
 *   r1  in: divisor
 *
 * The division itself is done on the magnitudes; ip keeps
 * "dividend ^ divisor", whose top bit is the sign of the quotient.
 * A zero divisor is handed to Ldiv0.  r1, r2, r3, ip and the condition
 * flags are used as scratch.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ apply the saved sign
	ret	lr

	@ Divisor was 1 or -1: ip ^ r0 has the sign of the original
	@ divisor, so negate the dividend only for -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ |dividend| <= |divisor|; flags still come from "cmp r3, r1":
	@ lower gives 0, equal gives 1 or -1 according to the sign in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift the magnitude, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection
290
/*
 * __modsi3 - signed 32-bit remainder.
 *
 *   r0  in: dividend; out: remainder, carrying the sign of the dividend
 *   r1  in: divisor
 *
 * The remainder is computed on the magnitudes and negated at the end
 * when the original dividend (kept in ip) was negative.  A zero divisor
 * is handed to Ldiv0.  r1, r2, r3, ip and the condition flags are used
 * as scratch.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ an easy case gave the result

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ restore the sign of the dividend
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection
316
317#ifdef CONFIG_AEABI
318
/*
 * __aeabi_uidivmod - unsigned 32-bit division returning both results.
 *
 *   r0  in: dividend; out: quotient
 *   r1  in: divisor;  out: remainder
 *
 * The quotient comes from __aeabi_uidiv; the remainder is rebuilt as
 * dividend - quotient * divisor from the operands saved on the stack.
 * r2 and r3 are used as scratch; ip and lr are reloaded from the stack.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection
334
/*
 * __aeabi_idivmod - signed 32-bit division returning both results.
 *
 *   r0  in: dividend; out: quotient
 *   r1  in: divisor;  out: remainder
 *
 * The quotient comes from __aeabi_idiv; the remainder is rebuilt as
 * dividend - quotient * divisor from the operands saved on the stack.
 * r2 and r3 are used as scratch; ip and lr are reloaded from the stack.
 *
 * The function lives in its own .text.__aeabi_idivmod section rather
 * than sharing the section of __aeabi_uidivmod, so that each helper can
 * be kept or discarded independently by the linker.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection
350
351#endif
352
/*
 * Ldiv0 - common division-by-zero tail for the routines in this file.
 *
 * Saves lr in an 8-byte stack slot, calls __div0 and, if that call
 * returns, hands 0 back in r0 to the original caller of the division
 * routine.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!			@ push lr, moving sp down by 8
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8			@ pop the saved lr straight into pc

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection
367
368/* Thumb-1 specialities */
369#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi - table jump through signed byte offsets.
 *
 *   r0  in: table index (preserved)
 *   lr  in: return address of the call; the table is read starting at
 *           that address with bit 0 cleared
 *
 * Loads the signed byte at table[r0], doubles it and returns to
 * lr + 2 * offset.  r1 is saved and restored around the computation.
 * (Presumably called from compiler-generated Thumb-1 switch code.)
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out bit 0 ...
	lsls	r1, r1, #1			@ ... r1 = lr with bit 0 cleared
	ldrsb	r1, [r1, r0]			@ signed byte entry
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection
383
/*
 * __gnu_thumb1_case_uqi - table jump through unsigned byte offsets.
 *
 *   r0  in: table index (preserved)
 *   lr  in: return address of the call; the table is read starting at
 *           that address with bit 0 cleared
 *
 * Loads the unsigned byte at table[r0], doubles it and returns to
 * lr + 2 * offset.  r1 is saved and restored around the computation.
 * (Presumably called from compiler-generated Thumb-1 switch code.)
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out bit 0 ...
	lsls	r1, r1, #1			@ ... r1 = lr with bit 0 cleared
	ldrb	r1, [r1, r0]			@ unsigned byte entry
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection
397
/*
 * __gnu_thumb1_case_shi - table jump through signed halfword offsets.
 *
 *   r0  in: table index (preserved)
 *   lr  in: return address of the call; the table is read starting at
 *           that address with bit 0 cleared
 *
 * Loads the signed halfword at table[r0] (byte offset 2 * r0), doubles
 * it and returns to lr + 2 * offset.  r0 and r1 are saved and restored
 * around the computation.
 * (Presumably called from compiler-generated Thumb-1 switch code.)
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out bit 0 ...
	lsls	r0, r0, #1			@ index * 2: halfword entries
	lsls	r1, r1, #1			@ ... r1 = lr with bit 0 cleared
	ldrsh	r1, [r1, r0]			@ signed halfword entry
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection
412
/*
 * __gnu_thumb1_case_uhi - table jump through unsigned halfword offsets.
 *
 *   r0  in: table index (preserved)
 *   lr  in: return address of the call; the table is read starting at
 *           that address with bit 0 cleared
 *
 * Loads the unsigned halfword at table[r0] (byte offset 2 * r0), doubles
 * it and returns to lr + 2 * offset.  r0 and r1 are saved and restored
 * around the computation.
 * (Presumably called from compiler-generated Thumb-1 switch code.)
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out bit 0 ...
	lsls	r0, r0, #1			@ index * 2: halfword entries
	lsls	r1, r1, #1			@ ... r1 = lr with bit 0 cleared
	ldrh	r1, [r1, r0]			@ unsigned halfword entry
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
426.popsection
427#endif
428