xref: /openbmc/u-boot/arch/arm/lib/lib1funcs.S (revision ca6c5e03)
1/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 *   - contributed to gcc-3.4 on Sep 30, 2003
6 *   - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11 * SPDX-License-Identifier:	GPL-2.0+
12 */
13
14
15#include <linux/linkage.h>
16#include <asm/assembler.h>
17
18/*
19 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
20 * do not support stack unwinding, and define CONFIG_AEABI to make all
21 * of the functions available without diverging from Linux code.
22 */
23#ifdef __UBOOT__
24#define UNWIND(x...)
25#define CONFIG_AEABI
26#endif
27
/*
 * ARM_DIV_BODY - unsigned shift-and-subtract division core.
 *
 *   dividend - in: numerator; clobbered by the subtractions
 *   divisor  - in: denominator; clobbered (shifted up, then back down)
 *   result   - out: quotient, built one bit at a time with orr
 *   curbit   - scratch: quotient bit that matches the current divisor shift
 *
 * Both users in this file only reach the macro after handling a divisor
 * of 0 or 1, dividend <= divisor, and power-of-two divisors separately.
 */
28.macro ARM_DIV_BODY dividend, divisor, result, curbit
29
30#if __LINUX_ARM_ARCH__ >= 5
31
32	clz	\curbit, \divisor		@ leading zero count of divisor
33	clz	\result, \dividend		@ leading zero count of dividend
34	sub	\result, \curbit, \result	@ shift that lines up the top set bits
35	mov	\curbit, #1
36	mov	\divisor, \divisor, lsl \result	@ divisor aligned with dividend
37	mov	\curbit, \curbit, lsl \result	@ quotient bit for that alignment
38	mov	\result, #0			@ quotient starts out empty
39
40#else
41
42	@ Initially shift the divisor left 3 bits if possible,
43	@ set curbit accordingly.  This allows for curbit to be located
44	@ at the left end of each 4 bit nibbles in the division loop
45	@ to save one loop in most cases.
46	tst	\divisor, #0xe0000000
47	moveq	\divisor, \divisor, lsl #3
48	moveq	\curbit, #8
49	movne	\curbit, #1
50
51	@ Unless the divisor is very big, shift it up in multiples of
52	@ four bits, since this is the amount of unwinding in the main
53	@ division loop.  Continue shifting until the divisor is
54	@ larger than the dividend.
551:	cmp	\divisor, #0x10000000
56	cmplo	\divisor, \dividend
57	movlo	\divisor, \divisor, lsl #4
58	movlo	\curbit, \curbit, lsl #4
59	blo	1b
60
61	@ For very big divisors, we must shift it a bit at a time, or
62	@ we will be in danger of overflowing.
631:	cmp	\divisor, #0x80000000
64	cmplo	\divisor, \dividend
65	movlo	\divisor, \divisor, lsl #1
66	movlo	\curbit, \curbit, lsl #1
67	blo	1b
68
69	mov	\result, #0			@ quotient starts out empty
70
71#endif
72
73	@ Division loop
	@ Each pass tries the divisor at four successive shifts (0 to 3
	@ bits down) and sets the matching quotient bit whenever the
	@ shifted divisor still fits into what is left of the dividend.
741:	cmp	\dividend, \divisor
75	subhs	\dividend, \dividend, \divisor
76	orrhs	\result,   \result,   \curbit
77	cmp	\dividend, \divisor,  lsr #1
78	subhs	\dividend, \dividend, \divisor, lsr #1
79	orrhs	\result,   \result,   \curbit,  lsr #1
80	cmp	\dividend, \divisor,  lsr #2
81	subhs	\dividend, \dividend, \divisor, lsr #2
82	orrhs	\result,   \result,   \curbit,  lsr #2
83	cmp	\dividend, \divisor,  lsr #3
84	subhs	\dividend, \dividend, \divisor, lsr #3
85	orrhs	\result,   \result,   \curbit,  lsr #3
86	cmp	\dividend, #0			@ Early termination?
87	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
88	movne	\divisor,  \divisor, lsr #4	@ Yes, move divisor down a nibble too
89	bne	1b
90
91.endm
92
93
/*
 * ARM_DIV2_ORDER - compute the bit index of the highest set bit of divisor.
 *
 *   divisor - in: value to examine; shifted down (clobbered) on the
 *             path that does not use clz
 *   order   - out: resulting bit index
 *
 * The users in this file only invoke it for power-of-two divisors and
 * then use order as a right-shift count in place of a division.
 */
94.macro ARM_DIV2_ORDER divisor, order
95
96#if __LINUX_ARM_ARCH__ >= 5
97
98	clz	\order, \divisor
99	rsb	\order, \order, #31		@ order = 31 - leading zero count
100
101#else
102
	@ Binary search: strip 16, 8 and then 4 low bit positions whenever
	@ the value is at least that large, adding the amount to order.
103	cmp	\divisor, #(1 << 16)
104	movhs	\divisor, \divisor, lsr #16
105	movhs	\order, #16
106	movlo	\order, #0
107
108	cmp	\divisor, #(1 << 8)
109	movhs	\divisor, \divisor, lsr #8
110	addhs	\order, \order, #8
111
112	cmp	\divisor, #(1 << 4)
113	movhs	\divisor, \divisor, lsr #4
114	addhs	\order, \order, #4
115
116	cmp	\divisor, #(1 << 2)
117	addhi	\order, \order, #3		@ above 4: add 3
118	addls	\order, \order, \divisor, lsr #1	@ 1, 2 or 4: add 0, 1 or 2
119
120#endif
121
122.endm
123
124
/*
 * ARM_MOD_BODY - unsigned shift-and-subtract remainder core.
 *
 *   dividend - in: numerator; out: value left after the subtractions,
 *              which the users in this file return as the remainder
 *   divisor  - in: denominator; clobbered (shifted up, then back down)
 *   order    - scratch: number of bit positions the divisor was shifted
 *              up, then the countdown for the subtraction steps
 *   spare    - scratch: only written on the clz path
 */
125.macro ARM_MOD_BODY dividend, divisor, order, spare
126
127#if __LINUX_ARM_ARCH__ >= 5
128
129	clz	\order, \divisor
130	clz	\spare, \dividend
131	sub	\order, \order, \spare		@ shift that lines up the top set bits
132	mov	\divisor, \divisor, lsl \order
133
134#else
135
136	mov	\order, #0
137
138	@ Unless the divisor is very big, shift it up in multiples of
139	@ four bits, since this is the amount of unwinding in the main
140	@ division loop.  Continue shifting until the divisor is
141	@ larger than the dividend.
1421:	cmp	\divisor, #0x10000000
143	cmplo	\divisor, \dividend
144	movlo	\divisor, \divisor, lsl #4
145	addlo	\order, \order, #4
146	blo	1b
147
148	@ For very big divisors, we must shift it a bit at a time, or
149	@ we will be in danger of overflowing.
1501:	cmp	\divisor, #0x80000000
151	cmplo	\divisor, \dividend
152	movlo	\divisor, \divisor, lsl #1
153	addlo	\order, \order, #1
154	blo	1b
155
156#endif
157
158	@ Perform all needed subtractions to keep only the remainder.
159	@ Do comparisons in batch of 4 first.
160	subs	\order, \order, #3		@ yes, 3 is intended here
161	blt	2f				@ fewer than 4 steps: tail only
162
1631:	cmp	\dividend, \divisor
164	subhs	\dividend, \dividend, \divisor
165	cmp	\dividend, \divisor,  lsr #1
166	subhs	\dividend, \dividend, \divisor, lsr #1
167	cmp	\dividend, \divisor,  lsr #2
168	subhs	\dividend, \dividend, \divisor, lsr #2
169	cmp	\dividend, \divisor,  lsr #3
170	subhs	\dividend, \dividend, \divisor, lsr #3
171	cmp	\dividend, #1			@ lt when dividend is 0
172	mov	\divisor, \divisor, lsr #4
173	subsge	\order, \order, #4		@ otherwise count down four steps
174	bge	1b
175
176	tst	\order, #3			@ finished if order is a multiple of 4
177	teqne	\dividend, #0			@ or if dividend is 0
178	beq	5f
179
180	@ Either 1, 2 or 3 comparison/subtractions are left.
1812:	cmn	\order, #2			@ compare order with -2
182	blt	4f				@ below -2: one step left
183	beq	3f				@ equal to -2: two steps left
184	cmp	\dividend, \divisor
185	subhs	\dividend, \dividend, \divisor
186	mov	\divisor,  \divisor,  lsr #1
1873:	cmp	\dividend, \divisor
188	subhs	\dividend, \dividend, \divisor
189	mov	\divisor,  \divisor,  lsr #1
1904:	cmp	\dividend, \divisor
191	subhs	\dividend, \dividend, \divisor
1925:
193.endm
194
195
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 *   also written: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.
 */
196.pushsection .text.__udivsi3, "ax"
197ENTRY(__udivsi3)
198ENTRY(__aeabi_uidiv)
199UNWIND(.fnstart)
200
201	subs	r2, r1, #1			@ r2 = divisor - 1
202	reteq	lr				@ divisor == 1: r0 is already the quotient
203	bcc	Ldiv0				@ borrow: divisor was 0
204	cmp	r0, r1
205	bls	11f				@ dividend <= divisor: quotient is 1 or 0
206	tst	r1, r2				@ divisor & (divisor - 1)
207	beq	12f				@ zero: divisor is a power of 2
208
209	ARM_DIV_BODY r0, r1, r2, r3
210
211	mov	r0, r2				@ quotient was built in r2
212	ret	lr
213
21411:	moveq	r0, #1				@ dividend == divisor
215	movne	r0, #0				@ dividend < divisor
216	ret	lr
217
21812:	ARM_DIV2_ORDER r1, r2
219
220	mov	r0, r0, lsr r2			@ shift right by the order found above
221	ret	lr
222
223UNWIND(.fnend)
224ENDPROC(__udivsi3)
225ENDPROC(__aeabi_uidiv)
226.popsection
227
/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = dividend modulo divisor
 *   also written: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * relies on each instruction seeing the flags left by the previous one.
 */
228.pushsection .text.__umodsi3, "ax"
229ENTRY(__umodsi3)
230UNWIND(.fnstart)
231
232	subs	r2, r1, #1			@ compare divisor with 1
233	bcc	Ldiv0				@ borrow: divisor was 0
234	cmpne	r0, r1				@ compare dividend with divisor
235	moveq   r0, #0				@ divisor is 1 or equals dividend
236	tsthi	r1, r2				@ see if divisor is power of 2
237	andeq	r0, r0, r2			@ if so keep r0 & (divisor - 1)
238	retls	lr				@ done unless the general case is needed
239
240	ARM_MOD_BODY r0, r1, r2, r3
241
242	ret	lr
243
244UNWIND(.fnend)
245ENDPROC(__umodsi3)
246.popsection
247
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 *   also written: r1, r2, r3, ip and the condition flags
 *
 * The work is done on magnitudes; ip keeps r0 ^ r1, whose top bit is the
 * sign of the result.  A zero divisor branches to Ldiv0.
 */
248.pushsection .text.__divsi3, "ax"
249ENTRY(__divsi3)
250ENTRY(__aeabi_idiv)
251UNWIND(.fnstart)
252
253	cmp	r1, #0
254	eor	ip, r0, r1			@ save the sign of the result.
255	beq	Ldiv0				@ divisor == 0
256	rsbmi	r1, r1, #0			@ loops below use unsigned.
257	subs	r2, r1, #1			@ division by 1 or -1 ?
258	beq	10f
259	movs	r3, r0
260	rsbmi	r3, r0, #0			@ positive dividend value
261	cmp	r3, r1
262	bls	11f				@ magnitude of dividend <= that of divisor
263	tst	r1, r2				@ divisor is power of 2 ?
264	beq	12f
265
266	ARM_DIV_BODY r3, r1, r0, r2
267
268	cmp	ip, #0				@ negative result expected ?
269	rsbmi	r0, r0, #0			@ then negate the quotient
270	ret	lr
271
27210:	teq	ip, r0				@ same sign ?
273	rsbmi	r0, r0, #0
274	ret	lr
275
27611:	movlo	r0, #0				@ smaller magnitude: quotient 0
277	moveq	r0, ip, asr #31			@ equal magnitude: 0 or -1 from the sign
278	orreq	r0, r0, #1			@ ... turned into +1 or -1
279	ret	lr
280
28112:	ARM_DIV2_ORDER r1, r2
282
283	cmp	ip, #0				@ negative result expected ?
284	mov	r0, r3, lsr r2			@ magnitude shifted right by the order
285	rsbmi	r0, r0, #0			@ negate if so
286	ret	lr
287
288UNWIND(.fnend)
289ENDPROC(__divsi3)
290ENDPROC(__aeabi_idiv)
291.popsection
292
/*
 * __modsi3 - signed 32-bit remainder.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = remainder, negated when the dividend was negative
 *   also written: r1, r2, r3, ip and the condition flags
 *
 * The work is done on magnitudes; ip keeps the original dividend so its
 * sign can be applied at the end.  A zero divisor branches to Ldiv0.
 */
293.pushsection .text.__modsi3, "ax"
294ENTRY(__modsi3)
295UNWIND(.fnstart)
296
297	cmp	r1, #0
298	beq	Ldiv0				@ divisor == 0
299	rsbmi	r1, r1, #0			@ loops below use unsigned.
300	movs	ip, r0				@ preserve sign of dividend
301	rsbmi	r0, r0, #0			@ if negative make positive
302	subs	r2, r1, #1			@ compare divisor with 1
303	cmpne	r0, r1				@ compare dividend with divisor
304	moveq	r0, #0				@ divisor is 1 or magnitudes are equal
305	tsthi	r1, r2				@ see if divisor is power of 2
306	andeq	r0, r0, r2			@ if so keep r0 & (divisor - 1)
307	bls	10f				@ skip the loop unless it is needed
308
309	ARM_MOD_BODY r0, r1, r2, r3
310
31110:	cmp	ip, #0				@ was the dividend negative ?
312	rsbmi	r0, r0, #0			@ then negate the remainder
313	ret	lr
314
315UNWIND(.fnend)
316ENDPROC(__modsi3)
317.popsection
318
319#ifdef CONFIG_AEABI
320
/*
 * __aeabi_uidivmod - unsigned division returning quotient and remainder.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient (from __aeabi_uidiv), r1 = dividend - quotient * divisor
 *   also written: r2, r3
 */
321.pushsection .text.__aeabi_uidivmod, "ax"
322ENTRY(__aeabi_uidivmod)
323UNWIND(.fnstart)
324UNWIND(.save {r0, r1, ip, lr}	)
325
326	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
327	bl	__aeabi_uidiv
328	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
329	mul	r3, r0, r2			@ r3 = quotient * divisor
330	sub	r1, r1, r3			@ r1 = remainder
331	ret	lr
332
333UNWIND(.fnend)
334ENDPROC(__aeabi_uidivmod)
335.popsection
336
/*
 * __aeabi_idivmod - signed division returning quotient and remainder.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient (from __aeabi_idiv), r1 = dividend - quotient * divisor
 *   also written: r2, r3
 *
 * The section is named after this function, like every other routine in
 * this file, so that it does not share a section with __aeabi_uidivmod.
 */
337.pushsection .text.__aeabi_idivmod, "ax"
338ENTRY(__aeabi_idivmod)
339UNWIND(.fnstart)
340UNWIND(.save {r0, r1, ip, lr}	)
341
342	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
343	bl	__aeabi_idiv
344	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
345	mul	r3, r0, r2			@ r3 = quotient * divisor
346	sub	r1, r1, r3			@ r1 = remainder
347	ret	lr
348
349UNWIND(.fnend)
350ENDPROC(__aeabi_idivmod)
351.popsection
352
353#endif
354
/*
 * Ldiv0 - common target for the zero-divisor branches in this file.
 *
 * Calls __div0 (defined outside this file), then returns 0 in r0 to the
 * address that was in lr on entry, i.e. to the caller of the division
 * routine that branched here.
 */
355.pushsection .text.Ldiv0, "ax"
356Ldiv0:
357UNWIND(.fnstart)
358UNWIND(.pad #4)
359UNWIND(.save {lr})
360
361	str	lr, [sp, #-8]!		@ push lr, moving sp down by 8 bytes
362	bl	__div0
363	mov	r0, #0			@ About as wrong as it could be.
364	ldr	pc, [sp], #8		@ return through the saved lr, sp back up by 8
365
366UNWIND(.fnend)
367ENDPROC(Ldiv0)
368.popsection
369
370/* Thumb-1 specialities */
371#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi - table jump through signed byte entries.
 *
 *   in:  r0 = index, lr = return address; the table is read starting at
 *        that address with bit 0 cleared
 *   out: returns to lr + 2 * (signed byte at table[r0])
 *
 * r1 is saved and restored; lr is modified.
 * NOTE(review): presumably called from compiler-generated switch code;
 * no caller is visible in this file.
 */
372.pushsection .text.__gnu_thumb1_case_sqi, "ax"
373ENTRY(__gnu_thumb1_case_sqi)
374	push	{r1}
375	mov	r1, lr
376	lsrs	r1, r1, #1
377	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
378	ldrsb	r1, [r1, r0]		@ signed byte entry number r0
379	lsls	r1, r1, #1		@ entry is doubled to get a byte offset
380	add	lr, lr, r1
381	pop	{r1}
382	bx	lr
383ENDPROC(__gnu_thumb1_case_sqi)
384.popsection
385
/*
 * __gnu_thumb1_case_uqi - table jump through unsigned byte entries.
 *
 *   in:  r0 = index, lr = return address; the table is read starting at
 *        that address with bit 0 cleared
 *   out: returns to lr + 2 * (unsigned byte at table[r0])
 *
 * r1 is saved and restored; lr is modified.
 * NOTE(review): presumably called from compiler-generated switch code;
 * no caller is visible in this file.
 */
386.pushsection .text.__gnu_thumb1_case_uqi, "ax"
387ENTRY(__gnu_thumb1_case_uqi)
388	push	{r1}
389	mov	r1, lr
390	lsrs	r1, r1, #1
391	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
392	ldrb	r1, [r1, r0]		@ unsigned byte entry number r0
393	lsls	r1, r1, #1		@ entry is doubled to get a byte offset
394	add	lr, lr, r1
395	pop	{r1}
396	bx	lr
397ENDPROC(__gnu_thumb1_case_uqi)
398.popsection
399
/*
 * __gnu_thumb1_case_shi - table jump through signed halfword entries.
 *
 *   in:  r0 = index, lr = return address; the table is read starting at
 *        that address with bit 0 cleared
 *   out: returns to lr + 2 * (signed halfword at table[r0])
 *
 * r0 and r1 are saved and restored; lr is modified.
 * NOTE(review): presumably called from compiler-generated switch code;
 * no caller is visible in this file.
 */
400.pushsection .text.__gnu_thumb1_case_shi, "ax"
401ENTRY(__gnu_thumb1_case_shi)
402	push	{r0, r1}
403	mov	r1, lr
404	lsrs	r1, r1, #1
405	lsls	r0, r0, #1		@ index doubled: entries are 2 bytes wide
406	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
407	ldrsh	r1, [r1, r0]		@ signed halfword entry
408	lsls	r1, r1, #1		@ entry is doubled to get a byte offset
409	add	lr, lr, r1
410	pop	{r0, r1}
411	bx	lr
412ENDPROC(__gnu_thumb1_case_shi)
413.popsection
414
/*
 * __gnu_thumb1_case_uhi - table jump through unsigned halfword entries.
 *
 *   in:  r0 = index, lr = return address; the table is read starting at
 *        that address with bit 0 cleared
 *   out: returns to lr + 2 * (unsigned halfword at table[r0])
 *
 * r0 and r1 are saved and restored; lr is modified.
 * NOTE(review): presumably called from compiler-generated switch code;
 * no caller is visible in this file.
 */
415.pushsection .text.__gnu_thumb1_case_uhi, "ax"
416ENTRY(__gnu_thumb1_case_uhi)
417	push	{r0, r1}
418	mov	r1, lr
419	lsrs	r1, r1, #1
420	lsls	r0, r0, #1		@ index doubled: entries are 2 bytes wide
421	lsls	r1, r1, #1		@ r1 = lr with bit 0 cleared
422	ldrh	r1, [r1, r0]		@ unsigned halfword entry
423	lsls	r1, r1, #1		@ entry is doubled to get a byte offset
424	add	lr, lr, r1
425	pop	{r0, r1}
426	bx	lr
427ENDPROC(__gnu_thumb1_case_uhi)
428.popsection
429#endif
430