xref: /openbmc/linux/arch/arm/lib/lib1funcs.S (revision 6dfcd296)
1/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 *   - contributed to gcc-3.4 on Sep 30, 2003
6 *   - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file.  (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING.  If not, write to
32the Free Software Foundation, 59 Temple Place - Suite 330,
33Boston, MA 02111-1307, USA.  */
34
35
36#include <linux/linkage.h>
37#include <asm/assembler.h>
38#include <asm/unwind.h>
39#include <asm/export.h>
40
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit shift-and-subtract division.
 *
 *   dividend  in:  numerator.  Modified in place by the trial subtractions.
 *             The unrolled steps still subtract when the matching
 *             "curbit, lsr #k" is already zero, so do not treat this
 *             register as the remainder on exit; neither expansion in
 *             this file reads it afterwards.
 *   divisor   in:  denominator.  Clobbered (shifted left, then right).
 *   result    out: quotient.  Cleared before the loop, bits are OR-ed in.
 *   curbit    scratch: quotient bit that corresponds to the current
 *             position of the shifted divisor.
 *
 * Condition flags are clobbered.
 *
 * Both expansions in this file are reached only after the caller has
 * handled divisor == 0, divisor == 1, dividend <= divisor and
 * power-of-two divisors, so the macro itself does not check for them.
 */
41.macro ARM_DIV_BODY dividend, divisor, result, curbit
42
43#if __LINUX_ARM_ARCH__ >= 5
44
	/*
	 * shift = clz(divisor) - clz(dividend): the distance needed to line
	 * the top set bit of the divisor up with the top set bit of the
	 * dividend.  The result register holds the shift count temporarily.
	 */
45	clz	\curbit, \divisor
46	clz	\result, \dividend
47	sub	\result, \curbit, \result
	/* divisor <<= shift, curbit = 1 << shift, then start with quotient 0 */
48	mov	\curbit, #1
49	mov	\divisor, \divisor, lsl \result
50	mov	\curbit, \curbit, lsl \result
51	mov	\result, #0
52
53#else
54
55	@ Initially shift the divisor left 3 bits if possible,
56	@ set curbit accordingly.  This allows for curbit to be located
57	@ at the left end of each 4 bit nibbles in the division loop
58	@ to save one loop in most cases.
59	tst	\divisor, #0xe0000000
60	moveq	\divisor, \divisor, lsl #3
61	moveq	\curbit, #8
62	movne	\curbit, #1
63
64	@ Unless the divisor is very big, shift it up in multiples of
65	@ four bits, since this is the amount of unwinding in the main
66	@ division loop.  Continue shifting until the divisor is
67	@ larger than the dividend.
681:	cmp	\divisor, #0x10000000
69	cmplo	\divisor, \dividend
70	movlo	\divisor, \divisor, lsl #4
71	movlo	\curbit, \curbit, lsl #4
72	blo	1b
73
74	@ For very big divisors, we must shift it a bit at a time, or
75	@ we will be in danger of overflowing.
761:	cmp	\divisor, #0x80000000
77	cmplo	\divisor, \dividend
78	movlo	\divisor, \divisor, lsl #1
79	movlo	\curbit, \curbit, lsl #1
80	blo	1b
81
82	mov	\result, #0
83
84#endif
85
	/*
	 * Four trial subtractions per iteration, against divisor,
	 * divisor >> 1, divisor >> 2 and divisor >> 3.  Each time the
	 * dividend is unsigned >= the shifted divisor (hs), subtract it and
	 * set the matching quotient bit.
	 */
86	@ Division loop
871:	cmp	\dividend, \divisor
88	subhs	\dividend, \dividend, \divisor
89	orrhs	\result,   \result,   \curbit
90	cmp	\dividend, \divisor,  lsr #1
91	subhs	\dividend, \dividend, \divisor, lsr #1
92	orrhs	\result,   \result,   \curbit,  lsr #1
93	cmp	\dividend, \divisor,  lsr #2
94	subhs	\dividend, \dividend, \divisor, lsr #2
95	orrhs	\result,   \result,   \curbit,  lsr #2
96	cmp	\dividend, \divisor,  lsr #3
97	subhs	\dividend, \dividend, \divisor, lsr #3
98	orrhs	\result,   \result,   \curbit,  lsr #3
	/*
	 * Loop again only while the dividend is non-zero AND curbit >> 4 is
	 * non-zero.  If the dividend is zero the Z flag from the cmp stays
	 * set and the three conditional instructions are skipped; otherwise
	 * the flag-setting shift of curbit decides.
	 */
99	cmp	\dividend, #0			@ Early termination?
100	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
101	movne	\divisor,  \divisor, lsr #4
102	bne	1b
103
104.endm
105
106
/*
 * ARM_DIV2_ORDER - compute the bit index of a power-of-two divisor.
 *
 *   divisor   in:  a power of two.  Clobbered on the pre-ARMv5 path.
 *   order     out: log2(divisor), so that "x lsr order" equals
 *             x / divisor.
 *
 * Condition flags are clobbered on the pre-ARMv5 path.
 *
 * Both expansions in this file come right after a
 * "divisor & (divisor - 1) == 0" test.  The pre-ARMv5 path relies on that:
 * its last step is only exact for the values 1, 2, 4 and 8.
 */
107.macro ARM_DIV2_ORDER divisor, order
108
109#if __LINUX_ARM_ARCH__ >= 5
110
	/* order = 31 - (number of leading zero bits) */
111	clz	\order, \divisor
112	rsb	\order, \order, #31
113
114#else
115
	/*
	 * Binary search for the set bit: whenever the divisor is at least
	 * 2^16, 2^8, 2^4, shift it down by that much and add the same amount
	 * to order.
	 */
116	cmp	\divisor, #(1 << 16)
117	movhs	\divisor, \divisor, lsr #16
118	movhs	\order, #16
119	movlo	\order, #0
120
121	cmp	\divisor, #(1 << 8)
122	movhs	\divisor, \divisor, lsr #8
123	addhs	\order, \order, #8
124
125	cmp	\divisor, #(1 << 4)
126	movhs	\divisor, \divisor, lsr #4
127	addhs	\order, \order, #4
128
	/*
	 * For a power-of-two input the divisor is now 1, 2, 4 or 8.
	 * Above 4 (i.e. 8) add 3; otherwise add divisor >> 1, which is
	 * 0, 1 or 2 for the values 1, 2 and 4.
	 */
129	cmp	\divisor, #(1 << 2)
130	addhi	\order, \order, #3
131	addls	\order, \order, \divisor, lsr #1
132
133#endif
134
135.endm
136
137
/*
 * ARM_MOD_BODY - core of the unsigned 32-bit remainder computation.
 *
 *   dividend  in:  numerator.  out: dividend modulo divisor.
 *   divisor   in:  denominator.  Clobbered (shifted left, then right).
 *   order     scratch: first the number of bit positions the divisor was
 *             shifted left, then a countdown of the trial subtractions
 *             that are still required.
 *   spare     scratch: only written on the __LINUX_ARM_ARCH__ >= 5 path.
 *
 * Condition flags are clobbered.
 *
 * Both expansions in this file are reached only after the caller has
 * handled divisor == 0, divisor == 1, dividend <= divisor and
 * power-of-two divisors.
 */
138.macro ARM_MOD_BODY dividend, divisor, order, spare
139
140#if __LINUX_ARM_ARCH__ >= 5
141
	/*
	 * order = clz(divisor) - clz(dividend); shifting the divisor left by
	 * that much lines its top set bit up with the top set bit of the
	 * dividend.
	 */
142	clz	\order, \divisor
143	clz	\spare, \dividend
144	sub	\order, \order, \spare
145	mov	\divisor, \divisor, lsl \order
146
147#else
148
149	mov	\order, #0
150
151	@ Unless the divisor is very big, shift it up in multiples of
152	@ four bits, since this is the amount of unwinding in the main
153	@ division loop.  Continue shifting until the divisor is
154	@ larger than the dividend.
1551:	cmp	\divisor, #0x10000000
156	cmplo	\divisor, \dividend
157	movlo	\divisor, \divisor, lsl #4
158	addlo	\order, \order, #4
159	blo	1b
160
161	@ For very big divisors, we must shift it a bit at a time, or
162	@ we will be in danger of overflowing.
1631:	cmp	\divisor, #0x80000000
164	cmplo	\divisor, \dividend
165	movlo	\divisor, \divisor, lsl #1
166	addlo	\order, \order, #1
167	blo	1b
168
169#endif
170
	/*
	 * A divisor shifted left by "order" needs order + 1 compare/subtract
	 * steps (one per shift position, down to shift 0).  Biasing order by
	 * -3 makes "order >= 0" mean "at least four steps remain".
	 */
171	@ Perform all needed subtractions to keep only the remainder.
172	@ Do comparisons in batch of 4 first.
173	subs	\order, \order, #3		@ yes, 3 is intended here
174	blt	2f
175
1761:	cmp	\dividend, \divisor
177	subhs	\dividend, \dividend, \divisor
178	cmp	\dividend, \divisor,  lsr #1
179	subhs	\dividend, \dividend, \divisor, lsr #1
180	cmp	\dividend, \divisor,  lsr #2
181	subhs	\dividend, \dividend, \divisor, lsr #2
182	cmp	\dividend, \divisor,  lsr #3
183	subhs	\dividend, \dividend, \divisor, lsr #3
	/*
	 * The divisor is always moved down four positions.  If the dividend
	 * is still >= 1, count the four finished steps off order (setting
	 * the flags) and loop while order stays >= 0.  If the dividend has
	 * reached zero, the flags from the cmp make both the subges and the
	 * bge fall through.
	 */
184	cmp	\dividend, #1
185	mov	\divisor, \divisor, lsr #4
186	subges	\order, \order, #4
187	bge	1b
188
	/*
	 * Finished when no partial batch is pending ((order & 3) == 0) or
	 * when the dividend is already zero.
	 */
189	tst	\order, #3
190	teqne	\dividend, #0
191	beq	5f
192
	/*
	 * Here order is -1, -2 or -3, meaning 3, 2 or 1 steps are left.
	 * cmn compares order against -2: below (-3) jumps to the last step,
	 * equal (-2) jumps to the last two, otherwise all three run.
	 */
193	@ Either 1, 2 or 3 comparison/subtractions are left.
1942:	cmn	\order, #2
195	blt	4f
196	beq	3f
197	cmp	\dividend, \divisor
198	subhs	\dividend, \dividend, \divisor
199	mov	\divisor,  \divisor,  lsr #1
2003:	cmp	\dividend, \divisor
201	subhs	\dividend, \dividend, \divisor
202	mov	\divisor,  \divisor,  lsr #1
2034:	cmp	\dividend, \divisor
204	subhs	\dividend, \dividend, \divisor
2055:
206.endm
207
208
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient (dividend / divisor)
 *   Clobbers: r1, r2, r3, condition flags
 *
 * A zero divisor branches to Ldiv0.
 *
 * NOTE(review): the 8-byte alignment under CONFIG_ARM_PATCH_IDIV is
 * presumably needed by whatever code patches this entry point - confirm
 * against the users of that option.
 */
209#ifdef CONFIG_ARM_PATCH_IDIV
210	.align	3
211#endif
212
213ENTRY(__udivsi3)
214ENTRY(__aeabi_uidiv)
215UNWIND(.fnstart)
216
	/*
	 * r2 = divisor - 1.  Z set: divisor == 1, the quotient is the
	 * dividend already in r0.  Carry clear (borrow): divisor == 0.
	 */
217	subs	r2, r1, #1
218	reteq	lr
219	bcc	Ldiv0
	/* dividend <= divisor: the quotient can only be 0 or 1 */
220	cmp	r0, r1
221	bls	11f
	/* divisor & (divisor - 1) == 0: power of two, a shift is enough */
222	tst	r1, r2
223	beq	12f
224
	/* General case: quotient is produced in r2, r3 is scratch */
225	ARM_DIV_BODY r0, r1, r2, r3
226
227	mov	r0, r2
228	ret	lr
229
	/* Flags still come from "cmp r0, r1": equal gives 1, lower gives 0 */
23011:	moveq	r0, #1
231	movne	r0, #0
232	ret	lr
233
	/* r2 = log2(divisor); the old r2 (divisor - 1) is no longer needed */
23412:	ARM_DIV2_ORDER r1, r2
235
236	mov	r0, r0, lsr r2
237	ret	lr
238
239UNWIND(.fnend)
240ENDPROC(__udivsi3)
241ENDPROC(__aeabi_uidiv)
242EXPORT_SYMBOL(__udivsi3)
243EXPORT_SYMBOL(__aeabi_uidiv)
244
/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = dividend modulo divisor
 *   Clobbers: r1, r2, r3, condition flags
 *
 * A zero divisor branches to Ldiv0.
 *
 * The special cases are resolved by one chain of conditional
 * instructions, each depending on the flags left by the previous one:
 *   - divisor == 1 or dividend == divisor: Z is set, r0 becomes 0 (the
 *     following AND with r2 keeps it 0);
 *   - dividend < divisor: nothing executes, r0 is already the remainder;
 *   - dividend > divisor and divisor is a power of two: the tst sets Z
 *     and r0 is masked with divisor - 1.
 * All three return through the retls; only a dividend greater than a
 * non-power-of-two divisor falls through to ARM_MOD_BODY.
 */
245ENTRY(__umodsi3)
246UNWIND(.fnstart)
247
248	subs	r2, r1, #1			@ compare divisor with 1
249	bcc	Ldiv0
250	cmpne	r0, r1				@ compare dividend with divisor
251	moveq   r0, #0
252	tsthi	r1, r2				@ see if divisor is power of 2
253	andeq	r0, r0, r2
254	retls	lr
255
	/* r2 and r3 are scratch here; the remainder is left in r0 */
256	ARM_MOD_BODY r0, r1, r2, r3
257
258	ret	lr
259
260UNWIND(.fnend)
261ENDPROC(__umodsi3)
262EXPORT_SYMBOL(__umodsi3)
263
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient
 *   Clobbers: r1, r2, r3, ip, condition flags
 *
 * The division is carried out on the magnitudes of both operands and the
 * quotient is negated afterwards when the operand signs differ.
 * A zero divisor branches to Ldiv0.
 *
 * NOTE(review): the 8-byte alignment under CONFIG_ARM_PATCH_IDIV is
 * presumably needed by whatever code patches this entry point - confirm
 * against the users of that option.
 */
264#ifdef CONFIG_ARM_PATCH_IDIV
265	.align 3
266#endif
267
268ENTRY(__divsi3)
269ENTRY(__aeabi_idiv)
270UNWIND(.fnstart)
271
	/*
	 * The eor does not touch the flags, so the beq and rsbmi below still
	 * act on "cmp r1, #0".  Bit 31 of ip is set when the signs differ.
	 */
271	cmp	r1, #0
272	eor	ip, r0, r1			@ save the sign of the result.
273	beq	Ldiv0
274	rsbmi	r1, r1, #0			@ loops below use unsigned.
275	subs	r2, r1, #1			@ division by 1 or -1 ?
276	beq	10f
	/* r3 = magnitude of the dividend; r0 keeps the original value */
277	movs	r3, r0
278	rsbmi	r3, r0, #0			@ positive dividend value
	/* |dividend| <= |divisor|: the quotient can only be 0, 1 or -1 */
279	cmp	r3, r1
280	bls	11f
281	tst	r1, r2				@ divisor is power of 2 ?
282	beq	12f
283
	/* General case: unsigned quotient is produced in r0, r2 is scratch */
284	ARM_DIV_BODY r3, r1, r0, r2
285
	/* Negate the quotient when the operand signs differed */
286	cmp	ip, #0
287	rsbmi	r0, r0, #0
288	ret	lr
289
	/*
	 * Divisor was 1 or -1.  ip ^ r0 equals the original divisor, so N is
	 * set exactly when dividing by -1, in which case r0 is negated.
	 */
29010:	teq	ip, r0				@ same sign ?
291	rsbmi	r0, r0, #0
292	ret	lr
293
	/*
	 * Flags still come from "cmp r3, r1".  Lower: quotient is 0.
	 * Equal: quotient is +1 or -1, built as (ip asr 31) | 1.
	 */
29411:	movlo	r0, #0
295	moveq	r0, ip, asr #31
296	orreq	r0, r0, #1
297	ret	lr
298
	/* Power-of-two divisor: r2 = log2(|divisor|), shift the magnitude */
29912:	ARM_DIV2_ORDER r1, r2
300
	/* The mov does not set flags, so rsbmi acts on "cmp ip, #0" */
301	cmp	ip, #0
302	mov	r0, r3, lsr r2
303	rsbmi	r0, r0, #0
304	ret	lr
305
306UNWIND(.fnend)
307ENDPROC(__divsi3)
308ENDPROC(__aeabi_idiv)
309EXPORT_SYMBOL(__divsi3)
310EXPORT_SYMBOL(__aeabi_idiv)
312
/*
 * __modsi3 - signed 32-bit remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = remainder, carrying the sign of the dividend
 *   Clobbers: r1, r2, r3, ip, condition flags
 *
 * The remainder is computed on the magnitudes of both operands and
 * negated at the end when the original dividend was negative.
 * A zero divisor branches to Ldiv0.
 */
313ENTRY(__modsi3)
314UNWIND(.fnstart)
315
316	cmp	r1, #0
317	beq	Ldiv0
318	rsbmi	r1, r1, #0			@ loops below use unsigned.
319	movs	ip, r0				@ preserve sign of dividend
320	rsbmi	r0, r0, #0			@ if negative make positive
	/*
	 * Special cases on the magnitudes, resolved by a chain of
	 * conditional instructions:
	 *   - divisor == 1 or dividend == divisor: r0 becomes 0;
	 *   - dividend < divisor: r0 is already the remainder;
	 *   - dividend > divisor and divisor is a power of two: r0 is
	 *     masked with divisor - 1.
	 * All three skip ARM_MOD_BODY through the bls.
	 */
321	subs	r2, r1, #1			@ compare divisor with 1
322	cmpne	r0, r1				@ compare dividend with divisor
323	moveq	r0, #0
324	tsthi	r1, r2				@ see if divisor is power of 2
325	andeq	r0, r0, r2
326	bls	10f
327
	/* r2 and r3 are scratch here; the remainder is left in r0 */
328	ARM_MOD_BODY r0, r1, r2, r3
329
	/* ip still holds the original dividend: restore its sign */
33010:	cmp	ip, #0
331	rsbmi	r0, r0, #0
332	ret	lr
333
334UNWIND(.fnend)
335ENDPROC(__modsi3)
336EXPORT_SYMBOL(__modsi3)
337
338#ifdef CONFIG_AEABI
339
/*
 * __aeabi_uidivmod - unsigned 32-bit division returning quotient and
 * remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient, r1 = remainder
 *   Clobbers: r2, r3, condition flags
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then rebuilt
 * as dividend - quotient * divisor.
 *
 * NOTE(review): ip is pushed and popped together with the operands and
 * lr; presumably this only keeps the stack 8-byte aligned - confirm.
 */
340ENTRY(__aeabi_uidivmod)
341UNWIND(.fnstart)
342UNWIND(.save {r0, r1, ip, lr}	)
343
344	stmfd	sp!, {r0, r1, ip, lr}
345	bl	__aeabi_uidiv
	/* Pop the saved dividend into r1 and the saved divisor into r2 */
346	ldmfd	sp!, {r1, r2, ip, lr}
	/* r1 = dividend - quotient * divisor */
347	mul	r3, r0, r2
348	sub	r1, r1, r3
349	ret	lr
350
351UNWIND(.fnend)
352ENDPROC(__aeabi_uidivmod)
353EXPORT_SYMBOL(__aeabi_uidivmod)
354
/*
 * __aeabi_idivmod - signed 32-bit division returning quotient and
 * remainder.
 *
 *   In:       r0 = dividend, r1 = divisor
 *   Out:      r0 = quotient, r1 = remainder
 *   Clobbers: r2, r3, condition flags
 *
 * The quotient comes from __aeabi_idiv; the remainder is then rebuilt
 * as dividend - quotient * divisor.
 *
 * NOTE(review): ip is pushed and popped together with the operands and
 * lr; presumably this only keeps the stack 8-byte aligned - confirm.
 */
355ENTRY(__aeabi_idivmod)
356UNWIND(.fnstart)
357UNWIND(.save {r0, r1, ip, lr}	)
358	stmfd	sp!, {r0, r1, ip, lr}
359	bl	__aeabi_idiv
	/* Pop the saved dividend into r1 and the saved divisor into r2 */
360	ldmfd	sp!, {r1, r2, ip, lr}
	/* r1 = dividend - quotient * divisor */
361	mul	r3, r0, r2
362	sub	r1, r1, r3
363	ret	lr
364
365UNWIND(.fnend)
366ENDPROC(__aeabi_idivmod)
367EXPORT_SYMBOL(__aeabi_idivmod)
368
369#endif
370
/*
 * Ldiv0 - common division-by-zero path.
 *
 * The division routines in this file branch here (they do not call), so
 * lr still holds the return address of their caller.  The handler calls
 * __div0, which is defined outside this file, and then returns 0 in r0
 * straight to that caller.
 */
371Ldiv0:
372UNWIND(.fnstart)
373UNWIND(.pad #4)
374UNWIND(.save {lr})
	/*
	 * Move sp down by 8 bytes and store lr in the lower word; the unwind
	 * annotations above describe this as one saved register plus 4 bytes
	 * of padding.
	 */
375	str	lr, [sp, #-8]!
376	bl	__div0
377	mov	r0, #0			@ About as wrong as it could be.
	/* Load the saved lr straight into pc and release the 8 bytes */
378	ldr	pc, [sp], #8
379UNWIND(.fnend)
380ENDPROC(Ldiv0)
381