/* xref: /openbmc/linux/arch/arm/lib/lib1funcs.S (revision 6ebbf2ce) */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * ARM_DIV_BODY - unsigned shift-and-subtract division core.
 *
 *   dividend: register holding the numerator; modified
 *   divisor:  register holding the denominator; modified
 *   result:   register that receives the quotient
 *   curbit:   scratch register holding the quotient bit under test
 *
 * Every expansion in this file is reached only with dividend > divisor
 * (unsigned) and with a divisor that is neither zero nor a power of two;
 * those cases are dispatched before the macro body runs.  Only the
 * quotient left in result is consumed by the callers in this file.
 * The condition flags are modified.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with the top set bit of
	@ the dividend, and start curbit at the matching quotient bit.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four trial subtractions per pass, one for each of
	@ the quotient bits curbit, curbit/2, curbit/4 and curbit/8.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER - shift count equivalent to dividing by a power of two.
 *
 *   divisor: register holding the divisor.  The callers in this file
 *            only get here after checking that it is a power of two.
 *            Left untouched by the ARMv5+ variant, shifted down by the
 *            variant used on older architectures.
 *   order:   receives the bit index of the set bit, i.e. the right-shift
 *            amount that performs the division.
 *
 * The pre-ARMv5 variant modifies the condition flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	@ order = 31 - (number of leading zero bits)
	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit: strip 16, then 8, then 4 low
	@ bit positions, accumulating the stripped amount in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ What is left is 1, 2, 4 or 8: add 0, 1, 2 or 3 respectively.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY - unsigned shift-and-subtract remainder core.
 *
 *   dividend: numerator on entry, remainder on exit
 *   divisor:  denominator; modified
 *   order:    scratch; counts how far the divisor was shifted up, and
 *             from that how many trial subtractions remain
 *   spare:    scratch; only written by the ARMv5+ variant
 *
 * Every expansion in this file is reached only with dividend > divisor
 * (unsigned) and with a divisor that is neither zero nor a power of two.
 * The condition flags are modified.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift the divisor up so that its top set bit lines up with the
	@ top set bit of the dividend; order records the shift amount.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.  A shift of order bits calls
	@ for order + 1 trial subtractions, so a full batch of 4 is only
	@ possible while order >= 3.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if no partial batch is left over or if the dividend
	@ has already reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left
	@ (order is -3, -2 or -1 respectively).
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   Scratch: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.  Division by one, a dividend not
 * larger than the divisor, and a power-of-two divisor are each handled
 * by a short path; everything else goes through ARM_DIV_BODY.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ Flags still come from "cmp r0, r1": equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = dividend modulo divisor
 *   Scratch: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * settles every easy case without branching:
 *   - divisor == 1 or dividend == divisor: result 0
 *   - dividend <  divisor:                 result is the dividend as is
 *   - divisor is a power of two:           result is dividend & (divisor - 1)
 * Only the remaining case falls through to ARM_MOD_BODY.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   Scratch: r1, r2, r3, ip and the condition flags
 *
 * The sign of the result is kept in the top bit of ip (dividend XOR
 * divisor).  Both operands are then made non-negative, the division is
 * carried out unsigned, and the quotient is negated at the end when ip
 * is negative.  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Division by 1 or -1: ip XOR r0 has the sign of the original
	@ divisor, so the dividend is negated only for -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from "cmp r3, r1": lower gives 0, equal gives
	@ +1 or -1 according to the sign held in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3 - signed 32-bit remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = remainder, carrying the sign of the dividend
 *   Scratch: r1, r2, r3, ip and the condition flags
 *
 * The original dividend is kept in ip, both operands are made
 * non-negative, and the remainder is computed unsigned with the same
 * conditional shortcut sequence as __umodsi3.  The result is negated at
 * label 10 when the original dividend was negative.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod - unsigned 32-bit division with remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient, r1 = remainder
 *   Scratch: r2, r3
 *
 * The operands are saved on the stack around the call to __aeabi_uidiv
 * and reloaded one register higher (dividend into r1, divisor into r2),
 * so that the remainder can be formed as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod - signed 32-bit division with remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient, r1 = remainder
 *   Scratch: r2, r3
 *
 * Same scheme as __aeabi_uidivmod, built on __aeabi_idiv.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0 - shared divide-by-zero path.
 *
 * The division and modulo entry points in this file branch here when the
 * divisor is zero.  lr is stored at the bottom of an 8-byte stack frame
 * (described to the unwinder as 4 bytes of padding plus the saved lr;
 * the extra word presumably keeps sp 8-byte aligned across the call),
 * __div0 is called, and control then returns to the original caller
 * with r0 = 0.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ reload saved lr into pc, drop the frame
UNWIND(.fnend)
ENDPROC(Ldiv0)