xref: /openbmc/linux/arch/arm/lib/lib1funcs.S (revision 93ed3970)
11da177e4SLinus Torvalds/*
21da177e4SLinus Torvalds * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
31da177e4SLinus Torvalds *
41da177e4SLinus Torvalds * Author: Nicolas Pitre <nico@cam.org>
51da177e4SLinus Torvalds *   - contributed to gcc-3.4 on Sep 30, 2003
61da177e4SLinus Torvalds *   - adapted for the Linux kernel on Oct 2, 2003
71da177e4SLinus Torvalds */
81da177e4SLinus Torvalds
91da177e4SLinus Torvalds/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
101da177e4SLinus Torvalds
111da177e4SLinus TorvaldsThis file is free software; you can redistribute it and/or modify it
121da177e4SLinus Torvaldsunder the terms of the GNU General Public License as published by the
131da177e4SLinus TorvaldsFree Software Foundation; either version 2, or (at your option) any
141da177e4SLinus Torvaldslater version.
151da177e4SLinus Torvalds
161da177e4SLinus TorvaldsIn addition to the permissions in the GNU General Public License, the
171da177e4SLinus TorvaldsFree Software Foundation gives you unlimited permission to link the
181da177e4SLinus Torvaldscompiled version of this file into combinations with other programs,
191da177e4SLinus Torvaldsand to distribute those combinations without any restriction coming
201da177e4SLinus Torvaldsfrom the use of this file.  (The General Public License restrictions
211da177e4SLinus Torvaldsdo apply in other respects; for example, they cover modification of
221da177e4SLinus Torvaldsthe file, and distribution when not linked into a combine
231da177e4SLinus Torvaldsexecutable.)
241da177e4SLinus Torvalds
251da177e4SLinus TorvaldsThis file is distributed in the hope that it will be useful, but
261da177e4SLinus TorvaldsWITHOUT ANY WARRANTY; without even the implied warranty of
271da177e4SLinus TorvaldsMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
281da177e4SLinus TorvaldsGeneral Public License for more details.
291da177e4SLinus Torvalds
301da177e4SLinus TorvaldsYou should have received a copy of the GNU General Public License
311da177e4SLinus Torvaldsalong with this program; see the file COPYING.  If not, write to
321da177e4SLinus Torvaldsthe Free Software Foundation, 59 Temple Place - Suite 330,
331da177e4SLinus TorvaldsBoston, MA 02111-1307, USA.  */
341da177e4SLinus Torvalds
351da177e4SLinus Torvalds
361da177e4SLinus Torvalds#include <linux/linkage.h>
371da177e4SLinus Torvalds#include <asm/assembler.h>
381da177e4SLinus Torvalds
391da177e4SLinus Torvalds
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Core of the unsigned 32-bit division: a shift-and-subtract loop that
 * tries four quotient bits per pass.
 *
 *   \dividend  in:  value to divide.  Modified by the macro.
 *   \divisor   in:  value to divide by.  Modified by the macro.
 *   \result    out: quotient.
 *   \curbit    scratch: the quotient bit that matches the current
 *              position of the shifted divisor.
 *
 * Condition flags are modified.
 *
 * The final value of \dividend must not be used as the remainder: the
 * unrolled steps keep subtracting "\divisor, lsr #n" even when
 * "\curbit, lsr #n" has already become zero.
 *
 * The users in this file only expand this macro after they have dealt
 * with a zero divisor, with dividend <= divisor and with power-of-two
 * divisors.
 */
401da177e4SLinus Torvalds.macro ARM_DIV_BODY dividend, divisor, result, curbit
411da177e4SLinus Torvalds
421da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 5
431da177e4SLinus Torvalds
	/*
	 * \result is used as a temporary here: it receives
	 * clz(divisor) - clz(dividend), the left shift that lines up the
	 * highest set bit of the divisor with the highest set bit of the
	 * dividend.  \curbit becomes 1 shifted left by the same amount.
	 */
441da177e4SLinus Torvalds	clz	\curbit, \divisor
451da177e4SLinus Torvalds	clz	\result, \dividend
461da177e4SLinus Torvalds	sub	\result, \curbit, \result
471da177e4SLinus Torvalds	mov	\curbit, #1
481da177e4SLinus Torvalds	mov	\divisor, \divisor, lsl \result
491da177e4SLinus Torvalds	mov	\curbit, \curbit, lsl \result
501da177e4SLinus Torvalds	mov	\result, #0
511da177e4SLinus Torvalds
521da177e4SLinus Torvalds#else
531da177e4SLinus Torvalds
541da177e4SLinus Torvalds	@ Initially shift the divisor left 3 bits if possible,
551da177e4SLinus Torvalds	@ set curbit accordingly.  This allows for curbit to be located
561da177e4SLinus Torvalds	@ at the left end of each 4 bit nibbles in the division loop
571da177e4SLinus Torvalds	@ to save one loop in most cases.
581da177e4SLinus Torvalds	tst	\divisor, #0xe0000000
591da177e4SLinus Torvalds	moveq	\divisor, \divisor, lsl #3
601da177e4SLinus Torvalds	moveq	\curbit, #8
611da177e4SLinus Torvalds	movne	\curbit, #1
621da177e4SLinus Torvalds
631da177e4SLinus Torvalds	@ Unless the divisor is very big, shift it up in multiples of
641da177e4SLinus Torvalds	@ four bits, since this is the amount of unwinding in the main
651da177e4SLinus Torvalds	@ division loop.  Continue shifting until the divisor is
661da177e4SLinus Torvalds	@ larger than the dividend.
671da177e4SLinus Torvalds1:	cmp	\divisor, #0x10000000
681da177e4SLinus Torvalds	cmplo	\divisor, \dividend
691da177e4SLinus Torvalds	movlo	\divisor, \divisor, lsl #4
701da177e4SLinus Torvalds	movlo	\curbit, \curbit, lsl #4
711da177e4SLinus Torvalds	blo	1b
721da177e4SLinus Torvalds
731da177e4SLinus Torvalds	@ For very big divisors, we must shift it a bit at a time, or
741da177e4SLinus Torvalds	@ we will be in danger of overflowing.
751da177e4SLinus Torvalds1:	cmp	\divisor, #0x80000000
761da177e4SLinus Torvalds	cmplo	\divisor, \dividend
771da177e4SLinus Torvalds	movlo	\divisor, \divisor, lsl #1
781da177e4SLinus Torvalds	movlo	\curbit, \curbit, lsl #1
791da177e4SLinus Torvalds	blo	1b
801da177e4SLinus Torvalds
811da177e4SLinus Torvalds	mov	\result, #0
821da177e4SLinus Torvalds
831da177e4SLinus Torvalds#endif
841da177e4SLinus Torvalds
	/*
	 * Each pass tries the divisor at shifts 0, 1, 2 and 3 to the right
	 * and, for every successful subtraction, sets the matching bit of
	 * \curbit in \result.  The pass is repeated while \dividend is
	 * non-zero and "\curbit, lsr #4" is non-zero; movnes updates the
	 * Z flag that the closing bne tests.
	 */
851da177e4SLinus Torvalds	@ Division loop
861da177e4SLinus Torvalds1:	cmp	\dividend, \divisor
871da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor
881da177e4SLinus Torvalds	orrhs	\result,   \result,   \curbit
891da177e4SLinus Torvalds	cmp	\dividend, \divisor,  lsr #1
901da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor, lsr #1
911da177e4SLinus Torvalds	orrhs	\result,   \result,   \curbit,  lsr #1
921da177e4SLinus Torvalds	cmp	\dividend, \divisor,  lsr #2
931da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor, lsr #2
941da177e4SLinus Torvalds	orrhs	\result,   \result,   \curbit,  lsr #2
951da177e4SLinus Torvalds	cmp	\dividend, \divisor,  lsr #3
961da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor, lsr #3
971da177e4SLinus Torvalds	orrhs	\result,   \result,   \curbit,  lsr #3
981da177e4SLinus Torvalds	cmp	\dividend, #0			@ Early termination?
991da177e4SLinus Torvalds	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
1001da177e4SLinus Torvalds	movne	\divisor,  \divisor, lsr #4
1011da177e4SLinus Torvalds	bne	1b
1021da177e4SLinus Torvalds
1031da177e4SLinus Torvalds.endm
1041da177e4SLinus Torvalds
1051da177e4SLinus Torvalds
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute in \order the bit index of a power-of-two \divisor, i.e. the
 * right-shift count that divides by it.
 *
 *   \divisor  in:  a power of two.  Left untouched by the clz variant,
 *                  shifted down (clobbered) by the pre-ARMv5 variant.
 *   \order    out: index of the set bit (0..31).
 *
 * Condition flags are modified by the pre-ARMv5 variant.
 *
 * The pre-ARMv5 variant is only correct for powers of two: its last step
 * maps a remaining value of 1, 2, 4 or 8 to 0, 1, 2 or 3.  Both users in
 * this file expand the macro only after a "tst r1, r2" power-of-two test.
 */
1061da177e4SLinus Torvalds.macro ARM_DIV2_ORDER divisor, order
1071da177e4SLinus Torvalds
1081da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 5
1091da177e4SLinus Torvalds
	/* order = 31 - (number of leading zero bits) */
1101da177e4SLinus Torvalds	clz	\order, \divisor
1111da177e4SLinus Torvalds	rsb	\order, \order, #31
1121da177e4SLinus Torvalds
1131da177e4SLinus Torvalds#else
1141da177e4SLinus Torvalds
	/*
	 * Binary search for the set bit: test the upper 16, 8 and 4 bits in
	 * turn, shifting \divisor down and accumulating the shift count.
	 */
1151da177e4SLinus Torvalds	cmp	\divisor, #(1 << 16)
1161da177e4SLinus Torvalds	movhs	\divisor, \divisor, lsr #16
1171da177e4SLinus Torvalds	movhs	\order, #16
1181da177e4SLinus Torvalds	movlo	\order, #0
1191da177e4SLinus Torvalds
1201da177e4SLinus Torvalds	cmp	\divisor, #(1 << 8)
1211da177e4SLinus Torvalds	movhs	\divisor, \divisor, lsr #8
1221da177e4SLinus Torvalds	addhs	\order, \order, #8
1231da177e4SLinus Torvalds
1241da177e4SLinus Torvalds	cmp	\divisor, #(1 << 4)
1251da177e4SLinus Torvalds	movhs	\divisor, \divisor, lsr #4
1261da177e4SLinus Torvalds	addhs	\order, \order, #4
1271da177e4SLinus Torvalds
	/*
	 * \divisor is now below 16.  Above 4 (i.e. 8 for a power of two)
	 * add 3; otherwise add divisor >> 1, which turns 1, 2, 4 into
	 * 0, 1, 2.
	 */
1281da177e4SLinus Torvalds	cmp	\divisor, #(1 << 2)
1291da177e4SLinus Torvalds	addhi	\order, \order, #3
1301da177e4SLinus Torvalds	addls	\order, \order, \divisor, lsr #1
1311da177e4SLinus Torvalds
1321da177e4SLinus Torvalds#endif
1331da177e4SLinus Torvalds
1341da177e4SLinus Torvalds.endm
1351da177e4SLinus Torvalds
1361da177e4SLinus Torvalds
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Core of the unsigned 32-bit modulo: the divisor is shifted left, then
 * subtracted from the dividend at every shift position on the way back
 * down, so that only the remainder is left.
 *
 *   \dividend  in:  value to divide.
 *              out: remainder.
 *   \divisor   in:  value to divide by.  Modified by the macro.
 *   \order     scratch: number of bit positions \divisor was shifted
 *              left, then used as the countdown of steps still to do.
 *   \spare     scratch: written only by the ARMv5+ (clz) variant.
 *
 * Condition flags are modified.
 *
 * The users in this file only expand this macro when the dividend is
 * larger than the divisor and the divisor is neither zero nor a power
 * of two.
 */
1371da177e4SLinus Torvalds.macro ARM_MOD_BODY dividend, divisor, order, spare
1381da177e4SLinus Torvalds
1391da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 5
1401da177e4SLinus Torvalds
	/*
	 * order = clz(divisor) - clz(dividend): the shift that lines up the
	 * highest set bit of the divisor with that of the dividend.
	 */
1411da177e4SLinus Torvalds	clz	\order, \divisor
1421da177e4SLinus Torvalds	clz	\spare, \dividend
1431da177e4SLinus Torvalds	sub	\order, \order, \spare
1441da177e4SLinus Torvalds	mov	\divisor, \divisor, lsl \order
1451da177e4SLinus Torvalds
1461da177e4SLinus Torvalds#else
1471da177e4SLinus Torvalds
1481da177e4SLinus Torvalds	mov	\order, #0
1491da177e4SLinus Torvalds
1501da177e4SLinus Torvalds	@ Unless the divisor is very big, shift it up in multiples of
1511da177e4SLinus Torvalds	@ four bits, since this is the amount of unwinding in the main
1521da177e4SLinus Torvalds	@ division loop.  Continue shifting until the divisor is
1531da177e4SLinus Torvalds	@ larger than the dividend.
1541da177e4SLinus Torvalds1:	cmp	\divisor, #0x10000000
1551da177e4SLinus Torvalds	cmplo	\divisor, \dividend
1561da177e4SLinus Torvalds	movlo	\divisor, \divisor, lsl #4
1571da177e4SLinus Torvalds	addlo	\order, \order, #4
1581da177e4SLinus Torvalds	blo	1b
1591da177e4SLinus Torvalds
1601da177e4SLinus Torvalds	@ For very big divisors, we must shift it a bit at a time, or
1611da177e4SLinus Torvalds	@ we will be in danger of overflowing.
1621da177e4SLinus Torvalds1:	cmp	\divisor, #0x80000000
1631da177e4SLinus Torvalds	cmplo	\divisor, \dividend
1641da177e4SLinus Torvalds	movlo	\divisor, \divisor, lsl #1
1651da177e4SLinus Torvalds	addlo	\order, \order, #1
1661da177e4SLinus Torvalds	blo	1b
1671da177e4SLinus Torvalds
1681da177e4SLinus Torvalds#endif
1691da177e4SLinus Torvalds
1701da177e4SLinus Torvalds	@ Perform all needed subtractions to keep only the remainder.
1711da177e4SLinus Torvalds	@ Do comparisons in batch of 4 first.
	/*
	 * After the bias below a negative \order means fewer than four
	 * compare/subtract steps are needed, so the unrolled loop is
	 * skipped and only the tail at label 2 runs.
	 */
1721da177e4SLinus Torvalds	subs	\order, \order, #3		@ yes, 3 is intended here
1731da177e4SLinus Torvalds	blt	2f
1741da177e4SLinus Torvalds
1751da177e4SLinus Torvalds1:	cmp	\dividend, \divisor
1761da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor
1771da177e4SLinus Torvalds	cmp	\dividend, \divisor,  lsr #1
1781da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor, lsr #1
1791da177e4SLinus Torvalds	cmp	\dividend, \divisor,  lsr #2
1801da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor, lsr #2
1811da177e4SLinus Torvalds	cmp	\dividend, \divisor,  lsr #3
1821da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor, lsr #3
	/*
	 * Loop again only while the dividend is at least 1 (signed
	 * compare) and \order is still non-negative after taking off the
	 * four steps just done.  The divisor shift is unconditional and
	 * does not touch the flags.
	 */
1831da177e4SLinus Torvalds	cmp	\dividend, #1
1841da177e4SLinus Torvalds	mov	\divisor, \divisor, lsr #4
1851da177e4SLinus Torvalds	subges	\order, \order, #4
1861da177e4SLinus Torvalds	bge	1b
1871da177e4SLinus Torvalds
	/*
	 * Finished if the low two bits of \order are clear (no steps left)
	 * or if the dividend has reached zero.
	 */
1881da177e4SLinus Torvalds	tst	\order, #3
1891da177e4SLinus Torvalds	teqne	\dividend, #0
1901da177e4SLinus Torvalds	beq	5f
1911da177e4SLinus Torvalds
1921da177e4SLinus Torvalds	@ Either 1, 2 or 3 comparison/subtractions are left.
	/*
	 * \order is compared with -2: below (-3) leaves one step (label 4),
	 * equal leaves two (label 3), above (-1) falls through for three.
	 */
1931da177e4SLinus Torvalds2:	cmn	\order, #2
1941da177e4SLinus Torvalds	blt	4f
1951da177e4SLinus Torvalds	beq	3f
1961da177e4SLinus Torvalds	cmp	\dividend, \divisor
1971da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor
1981da177e4SLinus Torvalds	mov	\divisor,  \divisor,  lsr #1
1991da177e4SLinus Torvalds3:	cmp	\dividend, \divisor
2001da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor
2011da177e4SLinus Torvalds	mov	\divisor,  \divisor,  lsr #1
2021da177e4SLinus Torvalds4:	cmp	\dividend, \divisor
2031da177e4SLinus Torvalds	subhs	\dividend, \dividend, \divisor
2041da177e4SLinus Torvalds5:
2051da177e4SLinus Torvalds.endm
2061da177e4SLinus Torvalds
2071da177e4SLinus Torvalds
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Also modified: r1, r2, r3 and the condition flags.
 *
 * A zero divisor branches to Ldiv0.  Division by one, dividend <= divisor
 * and power-of-two divisors are answered without the division loop.
 *
 * ENTRY/ENDPROC are not defined in this file; presumably they come from
 * the headers included at the top and emit the symbol bookkeeping.
 */
2081da177e4SLinus TorvaldsENTRY(__udivsi3)
209ba95e4e4SNicolas PitreENTRY(__aeabi_uidiv)
2101da177e4SLinus Torvalds
	/*
	 * r2 = divisor - 1.  Z set: divisor is 1, r0 is already the answer.
	 * Carry clear (borrow): divisor is 0.
	 */
2111da177e4SLinus Torvalds	subs	r2, r1, #1
2121da177e4SLinus Torvalds	moveq	pc, lr
2131da177e4SLinus Torvalds	bcc	Ldiv0
	/* dividend <= divisor: quotient is 1 or 0, decided at label 11 */
2141da177e4SLinus Torvalds	cmp	r0, r1
2151da177e4SLinus Torvalds	bls	11f
	/* divisor & (divisor - 1) == 0: power of two, shift at label 12 */
2161da177e4SLinus Torvalds	tst	r1, r2
2171da177e4SLinus Torvalds	beq	12f
2181da177e4SLinus Torvalds
2191da177e4SLinus Torvalds	ARM_DIV_BODY r0, r1, r2, r3
2201da177e4SLinus Torvalds
2211da177e4SLinus Torvalds	mov	r0, r2
2221da177e4SLinus Torvalds	mov	pc, lr
2231da177e4SLinus Torvalds
	/* flags still come from "cmp r0, r1": equal gives 1, lower gives 0 */
2241da177e4SLinus Torvalds11:	moveq	r0, #1
2251da177e4SLinus Torvalds	movne	r0, #0
2261da177e4SLinus Torvalds	mov	pc, lr
2271da177e4SLinus Torvalds
2281da177e4SLinus Torvalds12:	ARM_DIV2_ORDER r1, r2
2291da177e4SLinus Torvalds
2301da177e4SLinus Torvalds	mov	r0, r0, lsr r2
2311da177e4SLinus Torvalds	mov	pc, lr
2321da177e4SLinus Torvalds
23393ed3970SCatalin MarinasENDPROC(__udivsi3)
23493ed3970SCatalin MarinasENDPROC(__aeabi_uidiv)
2351da177e4SLinus Torvalds
/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder
 *   Also modified: r1, r2, r3 and the condition flags.
 *
 * A zero divisor branches to Ldiv0.
 *
 * The conditional sequence below resolves four cases without branching:
 *   divisor == 1 or dividend == divisor   -> 0
 *   dividend < divisor                    -> dividend, unchanged
 *   divisor is a power of two             -> dividend & (divisor - 1)
 *   anything else                         -> falls into ARM_MOD_BODY
 */
2361da177e4SLinus TorvaldsENTRY(__umodsi3)
2371da177e4SLinus Torvalds
2381da177e4SLinus Torvalds	subs	r2, r1, #1			@ compare divisor with 1
2391da177e4SLinus Torvalds	bcc	Ldiv0
2401da177e4SLinus Torvalds	cmpne	r0, r1				@ compare dividend with divisor
2411da177e4SLinus Torvalds	moveq   r0, #0
2421da177e4SLinus Torvalds	tsthi	r1, r2				@ see if divisor is power of 2
2431da177e4SLinus Torvalds	andeq	r0, r0, r2
	/* return on "lower or same" from the cmp, or on Z from the tst */
2441da177e4SLinus Torvalds	movls	pc, lr
2451da177e4SLinus Torvalds
2461da177e4SLinus Torvalds	ARM_MOD_BODY r0, r1, r2, r3
2471da177e4SLinus Torvalds
2481da177e4SLinus Torvalds	mov	pc, lr
2491da177e4SLinus Torvalds
25093ed3970SCatalin MarinasENDPROC(__umodsi3)
2511da177e4SLinus Torvalds
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Also modified: r1, r2, r3, ip and the condition flags.
 *
 * The division is done on the magnitudes of both operands; the quotient
 * is negated afterwards when the operand signs differ (bit 31 of
 * ip = r0 ^ r1), so the result is truncated toward zero.
 * A zero divisor branches to Ldiv0.
 */
2521da177e4SLinus TorvaldsENTRY(__divsi3)
253ba95e4e4SNicolas PitreENTRY(__aeabi_idiv)
2541da177e4SLinus Torvalds
2551da177e4SLinus Torvalds	cmp	r1, #0
2561da177e4SLinus Torvalds	eor	ip, r0, r1			@ save the sign of the result.
2571da177e4SLinus Torvalds	beq	Ldiv0
2581da177e4SLinus Torvalds	rsbmi	r1, r1, #0			@ loops below use unsigned.
2591da177e4SLinus Torvalds	subs	r2, r1, #1			@ division by 1 or -1 ?
2601da177e4SLinus Torvalds	beq	10f
2611da177e4SLinus Torvalds	movs	r3, r0
2621da177e4SLinus Torvalds	rsbmi	r3, r0, #0			@ positive dividend value
	/* |dividend| <= |divisor|: quotient is 0 or +/-1, see label 11 */
2631da177e4SLinus Torvalds	cmp	r3, r1
2641da177e4SLinus Torvalds	bls	11f
2651da177e4SLinus Torvalds	tst	r1, r2				@ divisor is power of 2 ?
2661da177e4SLinus Torvalds	beq	12f
2671da177e4SLinus Torvalds
2681da177e4SLinus Torvalds	ARM_DIV_BODY r3, r1, r0, r2
2691da177e4SLinus Torvalds
2701da177e4SLinus Torvalds	cmp	ip, #0
2711da177e4SLinus Torvalds	rsbmi	r0, r0, #0
2721da177e4SLinus Torvalds	mov	pc, lr
2731da177e4SLinus Torvalds
	/*
	 * Divisor was 1 or -1.  ip ^ r0 is the original divisor, so the
	 * N flag is its sign: negate the dividend for -1.
	 */
2741da177e4SLinus Torvalds10:	teq	ip, r0				@ same sign ?
2751da177e4SLinus Torvalds	rsbmi	r0, r0, #0
2761da177e4SLinus Torvalds	mov	pc, lr
2771da177e4SLinus Torvalds
	/*
	 * Flags still come from "cmp r3, r1".  Lower: quotient 0.
	 * Equal: ip asr #31 is 0 or -1, and OR-ing in bit 0 gives 1 or -1.
	 */
2781da177e4SLinus Torvalds11:	movlo	r0, #0
2791da177e4SLinus Torvalds	moveq	r0, ip, asr #31
2801da177e4SLinus Torvalds	orreq	r0, r0, #1
2811da177e4SLinus Torvalds	mov	pc, lr
2821da177e4SLinus Torvalds
2831da177e4SLinus Torvalds12:	ARM_DIV2_ORDER r1, r2
2841da177e4SLinus Torvalds
	/* shift the magnitude, then apply the sign saved in ip */
2851da177e4SLinus Torvalds	cmp	ip, #0
2861da177e4SLinus Torvalds	mov	r0, r3, lsr r2
2871da177e4SLinus Torvalds	rsbmi	r0, r0, #0
2881da177e4SLinus Torvalds	mov	pc, lr
2891da177e4SLinus Torvalds
29093ed3970SCatalin MarinasENDPROC(__divsi3)
29193ed3970SCatalin MarinasENDPROC(__aeabi_idiv)
2921da177e4SLinus Torvalds
/*
 * __modsi3: signed 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, carrying the sign of the original dividend
 *   Also modified: r1, r2, r3, ip and the condition flags.
 *
 * The remainder is computed on the magnitudes of both operands and is
 * negated at label 10 when the original dividend (kept in ip) was
 * negative.  A zero divisor branches to Ldiv0.
 */
2931da177e4SLinus TorvaldsENTRY(__modsi3)
2941da177e4SLinus Torvalds
2951da177e4SLinus Torvalds	cmp	r1, #0
2961da177e4SLinus Torvalds	beq	Ldiv0
2971da177e4SLinus Torvalds	rsbmi	r1, r1, #0			@ loops below use unsigned.
2981da177e4SLinus Torvalds	movs	ip, r0				@ preserve sign of dividend
2991da177e4SLinus Torvalds	rsbmi	r0, r0, #0			@ if negative make positive
	/*
	 * Same branch-free case selection as in __umodsi3: the result is 0
	 * for divisor 1 or equal magnitudes, the dividend itself when it is
	 * smaller, and dividend & (divisor - 1) for a power-of-two divisor.
	 */
3001da177e4SLinus Torvalds	subs	r2, r1, #1			@ compare divisor with 1
3011da177e4SLinus Torvalds	cmpne	r0, r1				@ compare dividend with divisor
3021da177e4SLinus Torvalds	moveq	r0, #0
3031da177e4SLinus Torvalds	tsthi	r1, r2				@ see if divisor is power of 2
3041da177e4SLinus Torvalds	andeq	r0, r0, r2
3051da177e4SLinus Torvalds	bls	10f
3061da177e4SLinus Torvalds
3071da177e4SLinus Torvalds	ARM_MOD_BODY r0, r1, r2, r3
3081da177e4SLinus Torvalds
3091da177e4SLinus Torvalds10:	cmp	ip, #0
3101da177e4SLinus Torvalds	rsbmi	r0, r0, #0
3111da177e4SLinus Torvalds	mov	pc, lr
3121da177e4SLinus Torvalds
31393ed3970SCatalin MarinasENDPROC(__modsi3)
31493ed3970SCatalin Marinas
315ba95e4e4SNicolas Pitre#ifdef CONFIG_AEABI
316ba95e4e4SNicolas Pitre
/*
 * __aeabi_uidivmod: unsigned 32-bit division returning both results.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Also modified: r2, r3 and the condition flags.
 *
 * The quotient comes from __aeabi_uidiv; the remainder is rebuilt as
 * dividend - quotient * divisor from the operands saved on the stack.
 * With a zero divisor __aeabi_uidiv goes through Ldiv0 and, if __div0
 * returns, comes back with a quotient of 0, so r1 ends up equal to the
 * original dividend.
 */
317ba95e4e4SNicolas PitreENTRY(__aeabi_uidivmod)
318ba95e4e4SNicolas Pitre
	/*
	 * Four words are pushed.  NOTE(review): ip is presumably included
	 * only to keep the stack adjustment a multiple of 8 - confirm.
	 */
319ba95e4e4SNicolas Pitre	stmfd	sp!, {r0, r1, ip, lr}
320ba95e4e4SNicolas Pitre	bl	__aeabi_uidiv
	/* reload: r1 = original dividend, r2 = original divisor */
321ba95e4e4SNicolas Pitre	ldmfd	sp!, {r1, r2, ip, lr}
322ba95e4e4SNicolas Pitre	mul	r3, r0, r2
323ba95e4e4SNicolas Pitre	sub	r1, r1, r3
324ba95e4e4SNicolas Pitre	mov	pc, lr
325ba95e4e4SNicolas Pitre
32693ed3970SCatalin MarinasENDPROC(__aeabi_uidivmod)
32793ed3970SCatalin Marinas
/*
 * __aeabi_idivmod: signed 32-bit division returning both results.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Also modified: r2, r3 and the condition flags.
 *
 * The quotient comes from __aeabi_idiv; the remainder is rebuilt as
 * dividend - quotient * divisor from the operands saved on the stack.
 * With a zero divisor __aeabi_idiv goes through Ldiv0 and, if __div0
 * returns, comes back with a quotient of 0, so r1 ends up equal to the
 * original dividend.
 */
328ba95e4e4SNicolas PitreENTRY(__aeabi_idivmod)
329ba95e4e4SNicolas Pitre
	/*
	 * __aeabi_idiv overwrites ip, so the saved copy is restored below.
	 * NOTE(review): saving ip presumably also keeps the stack
	 * adjustment a multiple of 8 - confirm.
	 */
330ba95e4e4SNicolas Pitre	stmfd	sp!, {r0, r1, ip, lr}
331ba95e4e4SNicolas Pitre	bl	__aeabi_idiv
	/* reload: r1 = original dividend, r2 = original divisor */
332ba95e4e4SNicolas Pitre	ldmfd	sp!, {r1, r2, ip, lr}
333ba95e4e4SNicolas Pitre	mul	r3, r0, r2
334ba95e4e4SNicolas Pitre	sub	r1, r1, r3
335ba95e4e4SNicolas Pitre	mov	pc, lr
336ba95e4e4SNicolas Pitre
33793ed3970SCatalin MarinasENDPROC(__aeabi_idivmod)
33893ed3970SCatalin Marinas
339ba95e4e4SNicolas Pitre#endif
3401da177e4SLinus Torvalds
/*
 * Ldiv0: common division-by-zero exit.
 *
 * Every routine above branches here (plain branch, not a call) when the
 * divisor is zero, so lr still holds the return address of that
 * routine's caller.  __div0 is called, then 0 is returned in r0.
 * __div0 is not defined in this file.
 */
3411da177e4SLinus TorvaldsLdiv0:
3421da177e4SLinus Torvalds
	/*
	 * Only lr is stored, but sp moves by 8 bytes.  NOTE(review):
	 * presumably to keep the stack 8-byte aligned across the call -
	 * confirm.
	 */
343499b2ea1SNicolas Pitre	str	lr, [sp, #-8]!
3441da177e4SLinus Torvalds	bl	__div0
3451da177e4SLinus Torvalds	mov	r0, #0			@ About as wrong as it could be.
	/* load the saved lr straight into pc and release the 8 bytes */
346499b2ea1SNicolas Pitre	ldr	pc, [sp], #8
3471da177e4SLinus Torvalds
3481da177e4SLinus Torvalds
349