xref: /openbmc/u-boot/arch/arm/lib/div64.S (revision 83d290c56fab2d38cd1ab4c4cc7099559c1d5046)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 */
117b9f9c5dSMarek Vasut
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __UBOOT__
/* Under __UBOOT__ the unwind annotations expand to nothing. */
#define UNWIND(x...)
#endif

/*
 * Register aliases for the two 64-bit quantities used by __do_div64:
 *   xh:xl = r0/r1 pair (dividend in, remainder out in xh)
 *   yh:yl = r2/r3 pair (quotient out)
 * Which register of each pair holds the high word depends on __ARMEB__:
 * when it is defined the high word is the lower-numbered register,
 * otherwise the low word is.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
297b9f9c5dSMarek Vasut
/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 *
 * Special cases tested before the generic shift-and-subtract loops:
 * 	divisor == 1		result = dividend, remainder = 0
 * 	divisor is a power of 2	result and remainder obtained by shifting
 * 	divisor == 0		__div0 is called; once it returns, result
 * 				and remainder are forced to 0
 * NOTE(review): on the divisor == 0 path anything __div0 itself clobbers
 * is not visible from this file - confirm against its definition.
 */

.pushsection .text.__do_div64, "ax"
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1, flags updated
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ (divisor - 1) & divisor
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	@ If xh < divisor the upper quotient word stays 0 and the whole
	@ upper-bit loop is skipped.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	@ xh >= divisor here, so clz(divisor) - clz(xh) is a non-negative
	@ shift count.
	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ yl = shift count
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = quotient bit for that shift
	mov	yl, r4, lsl yl		@ yl = divisor << shift

#else

	@ No clz: double yl and ip until yl has its top bit set or
	@ yl >= xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ The Z flag consumed by "bne" comes either from the subtraction
	@ (xh became 0) or from shifting ip (no bit position left); the
	@ plain "mov" in between leaves the flags untouched.
2:	cmp	xh, yl
	orrcs	yh, yh, ip		@ xh >= yl: set this quotient bit
	subscs	xh, xh, yl		@ ... and subtract aligned divisor
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ If xh is 0 and xl < divisor, the lower quotient word is 0 and
	@ xl is the remainder.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh		@ xh = (xh << 1) | bit shifted out of xl
	beq	6f
	cmpcc	xh, r4			@ skipped when the shift carried out
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1		@ shift count that pushes out the top set bit
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh		@ skip the same number of bit positions

#else

	@ Shift one position at a time until a set bit is carried out
	@ of xl.
7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = bit index

#else

	@ No clz: narrow down the position of the single set bit in
	@ steps of 16, 8 and 4, accumulating the bit index in ip.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	@ yl is now 1, 2, 4 or 8: add 3 for 8, otherwise yl >> 1
	@ (0, 1 or 2).
	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ Quotient = dividend >> ip; remainder = low ip bits of xl.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32		@ ip = 32 - order from here on
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ Otherwise the divisor is 0 and execution falls through to
	@ Ldiv0_64 below.
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	@ lr is saved with an 8-byte pre-decrement of sp and reloaded
	@ straight into pc to return.
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)
.popsection
214