xref: /openbmc/linux/arch/arm/lib/div64.S (revision 75bf465f0bc33e9b776a46d6a1b9b990f5fb7c37)
1*d2912cb1SThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-only */
21da177e4SLinus Torvalds/*
31da177e4SLinus Torvalds *  linux/arch/arm/lib/div64.S
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds *  Optimized computation of 64-bit dividend / 32-bit divisor
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds *  Author:	Nicolas Pitre
81da177e4SLinus Torvalds *  Created:	Oct 5, 2003
91da177e4SLinus Torvalds *  Copyright:	Monta Vista Software, Inc.
101da177e4SLinus Torvalds */
111da177e4SLinus Torvalds
121da177e4SLinus Torvalds#include <linux/linkage.h>
136ebbf2ceSRussell King#include <asm/assembler.h>
1401885bc5SLaura Abbott#include <asm/unwind.h>
151da177e4SLinus Torvalds
/*
 * Register aliases for the two 64-bit quantities handled below: "x" is
 * the r0/r1 pair and "y" is the r2/r3 pair, the h/l suffix naming the
 * high and low 32-bit word of each.  When __ARMEB__ is defined the high
 * word lives in the lower-numbered register of a pair; otherwise the low
 * word does.
 * NOTE(review): __ARMEB__ is presumably the big-endian build macro, which
 * would make this follow the register order of a 64-bit value -- confirm.
 */
161da177e4SLinus Torvalds#ifdef __ARMEB__
171da177e4SLinus Torvalds#define xh r0
181da177e4SLinus Torvalds#define xl r1
191da177e4SLinus Torvalds#define yh r2
201da177e4SLinus Torvalds#define yl r3
211da177e4SLinus Torvalds#else
221da177e4SLinus Torvalds#define xl r0
231da177e4SLinus Torvalds#define xh r1
241da177e4SLinus Torvalds#define yl r2
251da177e4SLinus Torvalds#define yh r3
261da177e4SLinus Torvalds#endif
271da177e4SLinus Torvalds
281da177e4SLinus Torvalds/*
291da177e4SLinus Torvalds * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
301da177e4SLinus Torvalds *
311da177e4SLinus Torvalds * Note: Calling convention is totally non-standard for optimal code.
321da177e4SLinus Torvalds *       This is meant to be used by do_div() from include/asm/div64.h only.
331da177e4SLinus Torvalds *
341da177e4SLinus Torvalds * Input parameters:
351da177e4SLinus Torvalds * 	xh-xl	= dividend (clobbered)
361da177e4SLinus Torvalds * 	r4	= divisor (preserved)
371da177e4SLinus Torvalds *
381da177e4SLinus Torvalds * Output values:
391da177e4SLinus Torvalds * 	yh-yl	= result
401da177e4SLinus Torvalds * 	xh	= remainder
411da177e4SLinus Torvalds *
421da177e4SLinus Torvalds * Clobbered regs: xl, ip
431da177e4SLinus Torvalds */
441da177e4SLinus Torvalds
451da177e4SLinus TorvaldsENTRY(__do_div64)
4601885bc5SLaura AbbottUNWIND(.fnstart)
471da177e4SLinus Torvalds
	@ Unsigned divide of the 64-bit value xh:xl by the 32-bit value in r4.
	@ On exit yh:yl holds the quotient and xh the remainder.
	@ Divisors 0, 1 and powers of two are special-cased; everything else
	@ goes through one shift-and-subtract loop for the upper quotient word
	@ followed by a second one for the lower quotient word.
481da177e4SLinus Torvalds	@ Test for easy paths first.
491da177e4SLinus Torvalds	subs	ip, r4, #1		@ ip = divisor - 1
501da177e4SLinus Torvalds	bls	9f			@ divisor is 0 or 1
511da177e4SLinus Torvalds	tst	ip, r4			@ (divisor - 1) & divisor
521da177e4SLinus Torvalds	beq	8f			@ divisor is power of 2
531da177e4SLinus Torvalds
541da177e4SLinus Torvalds	@ See if we need to handle upper 32-bit result.
	@ mov does not touch the flags, so blo still tests the cmp: when
	@ xh < divisor the upper quotient word stays 0 and we skip ahead.
551da177e4SLinus Torvalds	cmp	xh, r4
561da177e4SLinus Torvalds	mov	yh, #0
571da177e4SLinus Torvalds	blo	3f
581da177e4SLinus Torvalds
591da177e4SLinus Torvalds	@ Align divisor with upper part of dividend.
601da177e4SLinus Torvalds	@ The aligned divisor is stored in yl preserving the original.
611da177e4SLinus Torvalds	@ The bit position is stored in ip.
621da177e4SLinus Torvalds
631da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 5
641da177e4SLinus Torvalds
	@ xh >= divisor here, so clz(divisor) - clz(xh) is never negative.
651da177e4SLinus Torvalds	clz	yl, r4
661da177e4SLinus Torvalds	clz	ip, xh
671da177e4SLinus Torvalds	sub	yl, yl, ip		@ yl = shift count
681da177e4SLinus Torvalds	mov	ip, #1
691da177e4SLinus Torvalds	mov	ip, ip, lsl yl		@ ip = 1 << shift
701da177e4SLinus Torvalds	mov	yl, r4, lsl yl		@ yl = divisor << shift
711da177e4SLinus Torvalds
721da177e4SLinus Torvalds#else
731da177e4SLinus Torvalds
	@ Variant that does not use clz: double yl and ip until yl has its
	@ top bit set or yl >= xh, whichever comes first.
741da177e4SLinus Torvalds	mov	yl, r4
751da177e4SLinus Torvalds	mov	ip, #1
761da177e4SLinus Torvalds1:	cmp	yl, #0x80000000
771da177e4SLinus Torvalds	cmpcc	yl, xh
781da177e4SLinus Torvalds	movcc	yl, yl, lsl #1
791da177e4SLinus Torvalds	movcc	ip, ip, lsl #1
801da177e4SLinus Torvalds	bcc	1b
811da177e4SLinus Torvalds
821da177e4SLinus Torvalds#endif
831da177e4SLinus Torvalds
841da177e4SLinus Torvalds	@ The division loop for needed upper bit positions.
851da177e4SLinus Torvalds 	@ Break out early if dividend reaches 0.
	@ The final bne sees Z from subscs when xh became 0 (movsne is then
	@ skipped), otherwise Z from movsne, i.e. whether ip shifted to 0.
	@ The unconditional mov in between leaves the flags alone.
861da177e4SLinus Torvalds2:	cmp	xh, yl
871da177e4SLinus Torvalds	orrcs	yh, yh, ip
88e44fc388SStefan Agner	subscs	xh, xh, yl
89e44fc388SStefan Agner	movsne	ip, ip, lsr #1
901da177e4SLinus Torvalds	mov	yl, yl, lsr #1
911da177e4SLinus Torvalds	bne	2b
921da177e4SLinus Torvalds
931da177e4SLinus Torvalds	@ See if we need to handle lower 32-bit result.
	@ cmp xh, #0 always leaves carry set, so lo can only hold when the
	@ cmpeq ran (xh == 0) and found xl < divisor: the low quotient word
	@ is then 0 and xl itself is the remainder.
941da177e4SLinus Torvalds3:	cmp	xh, #0
951da177e4SLinus Torvalds	mov	yl, #0
961da177e4SLinus Torvalds	cmpeq	xl, r4
971da177e4SLinus Torvalds	movlo	xh, xl
986ebbf2ceSRussell King	retlo	lr
991da177e4SLinus Torvalds
1001da177e4SLinus Torvalds	@ The division loop for lower bit positions.
1011da177e4SLinus Torvalds	@ Here we shift remainder bits leftwards rather than moving the
1021da177e4SLinus Torvalds	@ divisor for comparisons, considering the carry-out bit as well.
1031da177e4SLinus Torvalds	mov	ip, #0x80000000		@ ip = quotient bit being decided
1041da177e4SLinus Torvalds4:	movs	xl, xl, lsl #1		@ carry = old top bit of xl
1051da177e4SLinus Torvalds	adcs	xh, xh, xh		@ xh = 2 * xh + carry
1061da177e4SLinus Torvalds	beq	6f			@ low 32 bits of the result are 0
1071da177e4SLinus Torvalds	cmpcc	xh, r4			@ no carry out: compare normally
1081da177e4SLinus Torvalds5:	orrcs	yl, yl, ip		@ carry out, or xh >= divisor
1091da177e4SLinus Torvalds	subcs	xh, xh, r4
1101da177e4SLinus Torvalds	movs	ip, ip, lsr #1		@ next lower bit; Z when none left
1111da177e4SLinus Torvalds	bne	4b
1126ebbf2ceSRussell King	ret	lr
1131da177e4SLinus Torvalds
1141da177e4SLinus Torvalds	@ The top part of remainder became zero.  If carry is set
1151da177e4SLinus Torvalds	@ (the 33rd bit) this is a false positive so resume the loop.
1161da177e4SLinus Torvalds	@ Otherwise, if lower part is also null then we are done.
1171da177e4SLinus Torvalds6:	bcs	5b
1181da177e4SLinus Torvalds	cmp	xl, #0
1196ebbf2ceSRussell King	reteq	lr
1201da177e4SLinus Torvalds
1211da177e4SLinus Torvalds	@ We still have remainder bits in the low part.  Bring them up.
1221da177e4SLinus Torvalds
1231da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 5
1241da177e4SLinus Torvalds
	@ xh doubles as the shift count: clz(xl) + 1 moves the highest set
	@ bit of xl just past bit 31, and ip moves down by the same amount.
1251da177e4SLinus Torvalds	clz	xh, xl			@ we know xh is zero here so...
1261da177e4SLinus Torvalds	add	xh, xh, #1
1271da177e4SLinus Torvalds	mov	xl, xl, lsl xh
1281da177e4SLinus Torvalds	mov	ip, ip, lsr xh
1291da177e4SLinus Torvalds
1301da177e4SLinus Torvalds#else
1311da177e4SLinus Torvalds
	@ Same effect one bit at a time: stop once a set bit has been
	@ shifted out of xl.
1321da177e4SLinus Torvalds7:	movs	xl, xl, lsl #1
1331da177e4SLinus Torvalds	mov	ip, ip, lsr #1
1341da177e4SLinus Torvalds	bcc	7b
1351da177e4SLinus Torvalds
1361da177e4SLinus Torvalds#endif
1371da177e4SLinus Torvalds
1381da177e4SLinus Torvalds	@ Current remainder is now 1.  It is worthless to compare with
1391da177e4SLinus Torvalds	@ divisor at this point since divisor can not be smaller than 3 here.
1401da177e4SLinus Torvalds	@ If possible, branch for another shift in the division loop.
1411da177e4SLinus Torvalds	@ If no bit position left then we are done.
1421da177e4SLinus Torvalds	movs	ip, ip, lsr #1
1431da177e4SLinus Torvalds	mov	xh, #1			@ mov keeps the flags from movs
1441da177e4SLinus Torvalds	bne	4b
1456ebbf2ceSRussell King	ret	lr
1461da177e4SLinus Torvalds
1471da177e4SLinus Torvalds8:	@ Division by a power of 2: determine what that divisor order is
1481da177e4SLinus Torvalds	@ then simply shift values around
1491da177e4SLinus Torvalds
1501da177e4SLinus Torvalds#if __LINUX_ARM_ARCH__ >= 5
1511da177e4SLinus Torvalds
1521da177e4SLinus Torvalds	clz	ip, r4
1531da177e4SLinus Torvalds	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = log2(divisor)
1541da177e4SLinus Torvalds
1551da177e4SLinus Torvalds#else
1561da177e4SLinus Torvalds
	@ Binary search for the single set bit: test the 16, 8 and 4 bit
	@ halves in turn, accumulating the bit index in ip.
1571da177e4SLinus Torvalds	mov	yl, r4
1581da177e4SLinus Torvalds	cmp	r4, #(1 << 16)
1591da177e4SLinus Torvalds	mov	ip, #0
1601da177e4SLinus Torvalds	movhs	yl, yl, lsr #16
1611da177e4SLinus Torvalds	movhs	ip, #16
1621da177e4SLinus Torvalds
1631da177e4SLinus Torvalds	cmp	yl, #(1 << 8)
1641da177e4SLinus Torvalds	movhs	yl, yl, lsr #8
1651da177e4SLinus Torvalds	addhs	ip, ip, #8
1661da177e4SLinus Torvalds
1671da177e4SLinus Torvalds	cmp	yl, #(1 << 4)
1681da177e4SLinus Torvalds	movhs	yl, yl, lsr #4
1691da177e4SLinus Torvalds	addhs	ip, ip, #4
1701da177e4SLinus Torvalds
	@ yl is now 1, 2, 4 or 8, whose bit indexes are 0, 1, 2 and 3:
	@ 8 takes the addhi, the others add yl >> 1.
1711da177e4SLinus Torvalds	cmp	yl, #(1 << 2)
1721da177e4SLinus Torvalds	addhi	ip, ip, #3
1731da177e4SLinus Torvalds	addls	ip, ip, yl, lsr #1
1741da177e4SLinus Torvalds
1751da177e4SLinus Torvalds#endif
1761da177e4SLinus Torvalds
	@ Quotient = xh:xl >> ip; remainder = the low ip bits of xl.
	@ ip is at least 1 here since divisors 0 and 1 went to 9:, so the
	@ complementary shift 32 - ip stays within 1..31.
1771da177e4SLinus Torvalds	mov	yh, xh, lsr ip
1781da177e4SLinus Torvalds	mov	yl, xl, lsr ip
1791da177e4SLinus Torvalds	rsb	ip, ip, #32
	@ NOTE(review): ARM()/THUMB() are defined outside this file; they
	@ presumably emit their argument only for the matching instruction
	@ set -- confirm.  The THUMB pair overwrites xh, which is reloaded
	@ from xl just below.
1808b592783SCatalin Marinas ARM(	orr	yl, yl, xh, lsl ip	)
1818b592783SCatalin Marinas THUMB(	lsl	xh, xh, ip		)
1828b592783SCatalin Marinas THUMB(	orr	yl, yl, xh		)
1831da177e4SLinus Torvalds	mov	xh, xl, lsl ip		@ drop the bits that went to the quotient
1841da177e4SLinus Torvalds	mov	xh, xh, lsr ip		@ xh = xl & (divisor - 1)
1856ebbf2ceSRussell King	ret	lr
1861da177e4SLinus Torvalds
1871da177e4SLinus Torvalds	@ eq -> division by 1: obvious enough...
	@ Z still comes from the subs at entry.  For divisor 0 none of the
	@ eq instructions run and execution continues into Ldiv0_64
	@ (assuming the UNWIND() lines emit no instructions -- TODO confirm).
1881da177e4SLinus Torvalds9:	moveq	yl, xl
1891da177e4SLinus Torvalds	moveq	yh, xh
1901da177e4SLinus Torvalds	moveq	xh, #0
1916ebbf2ceSRussell King	reteq	lr
19201885bc5SLaura AbbottUNWIND(.fnend)
1931da177e4SLinus Torvalds
19401885bc5SLaura AbbottUNWIND(.fnstart)
19501885bc5SLaura AbbottUNWIND(.pad #4)
19601885bc5SLaura AbbottUNWIND(.save {lr})
19701885bc5SLaura AbbottLdiv0_64:
1981da177e4SLinus Torvalds	@ Division by 0:
	@ Save lr around the call to __div0 in an 8-byte stack slot (the
	@ .pad #4 and .save {lr} annotations above describe that layout),
	@ then hand back all-zero results by popping the saved lr into pc.
1991d6760a3SNicolas Pitre	str	lr, [sp, #-8]!
2001da177e4SLinus Torvalds	bl	__div0
2011da177e4SLinus Torvalds
2021da177e4SLinus Torvalds	@ as wrong as it could be...
2031da177e4SLinus Torvalds	mov	yl, #0
2041da177e4SLinus Torvalds	mov	yh, #0
2051da177e4SLinus Torvalds	mov	xh, #0
2061d6760a3SNicolas Pitre	ldr	pc, [sp], #8
2071da177e4SLinus Torvalds
20801885bc5SLaura AbbottUNWIND(.fnend)
20993ed3970SCatalin MarinasENDPROC(__do_div64)
210