/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * Register aliases.  The 64-bit dividend occupies r0-r1 and the 64-bit
 * result r2-r3; which register of each pair holds the high word depends
 * on the endianness the file is built for.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip
 *
 * A zero divisor falls through to Ldiv0_64, which calls __div0 and then
 * returns with result and remainder both set to 0.
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1; borrows if divisor is 0
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ (divisor - 1) & divisor
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0			@ flags from the cmp are left untouched
	blo	3f			@ xh < divisor: upper result word stays 0

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip		@ yl = shift count (xh >= divisor here)
	mov	ip, #1
	mov	ip, ip, lsl yl		@ ip = result bit for that shift
	mov	yl, r4, lsl yl		@ yl = divisor aligned with xh

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000		@ stop once the top bit of yl is set...
	cmpcc	yl, xh			@ ...or once yl has reached xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip		@ xh >= yl: set this result bit
	subscs	xh, xh, yl		@ ...and subtract; Z set if xh became 0
	movsne	ip, ip, lsr #1		@ next bit position; Z set when none left
	mov	yl, yl, lsr #1		@ plain mov: keeps the flags for the bne
	bne	2b

	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl			@ xh == 0 and xl < divisor: xl is the remainder
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000		@ ip = current result bit, from the top down
4:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
	adcs	xh, xh, xh		@ xh = 2 * xh + C; new C is the carry-out bit
	beq	6f
	cmpcc	xh, r4			@ no carry-out: compare; else C stays set
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1		@ shift count that pushes the top set bit out
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh		@ move the bit position by the same count

#else

7:	movs	xl, xl, lsl #1		@ loop until a set bit is shifted out
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = divisor order

#else

	@ No clz available: find the order by successive halving.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3		@ yl is 8 here
	addls	ip, ip, yl, lsr #1	@ yl is 1, 2 or 4: adds 0, 1 or 2

#endif

	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32		@ ip = 32 - order
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip		@ keep only the low order bits of xl:
	mov	xh, xh, lsr ip		@ that is the remainder
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ (flags are still those of the subs at function entry)
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
	@ Not eq: the divisor is 0, fall through to Ldiv0_64.
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	@ lr is stored with an 8 byte stack adjustment (lr plus the 4 byte
	@ pad declared above), presumably to keep sp 8-byte aligned for
	@ the call -- TODO confirm.
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc

UNWIND(.fnend)
ENDPROC(__do_div64)