/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author:	Nicolas Pitre
 * Created:	Oct 5, 2003
 * Copyright:	Monta Vista Software, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __UBOOT__
#define UNWIND(x...)
#endif

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *	xh-xl	= dividend (clobbered)
 *	r4	= divisor (preserved)
 *
 * Output values:
 *	yh-yl	= result
 *	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

.pushsection .text.__do_div64, "ax"
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4
	beq	8f			@ divisor is power of 2

	@ See if we need to handle the upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align the divisor with the upper part of the dividend.
	@ The aligned divisor is stored in yl, preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

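	@ (For example, with a divisor of 5 and xh = 0x00123456, both
	@ paths above end up with yl = 5 << 18 = 0x00140000, whose top
	@ bit lines up with bit 20 of xh, and ip = 1 << 18, the first
	@ quotient bit tried by the loop below.)
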
	@ The division loop for the needed upper bit positions.
	@ Break out early if the dividend reaches 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle the lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for the lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

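	@ (Note on the loop above: when the adcs shifts a bit out of xh,
	@ the 33-bit remainder necessarily exceeds any 32-bit divisor, so
	@ the carry alone selects the orrcs/subcs path and the subtraction
	@ wraps to the correct 32-bit remainder; the cmpcc comparison is
	@ only needed when no bit was shifted out.)
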
	@ The top part of the remainder became zero. If carry is set
	@ (the 33rd bit) this is a false positive, so resume the loop.
	@ Otherwise, if the lower part is also zero then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ The current remainder is now 1.  It is pointless to compare it
	@ with the divisor at this point since the divisor cannot be
	@ smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position is left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine the order (log2) of the
	@ divisor, then simply shift values around.

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31

#else

	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)
.popsection
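
/*
 * Illustrative call sequence (a sketch only; real callers are expected
 * to go through the do_div() macro in include/asm/div64.h, as noted in
 * the header above).  With the little-endian register aliases defined
 * at the top of this file (xl=r0, xh=r1, yl=r2, yh=r3):
 *
 *	@ r1:r0 = 64-bit dividend, r4 = 32-bit divisor
 *	bl	__do_div64
 *	@ r3:r2 = 64-bit quotient, r1 = remainder, r0 and ip clobbered
 */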