/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 *   In:     \dividend, \divisor  - unsigned operands.  Every user in this
 *                                  file only expands the macro after having
 *                                  checked that dividend > divisor and that
 *                                  the divisor is neither zero, one, nor a
 *                                  power of two.
 *   Out:    \result              - quotient bits accumulated by the loop.
 *   Clobb:  \dividend, \divisor, \curbit, flags.
 *
 * The divisor is first shifted left so that it lines up with the dividend,
 * with \curbit tracking the quotient bit that corresponds to the current
 * divisor position.  The main loop then handles four quotient bits per
 * iteration.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ CLZ is available: the alignment shift is the difference between
	@ the leading-zero counts of divisor and dividend.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute the bit position of the set bit of \divisor into \order.
 * Users in this file only expand it once (divisor & (divisor - 1)) == 0
 * has been established, i.e. for a power-of-two divisor.
 *
 *   Out:    \order   - shift count to apply in place of the division.
 *   Clobb:  flags and \divisor on the pre-ARMv5 path (the CLZ path leaves
 *           both untouched).
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search for the set bit: 16, 8, then 4 bit steps ...
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ ... and the remaining value (1, 2, 4 or 8) is mapped to
	@ 0, 1, 2 or 3 respectively.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned modulo core: repeatedly subtracts the aligned divisor from
 * \dividend so that only the remainder is left in \dividend.
 *
 *   In:     \dividend, \divisor  - unsigned operands.  Every user in this
 *                                  file only expands the macro after having
 *                                  checked that dividend > divisor and that
 *                                  the divisor is neither zero, one, nor a
 *                                  power of two.
 *   Out:    \dividend            - remainder.
 *   Clobb:  \divisor, \order, flags; \spare is written on the ARMv5+ path
 *           only.
 *
 * \order holds the number of bit positions the divisor was shifted left,
 * which is also the number of compare/subtract steps (minus one) needed.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	@ Skip the tail when no partial batch is left or the dividend
	@ has already reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * NOTE(review): the 8-byte alignment below is only requested when
 * CONFIG_ARM_PATCH_IDIV is set - presumably for code that patches this
 * entry point at run time; confirm against the patching code.
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient
 *   Clobb:  r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor: quotient is 1 when equal, 0 otherwise.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

	@ Power-of-two divisor: divide by shifting.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = remainder
 *   Clobb:  r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * resolves, without branching, the cases divisor == 1, dividend ==
 * divisor, dividend < divisor and power-of-two divisor; "retls" returns
 * for all of them.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

#ifdef CONFIG_ARM_PATCH_IDIV
	.align 3
#endif

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient
 *   Clobb:  r1, r2, r3, ip, flags
 *
 * ip holds dividend ^ divisor, whose top bit is the sign of the result.
 * The magnitude is computed with the unsigned loops and negated at the
 * end if needed.  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Division by 1 or -1: ip ^ r0 has the sign of the original
	@ divisor, so negate the dividend when the divisor was -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ |dividend| <= |divisor|: result is 0 when strictly lower,
	@ otherwise +1 or -1 according to the saved sign.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

	@ Power-of-two divisor: shift the magnitude, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit modulo.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = remainder, carrying the sign of the dividend
 *   Clobb:  r1, r2, r3, ip, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0				@ restore the dividend's sign
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient, r1 = remainder
 *   Clobb:  r2, r3, flags
 *
 * The operands are saved across the call to __aeabi_uidiv and reloaded
 * into r1/r2, after which remainder = dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient, r1 = remainder
 *   Clobb:  r2, r3, flags
 *
 * Same scheme as __aeabi_uidivmod, built on __aeabi_idiv.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common division-by-zero path.
 *
 * Saves lr (moving sp down by 8 bytes), calls __div0, then returns 0 in
 * r0 to the original caller by loading the saved lr straight into pc.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)