/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author:	Nicolas Pitre
 * Created:	Oct 5, 2003
 * Copyright:	Monta Vista Software, Inc.
 *
 * SPDX-License-Identifier: GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __UBOOT__
#define UNWIND(x...)
#endif

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *	xh-xl	= dividend (clobbered)
 *	r4	= divisor (preserved)
 *
 * Output values:
 *	yh-yl	= result
 *	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

.pushsection .text.__do_div64, "ax"
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subscs	xh, xh, yl
	movsne	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of the remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if the lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare it with the
	@ divisor at this point since the divisor cannot be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position is left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31

#else

	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)
.popsection
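
/*
 * Reference note (an editor's sketch, not part of the upstream routine):
 * C code never calls __do_div64 directly; it goes through the do_div(n, base)
 * macro from include/asm/div64.h, which divides the 64-bit lvalue n in place
 * by the 32-bit base and evaluates to the 32-bit remainder.  Assuming
 * <stdint.h> types, a minimal C model of the computation performed above
 * could look like this (the helper name div64_32 is hypothetical, used only
 * for illustration):
 *
 *	static inline uint32_t div64_32(uint64_t *n, uint32_t base)
 *	{
 *		uint32_t rem = (uint32_t)(*n % base);	// remainder -> xh
 *
 *		*n /= base;				// quotient -> yh-yl
 *		return rem;
 *	}
 *
 * Example use, mirroring do_div() but taking a pointer instead of the
 * macro's in-place lvalue:
 *
 *	uint64_t n = 1000000007ULL;
 *	uint32_t rem = div64_32(&n, 1000);	// n == 1000000, rem == 7
 */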