/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility: define UNWIND() as an empty macro, since we do
 * not support stack unwinding, and define CONFIG_AEABI to make all of
 * the functions available without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

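/*
 * ARM_DIV_BODY performs classic shift-and-subtract (restoring) division:
 * the divisor is shifted up towards the magnitude of the dividend and
 * then subtracted back out, four bit positions per loop iteration.  On
 * exit, \result holds the quotient and \dividend the remainder; \divisor
 * and \curbit are clobbered.  Ignoring the 4-way unrolling, a rough C
 * equivalent is:
 *
 *	result = 0; curbit = 1;
 *	while (divisor < dividend && !(divisor & 0x80000000)) {
 *		divisor <<= 1;
 *		curbit <<= 1;
 *	}
 *	while (curbit && dividend) {
 *		if (dividend >= divisor) {
 *			dividend -= divisor;
 *			result |= curbit;
 *		}
 *		divisor >>= 1;
 *		curbit >>= 1;
 *	}
 */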
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop iteration in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	1b

.endm

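/*
 * ARM_DIV2_ORDER handles the power-of-two fast path only: it leaves in
 * \order the bit position of the (single) set bit of \divisor, so the
 * caller can replace the division by a plain right shift.  For example,
 * a divisor of 8 gives order 3, and the quotient is just dividend >> 3.
 * Note that \divisor may be clobbered by the pre-ARMv5 variant.
 */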
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtraction steps are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1
	reteq	lr
	bcc	Ldiv0
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection

.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection

.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection

.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection

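/*
 * AEABI run-time helpers: __aeabi_uidivmod and __aeabi_idivmod return
 * the quotient in r0 and the remainder in r1.  Rather than duplicating
 * the division code, the remainder is reconstructed from the quotient
 * after the call to __aeabi_(u)idiv as r1 = dividend - quotient * divisor.
 */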
#ifdef CONFIG_AEABI

.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection

.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection

#endif

.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0				@ About as wrong as it could be.
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection

/*
 * Thumb-1 specialities: switch/case dispatch helpers used by GCC on
 * Thumb-1 only targets.  On entry, r0 holds the case index and lr points
 * just past the bl, i.e. at the table of byte (sqi/uqi) or halfword
 * (shi/uhi) offsets; the selected offset is scaled to bytes and added to
 * lr so that the final bx lr lands on the chosen case.
 */
#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit to get
	lsls	r1, r1, #1		@ the table address
	ldrsb	r1, [r1, r0]		@ signed byte offset
	lsls	r1, r1, #1		@ scale to bytes
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection

.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]		@ unsigned byte offset
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection

.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1		@ halfword table: scale the index
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]		@ signed halfword offset
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection

.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]		@ unsigned halfword offset
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
#endif