/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
/*
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit, define empty UNWIND() macro as, since we
 * do not support stack unwinding and define CONFIG_AEABI to make all
 * of the functions available without diverging from Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#define CONFIG_AEABI
#endif

/*
 * ARM_DIV_BODY: unsigned division core.
 * In:  \dividend, \divisor (callers guarantee the divisor is neither
 *      zero nor a power of two; those cases are handled separately)
 * Out: \result = \dividend / \divisor, \dividend holds the remainder.
 * Clobbers \curbit and \divisor.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Use clz to line the divisor's top bit up with the dividend's,
	@ and place the matching quotient bit in curbit.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled 4x: try the divisor at four successive
	@ right shifts per iteration, accumulating quotient bits.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER: compute \order = log2(\divisor) for a power-of-two
 * divisor, so callers can divide with a single right shift.
 * Clobbers \divisor on pre-v5 cores.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit: 16, 8, 4, then the last 2 bits.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY: unsigned modulus core.
 * In:  \dividend, \divisor (callers guarantee the divisor is neither
 *      zero nor a power of two)
 * Out: \dividend = \dividend % \divisor.
 * Clobbers \order, \spare and \divisor.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * unsigned __udivsi3(unsigned dividend [r0], unsigned divisor [r1])
 * AEABI alias: __aeabi_uidiv.
 * Returns r0 = r0 / r1; branches to Ldiv0 on division by zero.
 * Clobbers r2, r3.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1; tests 0 and 1
	reteq	lr				@ divide by 1: quotient is r0
	bcc	Ldiv0				@ divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

11:	moveq	r0, #1				@ dividend == divisor
	movne	r0, #0				@ dividend < divisor
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2			@ power of 2: just shift
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection

/*
 * unsigned __umodsi3(unsigned dividend [r0], unsigned divisor [r1])
 * Returns r0 = r0 % r1; branches to Ldiv0 on division by zero.
 * Clobbers r2, r3.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: mask the low bits
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection

/*
 * int __divsi3(int dividend [r0], int divisor [r1])
 * AEABI alias: __aeabi_idiv.
 * Returns r0 = r0 / r1 (quotient); branches to Ldiv0 on division by
 * zero.  Clobbers r2, r3, ip (ip keeps the sign of the result).
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negate result if signs differed
	ret	lr

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

11:	movlo	r0, #0				@ |dividend| < divisor -> 0
	moveq	r0, ip, asr #31			@ |dividend| == divisor -> +/-1
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ power of 2: just shift
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection

/*
 * int __modsi3(int dividend [r0], int divisor [r1])
 * Returns r0 = r0 % r1 (sign follows the dividend, kept in ip);
 * branches to Ldiv0 on division by zero.  Clobbers r2, r3, ip.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ restore the dividend's sign
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod(unsigned num [r0], unsigned den [r1])
 * Returns quotient in r0 and remainder in r1, per the ARM RTABI.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ remainder = dividend - r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection

/*
 * __aeabi_idivmod(int num [r0], int den [r1])
 * Returns quotient in r0 and remainder in r1, per the ARM RTABI.
 *
 * Note: this function used to be placed in .text.__aeabi_uidivmod,
 * fusing it with the unsigned helper above; it now has its own
 * section so --gc-sections can keep or drop each one independently.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ remainder = dividend - r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection

#endif

/*
 * Ldiv0: common divide-by-zero path.  Calls __div0, then returns 0
 * to the original caller.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection

/* Thumb-1 specialities */
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi: gcc helper for Thumb-1 switch dispatch with a
 * table of signed byte offsets located right after the call site.
 * r0 = case index; adjusts lr to the selected case and returns there.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit of lr to get
	lsls	r1, r1, #1		@ the table address
	ldrsb	r1, [r1, r0]		@ signed byte offset for this case
	lsls	r1, r1, #1		@ offsets count halfwords
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection

/*
 * __gnu_thumb1_case_uqi: as above, with unsigned byte offsets.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit of lr to get
	lsls	r1, r1, #1		@ the table address
	ldrb	r1, [r1, r0]		@ unsigned byte offset for this case
	lsls	r1, r1, #1		@ offsets count halfwords
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection

/*
 * __gnu_thumb1_case_shi: as above, with signed halfword offsets
 * (index is scaled by 2 to address the halfword table).
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit of lr to get
	lsls	r0, r0, #1		@ the table address; scale the index
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]		@ signed halfword offset for this case
	lsls	r1, r1, #1		@ offsets count halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection

/*
 * __gnu_thumb1_case_uhi: as above, with unsigned halfword offsets.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit of lr to get
	lsls	r0, r0, #1		@ the table address; scale the index
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]		@ unsigned halfword offset for this case
	lsls	r1, r1, #1		@ offsets count halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
#endif