/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
#include <asm/export.h>

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 *   dividend  in:  numerator.  Clobbered: multiples of the divisor are
 *                  subtracted from it as quotient bits are found.
 *   divisor   in:  denominator.  Clobbered: shifted left to line up with
 *                  the dividend, then shifted right 4 bits per loop pass.
 *   result    out: quotient, built by OR-ing in curbit (or curbit shifted
 *                  right by 1..3) after each successful subtraction.
 *   curbit    scratch: the quotient bit that matches the current divisor
 *                  position.
 *
 * Every expansion in this file is reached only after the caller has
 * branched away for a zero divisor, for dividend <= divisor and for a
 * power-of-two divisor.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the divisor up with the dividend in one step:
	@ shift = clz(divisor) - clz(dividend), curbit = 1 << shift.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps (divisor, divisor >> 1,
	@ divisor >> 2, divisor >> 3) per pass, each setting the matching
	@ quotient bit when the subtraction is taken.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Computes the index of the highest set bit of divisor into order.
 * On ARMv5 and later this is 31 - clz(divisor) and divisor is preserved.
 * The fallback path narrows the value by 16, 8 and 4 bits and clobbers
 * divisor; its last step only yields the exact bit index when the
 * remaining value is 1, 2, 4 or 8.  Both expansions in this file are
 * reached only when (divisor & (divisor - 1)) == 0, so order is then
 * the right-shift count that replaces the division.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now below 16: a value above 4 adds 3,
	@ otherwise add divisor >> 1 (1 -> 0, 2 -> 1, 4 -> 2).
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core.
 *
 *   dividend  in:  numerator;  out: the remainder.
 *   divisor   in:  denominator.  Clobbered (shifted left, then right).
 *   order     scratch: number of bit positions the divisor was shifted
 *                  left, then used as the countdown of remaining
 *                  compare/subtract steps.
 *   spare     scratch: only written on the ARMv5+ path.
 *
 * Both expansions in this file are reached only with a non-zero,
 * non-power-of-two divisor that is smaller than the dividend.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); divisor <<= order.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4		@ only counts down while dividend >= 1
	bge	1b

	@ Nothing left to do when the low two bits of order are clear
	@ or the dividend has already reached zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is compared against -2: below -> one step (4:),
	@ equal -> two steps (3:), above -> three steps (fall through).
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient
 *   Writes: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is already the quotient
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor: quotient is 0 or 1
	tst	r1, r2
	beq	12f				@ divisor is a power of 2: shift instead

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ Flags still come from the cmp above: equal -> 1, lower -> 0.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
EXPORT_SYMBOL(__udivsi3)
EXPORT_SYMBOL(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = dividend modulo divisor
 *   Writes: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * returns early with 0 when the divisor is 1 or equals the dividend,
 * with the dividend unchanged when it is lower than the divisor, and
 * with dividend & (divisor - 1) when the divisor is a power of 2.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
EXPORT_SYMBOL(__umodsi3)

#ifdef CONFIG_ARM_PATCH_IDIV
	.align 3
#endif

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient
 *   Writes: r1, r2, r3, ip, flags
 *
 * ip holds dividend EOR divisor, so its sign bit is the sign of the
 * quotient.  The magnitudes are divided as unsigned values and the
 * quotient is negated at the end when ip is negative.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Divisor was 1 or -1.  ip EOR r0 has the sign bit of the original
	@ divisor, so r0 is negated only for a negative divisor.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Magnitude of dividend <= magnitude of divisor: the quotient is 0
	@ when lower, otherwise (ip asr 31) ORed with 1, i.e. 1 or -1.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
EXPORT_SYMBOL(__divsi3)
EXPORT_SYMBOL(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = remainder, negated when the original dividend was negative
 *   Writes: r1, r2, r3, ip, flags
 *
 * ip keeps the original dividend so its sign can be applied to the
 * result.  The early-out sequence mirrors the one in __umodsi3 but
 * branches to the common sign fix-up at 10: instead of returning.
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
EXPORT_SYMBOL(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient (from __aeabi_uidiv), r1 = remainder
 *   Writes: r2, r3, plus whatever __aeabi_uidiv writes
 *
 * The original operands are saved on the stack across the call and
 * reloaded into r1 (dividend) and r2 (divisor); the remainder is then
 * dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
EXPORT_SYMBOL(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *
 *   In:     r0 = dividend, r1 = divisor
 *   Out:    r0 = quotient (from __aeabi_idiv), r1 = remainder
 *   Writes: r2, r3, plus whatever __aeabi_idiv writes
 *
 * Same scheme as __aeabi_uidivmod: the remainder is computed as
 * dividend - quotient * divisor from the operands saved on the stack.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
EXPORT_SYMBOL(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common target for division by zero.
 *
 * Saves lr on the stack (moving sp down by 8 bytes), calls __div0,
 * which is defined outside this file, then returns 0 in r0 to the
 * original caller by loading pc from the saved lr and restoring sp.
 * NOTE(review): the 8-byte adjustment for a 4-byte store presumably
 * keeps sp 8-byte aligned across the call - confirm.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)