/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * The alpha chip doesn't provide hardware division, so we have to do it
 * by hand.  The compiler expects the functions
 *
 *	__divqu: 64-bit unsigned long divide
 *	__remqu: 64-bit unsigned long remainder
 *	__divq/__remq: signed 64-bit
 *	__divlu/__remlu: unsigned 32-bit
 *	__divl/__reml: signed 32-bit
 *
 * (The names above are exactly what the func()/ufunction/sfunction
 * macros below generate.)
 *
 * These are not normal C functions: instead of the normal
 * calling sequence, these expect their arguments in registers
 * $24 and $25, and return the result in $27. Register $28 may
 * be clobbered (assembly temporary), anything else must be saved.
 *
 * In short: painful.
 *
 * This is a rather simple bit-at-a-time algorithm: it's very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm
 * using lookup tables. This uses much less memory, though, and is
 * nicer on the cache.. Besides, I don't know the copyright status
 * of the DEC code.
 *
 * Build-time selection (one unsigned and one signed entry point are
 * produced per combination):
 *
 *	DIV defined	-> __div* (quotient returned in $27)
 *	DIV undefined	-> __rem* (remainder returned in $27)
 *	INTSIZE defined	-> 32-bit variants (operands are narrowed first)
 *	INTSIZE undef.	-> 64-bit variants
 */

/*
 * My temporaries:
 *	$0 - current bit
 *	$1 - shifted divisor
 *	$2 - modulus/quotient
 *	$3 - tmp1: candidate modulus (modulus - shifted divisor)
 *	$4 - tmp2: candidate quotient (division builds only)
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - quotient/modulus
 *	$28 - compare status
 */

#include <asm/export.h>
/* Not referenced by any instruction below; kept as-is. */
#define halt .long 0

/*
 * Select function type and registers
 */
#define mask	$0
#define divisor	$1
#define compare $28
#define tmp1	$3
#define tmp2	$4

/*
 * DIV_ONLY()/MOD_ONLY() emit their argument only in the matching build.
 * Whichever of quotient/modulus is the function result lives in $27;
 * the other one lives in the scratch register $2.
 *
 * GETSIGN(x) leaves the sign of the signed result in the top bit of x:
 * for division that is sign($24) ^ sign($25), for remainder it is the
 * sign of the dividend $24.
 *
 * STACK is the frame size.  Division needs a fifth save slot (tmp2 at
 * offset 32); 48 rather than 40 presumably keeps $30 16-byte aligned
 * -- TODO confirm against the ABI.
 */
#ifdef DIV
#define DIV_ONLY(x,y...)	x,##y
#define MOD_ONLY(x,y...)
#define func(x)			__div##x
#define modulus			$2
#define quotient		$27
#define GETSIGN(x)		xor $24,$25,x
#define STACK			48
#else
#define DIV_ONLY(x,y...)
#define MOD_ONLY(x,y...)	x,##y
#define func(x)			__rem##x
#define modulus			$27
#define quotient		$2
#define GETSIGN(x)		bis $24,$24,x
#define STACK			32
#endif

/*
 * For 32-bit operations, we need to extend to 64-bit
 *
 * LONGIFY keeps the low four bytes and clears the rest (zapnot ..,15);
 * SLONGIFY is a 32-bit add of zero (addl).  Both expand to nothing in
 * the 64-bit build.
 */
#ifdef INTSIZE
#define ufunction	func(lu)
#define sfunction	func(l)
#define LONGIFY(x)	zapnot x,15,x
#define SLONGIFY(x)	addl x,0,x
#else
#define ufunction	func(qu)
#define sfunction	func(q)
#define LONGIFY(x)
#define SLONGIFY(x)
#endif

/*
 * ufunction: unsigned divide / remainder.
 *
 * In:	$24 = dividend, $25 = divisor, $23 = return address
 * Out:	$27 = quotient (DIV) or remainder (!DIV)
 * Clobbers: $28 only; $0-$3 (and $4 in DIV builds) are saved/restored.
 *
 * Frame layout (offsets from $30 after the subq):
 *	 0: $1   8: $2   16: $0   24: tmp1 ($3)   32: tmp2 ($4, DIV only)
 *
 * Division by zero is not trapped here: the code branches straight to
 * the epilogue with quotient == 0 and modulus == dividend, so the
 * __div* variants return 0 and the __rem* variants return the dividend
 * (after LONGIFY).
 *
 * Label 7 is also the entry point used by sfunction's fast path, which
 * has already allocated an identical STACK-sized frame.
 *
 * $28 (compare) is written explicitly below, hence ".set noat".
 */
.set noat
	.align	3
	.globl	ufunction
	.ent	ufunction
ufunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/* Register saves are interleaved with the operand setup. */
7:	stq	$1, 0($30)
	bis	$25,$25,divisor
	stq	$2, 8($30)
	bis	$24,$24,modulus
	stq	$0,16($30)
	bis	$31,$31,quotient
	LONGIFY(divisor)
	stq	tmp1,24($30)
	LONGIFY(modulus)
	bis	$31,1,mask
	DIV_ONLY(stq tmp2,32($30))
	beq	divisor, 9f			/* div by zero */

#ifdef INTSIZE
	/*
	 * shift divisor left, using 3-bit shifts for
	 * 32-bit divides as we can't overflow. Three-bit
	 * shifts will result in looping three times less
	 * here, but can result in two loops more later.
	 * Thus using a large shift isn't worth it (and
	 * s8add pairs better than a sll..)
	 */
1:	cmpult	divisor,modulus,compare
	s8addq	divisor,$31,divisor
	s8addq	mask,$31,mask
	bne	compare,1b
#else
	/*
	 * 64-bit: double divisor and mask while divisor < modulus.
	 * Bail out to the subtract loop as soon as the divisor's top
	 * bit is set (blt), since one more doubling would lose it.
	 */
1:	cmpult	divisor,modulus,compare
	blt     divisor, 2f
	addq	divisor,divisor,divisor
	addq	mask,mask,mask
	bne	compare,1b
	unop
#endif

	/* ok, start to go right again.. */
	/*
	 * One quotient bit per iteration: if the shifted divisor still
	 * fits into the modulus, commit modulus - divisor (tmp1) and, in
	 * DIV builds, quotient + mask (tmp2, computed from the mask value
	 * before it is shifted).  Then move divisor and mask one bit
	 * right; the loop ends when mask has been shifted out to zero.
	 */
2:	DIV_ONLY(addq quotient,mask,tmp2)
	srl	mask,1,mask
	cmpule	divisor,modulus,compare
	subq	modulus,divisor,tmp1
	DIV_ONLY(cmovne compare,tmp2,quotient)
	srl	divisor,1,divisor
	cmovne	compare,tmp1,modulus
	bne	mask,2b

	/* Epilogue: restore the saved temporaries and pop the frame. */
9:	ldq	$1, 0($30)
	ldq	$2, 8($30)
	ldq	$0,16($30)
	ldq	tmp1,24($30)
	DIV_ONLY(ldq tmp2,32($30))
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	ufunction
EXPORT_SYMBOL(ufunction)

/*
 * Uhh.. Ugly signed division. I'd rather not have it at all, but
 * it's needed in some circumstances. There are different ways to
 * handle this, really. This does:
 * 	-a / b = a / -b = -(a / b)
 *	-a % b = -(a % b)
 *	a % -b = a % b
 * which is probably not the best solution, but at least should
 * have the property that (x/y)*y + (x%y) = x.
 *
 * sfunction: signed divide / remainder.
 *
 * In:	$24 = dividend, $25 = divisor, $23 = return address
 * Out:	$27 = quotient (DIV) or remainder (!DIV)
 *
 * Fast path: if ($24 | $25) is non-negative, i.e. neither operand has
 * its sign bit set, jump to label 7 inside ufunction, which reuses the
 * frame allocated here and returns directly to our caller.
 *
 * Slow path frame layout (offsets from $30):
 *	 0: original $24   8: original $25   16: $23   24: tmp1 ($3)
 * The operands are replaced by their absolute values, ufunction is
 * called, the original operands are reloaded so GETSIGN can work out
 * the result's sign, and $27 is negated if that sign is negative.
 */
	.align	3
	.globl	sfunction
	.ent	sfunction
sfunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0
	bis	$24,$25,$28
	SLONGIFY($28)
	bge	$28,7b				/* both non-negative: unsigned path */
	stq	$24,0($30)
	subq	$31,$24,$28
	stq	$25,8($30)
	cmovlt	$24,$28,$24	/* abs($24) */
	stq	$23,16($30)
	subq	$31,$25,$28
	stq	tmp1,24($30)
	cmovlt	$25,$28,$25	/* abs($25) */
	unop
	bsr	$23,ufunction
	ldq	$24,0($30)
	ldq	$25,8($30)
	GETSIGN($28)
	subq	$31,$27,tmp1			/* tmp1 = -result */
	SLONGIFY($28)
	ldq	$23,16($30)
	cmovlt	$28,tmp1,$27			/* negate if sign is negative */
	ldq	tmp1,24($30)
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	sfunction
EXPORT_SYMBOL(sfunction)