/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * The alpha chip doesn't provide hardware division, so we have to do it
 * by hand.  This file is a template: the preprocessor symbols DIV and
 * INTSIZE select which pair of entry points (one unsigned, one signed)
 * is generated:
 *
 *                      DIV defined     DIV not defined
 *   (no INTSIZE)
 *     unsigned 64-bit  __divqu         __remqu
 *     signed 64-bit    __divq          __remq
 *   (INTSIZE defined)
 *     unsigned 32-bit  __divlu         __remlu
 *     signed 32-bit    __divl          __reml
 *
 * These are not normal C functions: instead of the normal
 * calling sequence, these expect their arguments in registers
 * $24 (dividend) and $25 (divisor), the return address in $23,
 * and return the result in $27.  Register $28 may
 * be clobbered (assembly temporary), anything else must be saved.
 *
 * In short: painful.
 *
 * This is a rather simple bit-at-a-time algorithm: it's very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm
 * using lookup tables.  This uses much less memory, though, and is
 * nicer on the cache..  Besides, I don't know the copyright status
 * of the DEC code.
 */

/*
 * My temporaries:
 *	$0 - current bit (mask)
 *	$1 - shifted divisor
 *	$2 - modulus/quotient (whichever one is NOT the result)
 *	$3 - tmp1: candidate modulus (modulus - divisor)
 *	$4 - tmp2: candidate quotient (quotient + mask), division only
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - quotient/modulus (whichever one IS the result)
 *	$28 - compare status
 *
 * $0-$4 are saved on the stack on entry and restored before returning.
 */

#include <linux/export.h>
/* Not referenced anywhere below. */
#define halt .long 0

/*
 * Select function type and registers
 */
#define mask $0
#define divisor $1
#define compare $28
#define tmp1 $3
#define tmp2 $4

/*
 * DIV_ONLY()/MOD_ONLY() emit their argument only in the matching
 * flavour of the build.  They are variadic so that an instruction
 * containing commas can be passed as the argument.  MOD_ONLY() is
 * not used below.
 *
 * STACK is the frame size.  The remainder flavour saves four
 * quadwords (offsets 0..24); the division flavour saves a fifth
 * (tmp2 at offset 32).  48 rather than 40 is presumably chosen to
 * keep $30 16-byte aligned.
 *
 * GETSIGN(x) leaves a value in x whose sign is the sign of the
 * signed result: for division the xor of both operands, for the
 * remainder the dividend itself.
 */
#ifdef DIV
#define DIV_ONLY(x,y...) x,##y
#define MOD_ONLY(x,y...)
#define func(x) __div##x
#define modulus $2
#define quotient $27
#define GETSIGN(x) xor $24,$25,x
#define STACK 48
#else
#define DIV_ONLY(x,y...)
#define MOD_ONLY(x,y...) x,##y
#define func(x) __rem##x
#define modulus $27
#define quotient $2
#define GETSIGN(x) bis $24,$24,x
#define STACK 32
#endif

/*
 * For 32-bit operations, we need to extend to 64-bit:
 * LONGIFY() keeps the low four bytes and zeroes the rest,
 * SLONGIFY() sign-extends the low 32 bits.
 * Both expand to nothing for the 64-bit flavour.
 */
#ifdef INTSIZE
#define ufunction func(lu)
#define sfunction func(l)
#define LONGIFY(x) zapnot x,15,x
#define SLONGIFY(x) addl x,0,x
#else
#define ufunction func(qu)
#define sfunction func(q)
#define LONGIFY(x)
#define SLONGIFY(x)
#endif

/*
 * ufunction: unsigned divide / remainder.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = quotient (DIV) or remainder (otherwise)
 * Clobbers: $28 only; $0-$3 (and $4 for DIV) are saved and restored.
 *
 * A zero divisor does not trap: the routine returns straight away
 * with the initial values, i.e. quotient 0 or remainder == dividend.
 *
 * Label 7 is also entered from sfunction, which has already
 * allocated an identical STACK-sized frame.
 */
.set noat
.align	3
.globl	ufunction
.ent	ufunction
ufunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/* Save the temporaries while setting up the working registers. */
7:	stq	$1, 0($30)
	bis	$25,$25,divisor
	stq	$2, 8($30)
	bis	$24,$24,modulus			/* running remainder starts as the dividend */
	stq	$0,16($30)
	bis	$31,$31,quotient		/* quotient = 0 */
	LONGIFY(divisor)
	stq	tmp1,24($30)
	LONGIFY(modulus)
	bis	$31,1,mask			/* mask = 1 */
	DIV_ONLY(stq tmp2,32($30))
	beq	divisor, 9f			/* div by zero */

#ifdef INTSIZE
	/*
	 * shift divisor left, using 3-bit shifts for
	 * 32-bit divides as we can't overflow. Three-bit
	 * shifts will result in looping three times less
	 * here, but can result in two loops more later.
	 * Thus using a large shift isn't worth it (and
	 * s8add pairs better than a sll..)
	 */
1:	cmpult	divisor,modulus,compare
	s8addq	divisor,$31,divisor
	s8addq	mask,$31,mask
	bne	compare,1b
#else
	/*
	 * shift divisor (and mask along with it) left one bit at a
	 * time while it is below the dividend.  Bail out as soon as
	 * the top bit of the divisor is set: one more shift would
	 * lose it.
	 */
1:	cmpult	divisor,modulus,compare
	blt     divisor, 2f
	addq	divisor,divisor,divisor
	addq	mask,mask,mask
	bne	compare,1b
	unop
#endif

	/*
	 * ok, start to go right again..
	 *
	 * One quotient bit per iteration: if the shifted divisor fits
	 * into the running remainder, subtract it and (DIV only) add
	 * the current mask bit to the quotient.  The candidate values
	 * are computed unconditionally and committed with cmovne, so
	 * the loop body contains no branch.  Note that tmp2 and compare
	 * use mask/divisor as they were BEFORE this iteration's srl.
	 */
2:	DIV_ONLY(addq quotient,mask,tmp2)
	srl	mask,1,mask
	cmpule	divisor,modulus,compare
	subq	modulus,divisor,tmp1
	DIV_ONLY(cmovne compare,tmp2,quotient)
	srl	divisor,1,divisor
	cmovne	compare,tmp1,modulus
	bne	mask,2b

	/* Restore the temporaries, pop the frame and return via $23. */
9:	ldq	$1, 0($30)
	ldq	$2, 8($30)
	ldq	$0,16($30)
	ldq	tmp1,24($30)
	DIV_ONLY(ldq tmp2,32($30))
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	ufunction
EXPORT_SYMBOL(ufunction)

/*
 * Uhh.. Ugly signed division. I'd rather not have it at all, but
 * it's needed in some circumstances. There are different ways to
 * handle this, really. This does:
 * 	-a / b = a / -b = -(a / b)
 *	-a % b = -(a % b)
 *	a % -b = a % b
 * which is probably not the best solution, but at least should
 * have the property that (x/y)*y + (x%y) = x.
 *
 * sfunction: signed divide / remainder.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = quotient (DIV) or remainder (otherwise)
 * $24, $25 and $23 are restored to their entry values before
 * returning; tmp1 is saved and restored as well.
 *
 * Frame layout on the slow path: 0 = $24, 8 = $25, 16 = $23, 24 = tmp1.
 */
.align 3
.globl	sfunction
.ent	sfunction
sfunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0
	/*
	 * Fast path: the sign bit of (dividend | divisor) is clear only
	 * if both operands are non-negative, in which case the unsigned
	 * code can be used as is.  The frame is already allocated, so
	 * jump past ufunction's own stack adjustment.
	 */
	bis	$24,$25,$28
	SLONGIFY($28)
	bge	$28,7b
	/*
	 * Slow path: save the original operands and the return address,
	 * replace the operands by their absolute values and call the
	 * unsigned routine (which returns through $23).
	 */
	stq	$24,0($30)
	subq	$31,$24,$28
	stq	$25,8($30)
	cmovlt	$24,$28,$24	/* abs($24) */
	stq	$23,16($30)
	subq	$31,$25,$28
	stq	tmp1,24($30)
	cmovlt	$25,$28,$25	/* abs($25) */
	unop
	bsr	$23,ufunction
	/*
	 * Reload the original operands, derive the sign of the result
	 * from them and negate the unsigned result if required.
	 */
	ldq	$24,0($30)
	ldq	$25,8($30)
	GETSIGN($28)
	subq	$31,$27,tmp1	/* tmp1 = -result */
	SLONGIFY($28)
	ldq	$23,16($30)
	cmovlt	$28,tmp1,$27
	ldq	tmp1,24($30)
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	sfunction
EXPORT_SYMBOL(sfunction)