#ifndef _ASM_GENERIC_DIV64_H
#define _ASM_GENERIC_DIV64_H
/*
 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
 * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
 *
 * Optimization for constant divisors on 32-bit machines:
 * Copyright (C) 2006-2015 Nicolas Pitre
 *
 * The semantics of do_div() are:
 *
 * uint32_t do_div(uint64_t *n, uint32_t base)
 * {
 *	uint32_t remainder = *n % base;
 *	*n = *n / base;
 *	return remainder;
 * }
 *
 * NOTE: macro parameter n is evaluated multiple times,
 * beware of side effects!
 */
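
/*
 * Illustrative usage (a minimal sketch; the variable names and values
 * below are invented for this example and are not part of the API):
 *
 *	uint64_t ns = 1000000123;
 *	uint32_t rem;
 *
 *	rem = do_div(ns, 1000000);
 *	// now ns == 1000 (the quotient) and rem == 123 (the remainder)
 *
 * Because n is expanded several times, an argument with side effects,
 * e.g. do_div(v[i++], base), would be evaluated more than once.
 */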

#include <linux/types.h>
#include <linux/compiler.h>

#if BITS_PER_LONG == 64

# define do_div(n,base) ({					\
	uint32_t __base = (base);				\
	uint32_t __rem;						\
	__rem = ((uint64_t)(n)) % __base;			\
	(n) = ((uint64_t)(n)) / __base;				\
	__rem;							\
 })

#elif BITS_PER_LONG == 32

#include <linux/log2.h>

/*
 * If the divisor happens to be constant, we determine the appropriate
 * inverse at compile time to turn the division into a few inline
 * multiplications which ought to be much faster, but only when
 * compiling with a gcc version recent enough to perform proper
 * 64-bit constant propagation.
 *
 * (It is unfortunate that gcc doesn't perform all this internally.)
 */

#ifndef __div64_const32_is_OK
#define __div64_const32_is_OK (__GNUC__ >= 4)
#endif
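
/*
 * A worked instance of the precomputation done below, for b = 10
 * (the numbers are only illustrative):
 *
 *	p = 1 << ilog2(10) = 8
 *	m = ((p << 64) + b - 1) / b = ceil(2^67 / 10)
 *	  = 0xcccccccccccccccd
 *
 * giving n / 10 == ((n * m) >> 64) / 8 for every 64-bit n, i.e. a
 * 64x64 multiply keeping the top half, followed by a shift by 3.
 * (For b = 10 the special case in the sanitize phase below would in
 * fact apply, since ~0ULL is divisible by 5, and simplify these
 * values further.)
 */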

#define __div64_const32(n, ___b)					\
({									\
	/*								\
	 * Multiplication by reciprocal of b: n / b = n * (p / b) / p	\
	 *								\
	 * We rely on the fact that most of this code gets optimized	\
	 * away at compile time due to constant propagation and only	\
	 * a few multiplication instructions should remain.		\
	 * Hence this monstrous macro (static inline doesn't always	\
	 * do the trick here).						\
	 */								\
	uint64_t ___res, ___x, ___t, ___m, ___n = (n);			\
	uint32_t ___p, ___bias, ___m_lo, ___m_hi, ___n_lo, ___n_hi;	\
									\
	/* determine MSB of b */					\
	___p = 1 << ilog2(___b);					\
									\
	/* compute m = ((p << 64) + b - 1) / b */			\
	___m = (~0ULL / ___b) * ___p;					\
	___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b;	\
									\
	/* one less than the dividend with highest result */		\
	___x = ~0ULL / ___b * ___b - 1;					\
									\
	/* test our ___m with res = m * x / (p << 64) */		\
	___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32;	\
	___t = ___res += (___m & 0xffffffff) * (___x >> 32);		\
	___res += (___x & 0xffffffff) * (___m >> 32);			\
	___t = (___res < ___t) ? (1ULL << 32) : 0;			\
	___res = (___res >> 32) + ___t;					\
	___res += (___m >> 32) * (___x >> 32);				\
	___res /= ___p;							\
									\
	/* Now sanitize and optimize what we've got. */			\
	if (~0ULL % (___b / (___b & -___b)) == 0) {			\
		/* special case, can be simplified to ... */		\
		___n /= (___b & -___b);					\
		___m = ~0ULL / (___b / (___b & -___b));			\
		___p = 1;						\
		___bias = 1;						\
	} else if (___res != ___x / ___b) {				\
		/*							\
		 * We can't get away without a bias to compensate	\
		 * for bit truncation errors. To avoid it we'd need an	\
		 * additional bit to represent m which would overflow	\
		 * a 64-bit variable.					\
		 *							\
		 * Instead we do m = p / b and n / b = (n * m + m) / p.	\
		 */							\
		___bias = 1;						\
		/* Compute m = (p << 64) / b */				\
		___m = (~0ULL / ___b) * ___p;				\
		___m += ((~0ULL % ___b + 1) * ___p) / ___b;		\
	} else {							\
		/*							\
		 * Reduce m / p, and try to clear bit 31 of m when	\
		 * possible, otherwise that'll need extra overflow	\
		 * handling later.					\
		 */							\
		uint32_t ___bits = -(___m & -___m);			\
		___bits |= ___m >> 32;					\
		___bits = (~___bits) << 1;				\
		/*							\
		 * If ___bits == 0 then setting bit 31 is unavoidable.	\
		 * Simply apply the maximum possible reduction in that	\
		 * case. Otherwise the MSB of ___bits indicates the	\
		 * best reduction we should apply.			\
		 */							\
		if (!___bits) {						\
			___p /= (___m & -___m);				\
			___m /= (___m & -___m);				\
		} else {						\
			___p >>= ilog2(___bits);			\
			___m >>= ilog2(___bits);			\
		}							\
		/* No bias needed. */					\
		___bias = 0;						\
	}								\
									\
	/*								\
	 * Now we have a combination of 2 conditions:			\
	 *								\
	 * 1) whether or not we need to apply a bias, and		\
	 *								\
	 * 2) whether or not there might be an overflow in the cross	\
	 *    product determined by (___m & ((1 << 63) | (1 << 31))).	\
	 *								\
	 * Select the best way to do (m_bias + m * n) / (p << 64).	\
	 * From now on there will be actual runtime code generated.	\
	 */								\
									\
	___m_lo = ___m;							\
	___m_hi = ___m >> 32;						\
	___n_lo = ___n;							\
	___n_hi = ___n >> 32;						\
									\
	if (!___bias) {							\
		___res = ((uint64_t)___m_lo * ___n_lo) >> 32;		\
	} else if (!(___m & ((1ULL << 63) | (1ULL << 31)))) {		\
		___res = (___m + (uint64_t)___m_lo * ___n_lo) >> 32;	\
	} else {							\
		___res = ___m + (uint64_t)___m_lo * ___n_lo;		\
		___t = (___res < ___m) ? (1ULL << 32) : 0;		\
		___res = (___res >> 32) + ___t;				\
	}								\
									\
	if (!(___m & ((1ULL << 63) | (1ULL << 31)))) {			\
		___res += (uint64_t)___m_lo * ___n_hi;			\
		___res += (uint64_t)___m_hi * ___n_lo;			\
		___res >>= 32;						\
	} else {							\
		___t = ___res += (uint64_t)___m_lo * ___n_hi;		\
		___res += (uint64_t)___m_hi * ___n_lo;			\
		___t = (___res < ___t) ? (1ULL << 32) : 0;		\
		___res = (___res >> 32) + ___t;				\
	}								\
									\
	___res += (uint64_t)___m_hi * ___n_hi;				\
									\
	___res /= ___p;							\
})
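
/*
 * For reference, the runtime part above computes the top half of a
 * 64x64->128 bit product using only 32x32->64 multiplies. A sketch
 * in plain C (mul64_hi is a name invented for illustration, not an
 * actual kernel helper):
 *
 *	static inline uint64_t mul64_hi(uint64_t m, uint64_t n)
 *	{
 *		uint32_t m_lo = m, m_hi = m >> 32;
 *		uint32_t n_lo = n, n_hi = n >> 32;
 *		uint64_t lo   = (uint64_t)m_lo * n_lo;
 *		uint64_t mid1 = (uint64_t)m_lo * n_hi;
 *		uint64_t mid2 = (uint64_t)m_hi * n_lo;
 *		uint64_t hi   = (uint64_t)m_hi * n_hi;
 *		// carry out of the two middle partial products
 *		uint64_t carry = ((lo >> 32) + (uint32_t)mid1
 *				  + (uint32_t)mid2) >> 32;
 *		return hi + (mid1 >> 32) + (mid2 >> 32) + carry;
 *	}
 *
 * The (___res < ___t) comparisons in the macro serve the same purpose
 * as the explicit carry computation here.
 */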

extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);

/* The unnecessary pointer compare is there
 * to check for type safety (n must be 64-bit)
 */
# define do_div(n,base) ({				\
	uint32_t __base = (base);			\
	uint32_t __rem;					\
	(void)(((typeof((n)) *)0) == ((uint64_t *)0));	\
	if (__builtin_constant_p(__base) &&		\
	    is_power_of_2(__base)) {			\
		__rem = (n) & (__base - 1);		\
		(n) >>= ilog2(__base);			\
	} else if (__div64_const32_is_OK &&		\
		   __builtin_constant_p(__base) &&	\
		   __base != 0) {			\
		uint32_t __res_lo, __n_lo = (n);	\
		(n) = __div64_const32(n, __base);	\
		/* the remainder can be computed with 32-bit regs */ \
		__res_lo = (n);				\
		__rem = __n_lo - __res_lo * __base;	\
	} else if (likely(((n) >> 32) == 0)) {		\
		__rem = (uint32_t)(n) % __base;		\
		(n) = (uint32_t)(n) / __base;		\
	} else						\
		__rem = __div64_32(&(n), __base);	\
	__rem;						\
})

#else /* BITS_PER_LONG == ?? */

# error do_div() does not yet support the C64

#endif /* BITS_PER_LONG */

#endif /* _ASM_GENERIC_DIV64_H */