#ifndef _ASM_GENERIC_DIV64_H
#define _ASM_GENERIC_DIV64_H
/*
 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
 * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
 *
 * Optimization for constant divisors on 32-bit machines:
 * Copyright (C) 2006-2015 Nicolas Pitre
 *
 * The semantics of do_div() are:
 *
 * uint32_t do_div(uint64_t *n, uint32_t base)
 * {
 *	uint32_t remainder = *n % base;
 *	*n = *n / base;
 *	return remainder;
 * }
 *
 * NOTE: macro parameter n is evaluated multiple times,
 * beware of side effects!
 */

#include <linux/types.h>
#include <linux/compiler.h>

#if BITS_PER_LONG == 64

/**
 * do_div - divide a 64-bit dividend in place and return the remainder
 * @n: uint64_t dividend (an lvalue, updated in place with the quotient)
 * @base: uint32_t divisor
 *
 * Summary:
 * ``uint32_t remainder = *n % base;``
 * ``*n = *n / base;``
 *
 * Return: (uint32_t)remainder
 *
 * NOTE: macro parameter @n is evaluated multiple times,
 * beware of side effects!
 */
# define do_div(n,base) ({					\
	uint32_t __base = (base);				\
	uint32_t __rem;						\
	__rem = ((uint64_t)(n)) % __base;			\
	(n) = ((uint64_t)(n)) / __base;				\
	__rem;							\
 })
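
/*
 * Minimal usage sketch (illustrative only; the variable names and
 * values below are arbitrary): do_div() updates its dividend argument
 * in place and evaluates to the remainder.
 *
 *	uint64_t ns = 1000000123ULL;
 *	uint32_t rem = do_div(ns, 1000000);
 *
 * Afterwards ns == 1000 (the quotient) and rem == 123 (the remainder).
 */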
#elif BITS_PER_LONG == 32

#include <linux/log2.h>

/*
 * If the divisor happens to be constant, we determine the appropriate
 * inverse at compile time to turn the division into a few inline
 * multiplications which ought to be much faster. This only works when
 * compiling with a gcc version recent enough to perform proper 64-bit
 * constant propagation.
 *
 * (It is unfortunate that gcc doesn't perform all this internally.)
 */

#ifndef __div64_const32_is_OK
#define __div64_const32_is_OK (__GNUC__ >= 4)
#endif

#define __div64_const32(n, ___b)					\
({									\
	/*								\
	 * Multiplication by reciprocal of b: n / b = n * (p / b) / p	\
	 *								\
	 * We rely on the fact that most of this code gets optimized	\
	 * away at compile time due to constant propagation and only	\
	 * a few multiplication instructions should remain.		\
	 * Hence this monstrous macro (static inline doesn't always	\
	 * do the trick here).						\
	 */								\
	uint64_t ___res, ___x, ___t, ___m, ___n = (n);			\
	uint32_t ___p, ___bias;						\
									\
	/* determine MSB of b */					\
	___p = 1 << ilog2(___b);					\
									\
	/* compute m = ((p << 64) + b - 1) / b */			\
	___m = (~0ULL / ___b) * ___p;					\
	___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b;	\
									\
	/* one less than the dividend with highest result */		\
	___x = ~0ULL / ___b * ___b - 1;					\
									\
	/* test our ___m with res = m * x / (p << 64) */		\
	___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32;	\
	___t = ___res += (___m & 0xffffffff) * (___x >> 32);		\
	___res += (___x & 0xffffffff) * (___m >> 32);			\
	___t = (___res < ___t) ? (1ULL << 32) : 0;			\
	___res = (___res >> 32) + ___t;					\
	___res += (___m >> 32) * (___x >> 32);				\
	___res /= ___p;							\
									\
	/* Now sanitize and optimize what we've got. */			\
	if (~0ULL % (___b / (___b & -___b)) == 0) {			\
		/* special case, can be simplified to ... */		\
		___n /= (___b & -___b);					\
		___m = ~0ULL / (___b / (___b & -___b));			\
		___p = 1;						\
		___bias = 1;						\
	} else if (___res != ___x / ___b) {				\
		/*							\
		 * We can't get away without a bias to compensate	\
		 * for bit truncation errors. To avoid it we'd need an	\
		 * additional bit to represent m which would overflow	\
		 * a 64-bit variable.					\
		 *							\
		 * Instead we do m = p / b and n / b = (n * m + m) / p.	\
		 */							\
		___bias = 1;						\
		/* Compute m = (p << 64) / b */				\
		___m = (~0ULL / ___b) * ___p;				\
		___m += ((~0ULL % ___b + 1) * ___p) / ___b;		\
	} else {							\
		/*							\
		 * Reduce m / p, and try to clear bit 31 of m when	\
		 * possible, otherwise that'll need extra overflow	\
		 * handling later.					\
		 */							\
		uint32_t ___bits = -(___m & -___m);			\
		___bits |= ___m >> 32;					\
		___bits = (~___bits) << 1;				\
		/*							\
		 * If ___bits == 0 then setting bit 31 is unavoidable.	\
		 * Simply apply the maximum possible reduction in that	\
		 * case. Otherwise the MSB of ___bits indicates the	\
		 * best reduction we should apply.			\
		 */							\
		if (!___bits) {						\
			___p /= (___m & -___m);				\
			___m /= (___m & -___m);				\
		} else {						\
			___p >>= ilog2(___bits);			\
			___m >>= ilog2(___bits);			\
		}							\
		/* No bias needed. */					\
		___bias = 0;						\
	}								\
									\
	/*								\
	 * Now we have a combination of 2 conditions:			\
	 *								\
	 * 1) whether or not we need to apply a bias, and		\
	 *								\
	 * 2) whether or not there might be an overflow in the cross	\
	 *    product determined by (___m & ((1 << 63) | (1 << 31))).	\
	 *								\
	 * Select the best way to do (m_bias + m * n) / (1 << 64).	\
	 * From now on there will be actual runtime code generated.	\
	 */								\
	___res = __arch_xprod_64(___m, ___n, ___bias);			\
									\
	___res /= ___p;							\
})
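
/*
 * A worked instance of the scheme above (illustrative only, with an
 * arbitrarily chosen b == 5): ilog2(5) == 2 gives p == 4, and the
 * initially computed m == ((4 << 64) + 4) / 5 == 0xcccccccccccccccd.
 * Then, for example with n == 100:
 *
 *	(100 * 0xcccccccccccccccd) >> 64 == 80,  80 / 4 == 20 == 100 / 5
 *
 * The macro's sanitize step may simplify m and p further (it does for
 * b == 5, since 2^64 - 1 happens to be a multiple of 5). All of this
 * arithmetic folds away at compile time; only the multiplications by m
 * survive into the generated code.
 */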
#ifndef __arch_xprod_64
/*
 * Default C implementation for __arch_xprod_64()
 *
 * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 * Semantic:  retval = ((bias ? m : 0) + m * n) >> 64
 *
 * The product is a 128-bit value, scaled down to 64 bits.
 * Assuming constant propagation to optimize away unused conditional code.
 * Architectures may provide their own optimized assembly implementation.
 */
static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
{
	uint32_t m_lo = m;
	uint32_t m_hi = m >> 32;
	uint32_t n_lo = n;
	uint32_t n_hi = n >> 32;
	uint64_t res, tmp;

	if (!bias) {
		res = ((uint64_t)m_lo * n_lo) >> 32;
	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		/* there can't be any overflow here */
		res = (m + (uint64_t)m_lo * n_lo) >> 32;
	} else {
		res = m + (uint64_t)m_lo * n_lo;
		tmp = (res < m) ? (1ULL << 32) : 0;
		res = (res >> 32) + tmp;
	}

	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		/* there can't be any overflow here */
		res += (uint64_t)m_lo * n_hi;
		res += (uint64_t)m_hi * n_lo;
		res >>= 32;
	} else {
		tmp = res += (uint64_t)m_lo * n_hi;
		res += (uint64_t)m_hi * n_lo;
		tmp = (res < tmp) ? (1ULL << 32) : 0;
		res = (res >> 32) + tmp;
	}

	res += (uint64_t)m_hi * n_hi;

	return res;
}
#endif
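
/*
 * The implementation above is the usual 32x32 -> 64 partial-product
 * decomposition of a 64x64 -> 128 multiplication, i.e. (in pseudo
 * 128-bit notation):
 *
 *	m * n == (m_hi * n_hi << 64)
 *	       + ((m_hi * n_lo + m_lo * n_hi) << 32)
 *	       + m_lo * n_lo
 *
 * As an illustrative sanity check: for m == n == 1ULL << 32 the full
 * 128-bit product is 1 << 64, so
 * __arch_xprod_64(1ULL << 32, 1ULL << 32, false) returns 1, i.e. only
 * the top 64 bits of the product.
 */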
#ifndef __div64_32
extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
#endif

/*
 * The unnecessary pointer compare is there
 * to check for type safety (n must be a 64-bit type).
 */
# define do_div(n,base) ({					\
	uint32_t __base = (base);				\
	uint32_t __rem;						\
	(void)(((typeof((n)) *)0) == ((uint64_t *)0));		\
	if (__builtin_constant_p(__base) &&			\
	    is_power_of_2(__base)) {				\
		__rem = (n) & (__base - 1);			\
		(n) >>= ilog2(__base);				\
	} else if (__div64_const32_is_OK &&			\
		   __builtin_constant_p(__base) &&		\
		   __base != 0) {				\
		uint32_t __res_lo, __n_lo = (n);		\
		(n) = __div64_const32(n, __base);		\
		/* the remainder can be computed with 32-bit regs */ \
		__res_lo = (n);					\
		__rem = __n_lo - __res_lo * __base;		\
	} else if (likely(((n) >> 32) == 0)) {			\
		__rem = (uint32_t)(n) % __base;			\
		(n) = (uint32_t)(n) / __base;			\
	} else							\
		__rem = __div64_32(&(n), __base);		\
	__rem;							\
 })
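
/*
 * Illustrative note on the pointer compare above (the variables are
 * arbitrary examples): passing a 32-bit lvalue as the dividend
 * typically draws a "comparison of distinct pointer types" diagnostic
 * from gcc at compile time, e.g.:
 *
 *	uint32_t x = 10;
 *	do_div(x, 3);	// diagnosed: uint32_t * vs uint64_t *
 *
 *	uint64_t y = 10;
 *	do_div(y, 3);	// fine: y becomes 3, macro evaluates to 1
 */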
#else /* BITS_PER_LONG == ?? */

# error do_div() does not yet support the C64

#endif /* BITS_PER_LONG */

#endif /* _ASM_GENERIC_DIV64_H */