#ifndef _ASM_GENERIC_DIV64_H
#define _ASM_GENERIC_DIV64_H
/*
 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
 * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
 *
 * Optimization for constant divisors on 32-bit machines:
 * Copyright (C) 2006-2015 Nicolas Pitre
 *
 * The semantics of do_div() are:
 *
 * uint32_t do_div(uint64_t *n, uint32_t base)
 * {
 *	uint32_t remainder = *n % base;
 *	*n = *n / base;
 *	return remainder;
 * }
 *
 * NOTE: macro parameter n is evaluated multiple times,
 *       beware of side effects!
 */
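
/*
 * Illustrative example (added for clarity, not a normative part of this
 * header): the macro updates its first argument in place and evaluates
 * to the 32-bit remainder, so a typical caller looks like
 *
 *	uint64_t bytes = 1000000123ULL;
 *	uint32_t rem = do_div(bytes, 1000000);
 *
 * which leaves bytes == 1000 and rem == 123.  Note that n is passed by
 * name (not by address), must be a 64-bit variable, and that base must
 * fit in 32 bits.
 */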

#include <linux/types.h>
#include <linux/compiler.h>

#if BITS_PER_LONG == 64

# define do_div(n,base) ({					\
	uint32_t __base = (base);				\
	uint32_t __rem;						\
	__rem = ((uint64_t)(n)) % __base;			\
	(n) = ((uint64_t)(n)) / __base;				\
	__rem;							\
 })

#elif BITS_PER_LONG == 32

#include <linux/log2.h>

/*
 * If the divisor happens to be constant, we determine the appropriate
 * inverse at compile time to turn the division into a few inline
 * multiplications which ought to be much faster, but only when compiling
 * with a gcc version recent enough to perform proper 64-bit constant
 * propagation.
 *
 * (It is unfortunate that gcc doesn't perform all this internally.)
 */

#ifndef __div64_const32_is_OK
#define __div64_const32_is_OK (__GNUC__ >= 4)
#endif

#define __div64_const32(n, ___b)					\
({									\
	/*								\
	 * Multiplication by reciprocal of b: n / b = n * (p / b) / p	\
	 *								\
	 * We rely on the fact that most of this code gets optimized	\
	 * away at compile time due to constant propagation and only	\
	 * a few multiplication instructions should remain.		\
	 * Hence this monstrous macro (static inline doesn't always	\
	 * do the trick here).						\
	 */								\
	uint64_t ___res, ___x, ___t, ___m, ___n = (n);			\
	uint32_t ___p, ___bias;						\
									\
	/* determine MSB of b */					\
	___p = 1 << ilog2(___b);					\
									\
	/* compute m = ((p << 64) + b - 1) / b */			\
	___m = (~0ULL / ___b) * ___p;					\
	___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b;	\
									\
	/* one less than the dividend with highest result */		\
	___x = ~0ULL / ___b * ___b - 1;					\
									\
	/* test our ___m with res = m * x / (p << 64) */		\
	___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32;	\
	___t = ___res += (___m & 0xffffffff) * (___x >> 32);		\
	___res += (___x & 0xffffffff) * (___m >> 32);			\
	___t = (___res < ___t) ? (1ULL << 32) : 0;			\
	___res = (___res >> 32) + ___t;					\
	___res += (___m >> 32) * (___x >> 32);				\
	___res /= ___p;							\
									\
	/* Now sanitize and optimize what we've got. */			\
	if (~0ULL % (___b / (___b & -___b)) == 0) {			\
		/* special case, can be simplified to ... */		\
		___n /= (___b & -___b);					\
		___m = ~0ULL / (___b / (___b & -___b));			\
		___p = 1;						\
		___bias = 1;						\
	} else if (___res != ___x / ___b) {				\
		/*							\
		 * We can't get away without a bias to compensate	\
		 * for bit truncation errors.  To avoid it we'd need an	\
		 * additional bit to represent m which would overflow	\
		 * a 64-bit variable.					\
		 *							\
		 * Instead we do m = p / b and n / b = (n * m + m) / p.	\
		 */							\
		___bias = 1;						\
		/* Compute m = (p << 64) / b */				\
		___m = (~0ULL / ___b) * ___p;				\
		___m += ((~0ULL % ___b + 1) * ___p) / ___b;		\
	} else {							\
		/*							\
		 * Reduce m / p, and try to clear bit 31 of m when	\
		 * possible, otherwise that'll need extra overflow	\
		 * handling later.					\
		 */							\
		uint32_t ___bits = -(___m & -___m);			\
		___bits |= ___m >> 32;					\
		___bits = (~___bits) << 1;				\
		/*							\
		 * If ___bits == 0 then setting bit 31 is unavoidable.	\
		 * Simply apply the maximum possible reduction in that	\
		 * case. Otherwise the MSB of ___bits indicates the	\
		 * best reduction we should apply.			\
		 */							\
		if (!___bits) {						\
			___p /= (___m & -___m);				\
			___m /= (___m & -___m);				\
		} else {						\
			___p >>= ilog2(___bits);			\
			___m >>= ilog2(___bits);			\
		}							\
		/* No bias needed. */					\
		___bias = 0;						\
	}								\
									\
	/*								\
	 * Now we have a combination of 2 conditions:			\
	 *								\
	 * 1) whether or not we need to apply a bias, and		\
	 *								\
	 * 2) whether or not there might be an overflow in the cross	\
	 *    product determined by (___m & ((1 << 63) | (1 << 31))).	\
	 *								\
	 * Select the best way to do (m_bias + m * n) / (1 << 64).	\
	 * From now on there will be actual runtime code generated.	\
	 */								\
	___res = __arch_xprod_64(___m, ___n, ___bias);			\
									\
	___res /= ___p;							\
})
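
/*
 * Worked example (illustration only, following the steps above): for a
 * constant ___b == 3, ~0ULL is an exact multiple of 3, so the "special
 * case" branch applies and everything collapses to ___p == 1,
 * ___bias == 1 and ___m == ~0ULL / 3 == 0x5555555555555555, i.e. the
 * whole division reduces to
 *
 *	n / 3 == __arch_xprod_64(0x5555555555555555, n, 1)
 *	      == (0x5555555555555555 + 0x5555555555555555 * n) >> 64
 *
 * evaluated in 128-bit arithmetic, so no 64-bit division remains at
 * run time.
 */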

#ifndef __arch_xprod_64
/*
 * Default C implementation for __arch_xprod_64()
 *
 * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 * Semantic:  retval = ((bias ? m : 0) + m * n) >> 64
 *
 * The product is a 128-bit value, scaled down to 64 bits.
 * Assuming constant propagation to optimize away unused conditional code.
 * Architectures may provide their own optimized assembly implementation.
 */
static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
{
	uint32_t m_lo = m;
	uint32_t m_hi = m >> 32;
	uint32_t n_lo = n;
	uint32_t n_hi = n >> 32;
	uint64_t res, tmp;

	if (!bias) {
		res = ((uint64_t)m_lo * n_lo) >> 32;
	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		/* there can't be any overflow here */
		res = (m + (uint64_t)m_lo * n_lo) >> 32;
	} else {
		res = m + (uint64_t)m_lo * n_lo;
		tmp = (res < m) ? (1ULL << 32) : 0;
		res = (res >> 32) + tmp;
	}

	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		/* there can't be any overflow here */
		res += (uint64_t)m_lo * n_hi;
		res += (uint64_t)m_hi * n_lo;
		res >>= 32;
	} else {
		tmp = res += (uint64_t)m_lo * n_hi;
		res += (uint64_t)m_hi * n_lo;
		tmp = (res < tmp) ? (1ULL << 32) : 0;
		res = (res >> 32) + tmp;
	}

	res += (uint64_t)m_hi * n_hi;

	return res;
}
#endif
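
/*
 * Illustration only (assumes a compiler providing the unsigned __int128
 * extension, which this header does not require): the semantic above is
 * equivalent to
 *
 *	((unsigned __int128)m * n + (bias ? m : 0)) >> 64
 *
 * which can serve as a reference when cross-checking the 32-bit
 * implementation above in userspace.
 */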

extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);

/* The unnecessary pointer compare is there
 * to check for type safety (n must be 64-bit)
 */
# define do_div(n,base) ({				\
	uint32_t __base = (base);			\
	uint32_t __rem;					\
	(void)(((typeof((n)) *)0) == ((uint64_t *)0));	\
	if (__builtin_constant_p(__base) &&		\
	    is_power_of_2(__base)) {			\
		__rem = (n) & (__base - 1);		\
		(n) >>= ilog2(__base);			\
	} else if (__div64_const32_is_OK &&		\
		   __builtin_constant_p(__base) &&	\
		   __base != 0) {			\
		uint32_t __res_lo, __n_lo = (n);	\
		(n) = __div64_const32(n, __base);	\
		/* the remainder can be computed with 32-bit regs */ \
		__res_lo = (n);				\
		__rem = __n_lo - __res_lo * __base;	\
	} else if (likely(((n) >> 32) == 0)) {		\
		__rem = (uint32_t)(n) % __base;		\
		(n) = (uint32_t)(n) / __base;		\
	} else						\
		__rem = __div64_32(&(n), __base);	\
	__rem;						\
 })

#else /* BITS_PER_LONG == ?? */

# error do_div() does not yet support the C64

#endif /* BITS_PER_LONG */

#endif /* _ASM_GENERIC_DIV64_H */