/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_DIV64_H
#define _ASM_GENERIC_DIV64_H
/*
 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
 * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
 *
 * Optimization for constant divisors on 32-bit machines:
 * Copyright (C) 2006-2015 Nicolas Pitre
 *
 * The semantics of do_div() are:
 *
 * uint32_t do_div(uint64_t *n, uint32_t base)
 * {
 *	uint32_t remainder = *n % base;
 *	*n = *n / base;
 *	return remainder;
 * }
 *
 * NOTE: macro parameter n is evaluated multiple times,
 * beware of side effects!
 */

#include <linux/types.h>
#include <linux/compiler.h>

#if BITS_PER_LONG == 64

/**
 * do_div - divide a 64-bit dividend in place and return the remainder
 * @n: uint64_t dividend (will be updated)
 * @base: uint32_t divisor
 *
 * Summary:
 * ``uint32_t remainder = *n % base;``
 * ``*n = *n / base;``
 *
 * Return: (uint32_t)remainder
 *
 * NOTE: macro parameter @n is evaluated multiple times,
 * beware of side effects!
 */
# define do_div(n,base) ({					\
	uint32_t __base = (base);				\
	uint32_t __rem;						\
	__rem = ((uint64_t)(n)) % __base;			\
	(n) = ((uint64_t)(n)) / __base;				\
	__rem;							\
 })

#elif BITS_PER_LONG == 32

#include <linux/log2.h>

/*
 * If the divisor happens to be constant, we determine the appropriate
 * inverse at compile time to turn the division into a few inline
 * multiplications, which ought to be much faster.  This works only when
 * compiling with a gcc version recent enough to perform proper 64-bit
 * constant propagation.
 *
 * (It is unfortunate that gcc doesn't perform all this internally.)
 */
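
/*
 * A worked example of the reciprocal trick (illustrative only; the
 * macro below may end up selecting a different but equivalent
 * formulation for this particular divisor): for b = 10, the MSB gives
 * p = 8, and m = ((p << 64) + b - 1) / b = 0xCCCCCCCCCCCCCCCD, so that
 * for any 64-bit n:
 *
 *	n / 10 == (((unsigned __int128)n * m) >> 64) >> 3
 *
 * i.e. one 64x64->128-bit multiplication and two shifts replace the
 * division.  (__int128 is used here only for notation; the actual code
 * builds the high 64 bits of the product out of 32-bit pieces.)
 */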

#ifndef __div64_const32_is_OK
#define __div64_const32_is_OK (__GNUC__ >= 4)
#endif

#define __div64_const32(n, ___b)					\
({									\
	/*								\
	 * Multiplication by reciprocal of b: n / b = n * (p / b) / p	\
	 *								\
	 * We rely on the fact that most of this code gets optimized	\
	 * away at compile time due to constant propagation and only	\
	 * a few multiplication instructions should remain.		\
	 * Hence this monstrous macro (static inline doesn't always	\
	 * do the trick here).						\
	 */								\
	uint64_t ___res, ___x, ___t, ___m, ___n = (n);			\
	uint32_t ___p, ___bias;						\
									\
	/* determine MSB of b */					\
	___p = 1 << ilog2(___b);					\
									\
	/* compute m = ((p << 64) + b - 1) / b */			\
	___m = (~0ULL / ___b) * ___p;					\
	___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b;	\
									\
	/* one less than the dividend with highest result */		\
	___x = ~0ULL / ___b * ___b - 1;					\
									\
	/* test our ___m with res = m * x / (p << 64) */		\
	___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32;	\
	___t = ___res += (___m & 0xffffffff) * (___x >> 32);		\
	___res += (___x & 0xffffffff) * (___m >> 32);			\
	___t = (___res < ___t) ? (1ULL << 32) : 0;			\
	___res = (___res >> 32) + ___t;					\
	___res += (___m >> 32) * (___x >> 32);				\
	___res /= ___p;							\
									\
	/* Now sanitize and optimize what we've got. */			\
	if (~0ULL % (___b / (___b & -___b)) == 0) {			\
		/* special case, can be simplified to ... */		\
		___n /= (___b & -___b);					\
		___m = ~0ULL / (___b / (___b & -___b));			\
		___p = 1;						\
		___bias = 1;						\
	} else if (___res != ___x / ___b) {				\
		/*							\
		 * We can't get away without a bias to compensate	\
		 * for bit truncation errors.  To avoid it we'd need an	\
		 * additional bit to represent m which would overflow	\
		 * a 64-bit variable.					\
		 *							\
		 * Instead we do m = p / b and n / b = (n * m + m) / p.	\
		 */							\
		___bias = 1;						\
		/* Compute m = (p << 64) / b */				\
		___m = (~0ULL / ___b) * ___p;				\
		___m += ((~0ULL % ___b + 1) * ___p) / ___b;		\
	} else {							\
		/*							\
		 * Reduce m / p, and try to clear bit 31 of m when	\
		 * possible, otherwise that'll need extra overflow	\
		 * handling later.					\
		 */							\
		uint32_t ___bits = -(___m & -___m);			\
		___bits |= ___m >> 32;					\
		___bits = (~___bits) << 1;				\
		/*							\
		 * If ___bits == 0 then setting bit 31 is unavoidable.	\
		 * Simply apply the maximum possible reduction in that	\
		 * case.  Otherwise the MSB of ___bits indicates the	\
		 * best reduction we should apply.			\
		 */							\
		if (!___bits) {						\
			___p /= (___m & -___m);				\
			___m /= (___m & -___m);				\
		} else {						\
			___p >>= ilog2(___bits);			\
			___m >>= ilog2(___bits);			\
		}							\
		/* No bias needed. */					\
		___bias = 0;						\
	}								\
									\
	/*								\
	 * Now we have a combination of 2 conditions:			\
	 *								\
	 * 1) whether or not we need to apply a bias, and		\
	 *								\
	 * 2) whether or not there might be an overflow in the cross	\
	 *    product determined by (___m & ((1 << 63) | (1 << 31))).	\
	 *								\
	 * Select the best way to do (m_bias + m * n) / (1 << 64).	\
	 * From now on there will be actual runtime code generated.	\
	 */								\
	___res = __arch_xprod_64(___m, ___n, ___bias);			\
									\
	___res /= ___p;							\
})
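
/*
 * The math behind the cross product computed by __arch_xprod_64()
 * below, spelled out for reference: with m = m_hi * 2^32 + m_lo and
 * n = n_hi * 2^32 + n_lo, the full 128-bit product expands to
 *
 *	m * n = ((m_hi * n_hi) << 64)
 *	      + ((m_hi * n_lo + m_lo * n_hi) << 32)
 *	      +  (m_lo * n_lo)
 *
 * Only bits 64..127 of this are wanted, so each 32x32->64 partial
 * product is shifted into place and the carry out of the low half is
 * reconstructed by hand (the "res_hi = (res_lo < ...)" comparisons),
 * since plain 64-bit additions would silently discard it.
 */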

#ifndef __arch_xprod_64
/*
 * Default C implementation for __arch_xprod_64()
 *
 * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 * Semantic:  retval = ((bias ? m : 0) + m * n) >> 64
 *
 * The product is a 128-bit value, scaled down to 64 bits.
 * Assuming constant propagation to optimize away unused conditional code.
 * Architectures may provide their own optimized assembly implementation.
 */
static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
{
	uint32_t m_lo = m;
	uint32_t m_hi = m >> 32;
	uint32_t n_lo = n;
	uint32_t n_hi = n >> 32;
	uint64_t res;
	uint32_t res_lo, res_hi, tmp;

	if (!bias) {
		res = ((uint64_t)m_lo * n_lo) >> 32;
	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		/* there can't be any overflow here */
		res = (m + (uint64_t)m_lo * n_lo) >> 32;
	} else {
		res = m + (uint64_t)m_lo * n_lo;
		res_lo = res >> 32;
		res_hi = (res_lo < m_hi);
		res = res_lo | ((uint64_t)res_hi << 32);
	}

	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
		/* there can't be any overflow here */
		res += (uint64_t)m_lo * n_hi;
		res += (uint64_t)m_hi * n_lo;
		res >>= 32;
	} else {
		res += (uint64_t)m_lo * n_hi;
		tmp = res >> 32;
		res += (uint64_t)m_hi * n_lo;
		res_lo = res >> 32;
		res_hi = (res_lo < tmp);
		res = res_lo | ((uint64_t)res_hi << 32);
	}

	res += (uint64_t)m_hi * n_hi;

	return res;
}
#endif
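
/*
 * A minimal userspace sanity check of the semantic above, assuming a
 * compiler with unsigned __int128 support (not part of the kernel
 * build, shown only for reference):
 *
 *	unsigned __int128 p = (unsigned __int128)m * n + (bias ? m : 0);
 *	assert(__arch_xprod_64(m, n, bias) == (uint64_t)(p >> 64));
 */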

#ifndef __div64_32
extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
#endif

/* The unnecessary pointer compare is there
 * to check for type safety (n must be 64-bit)
 */
# define do_div(n,base) ({					\
	uint32_t __base = (base);				\
	uint32_t __rem;						\
	(void)(((typeof((n)) *)0) == ((uint64_t *)0));		\
	if (__builtin_constant_p(__base) &&			\
	    is_power_of_2(__base)) {				\
		__rem = (n) & (__base - 1);			\
		(n) >>= ilog2(__base);				\
	} else if (__div64_const32_is_OK &&			\
		   __builtin_constant_p(__base) &&		\
		   __base != 0) {				\
		uint32_t __res_lo, __n_lo = (n);		\
		(n) = __div64_const32(n, __base);		\
		/* the remainder can be computed with 32-bit regs */ \
		__res_lo = (n);					\
		__rem = __n_lo - __res_lo * __base;		\
	} else if (likely(((n) >> 32) == 0)) {			\
		__rem = (uint32_t)(n) % __base;			\
		(n) = (uint32_t)(n) / __base;			\
	} else							\
		__rem = __div64_32(&(n), __base);		\
	__rem;							\
 })
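
/*
 * Typical usage (illustrative only): split a nanosecond count into
 * seconds plus leftover nanoseconds.  Note that do_div() modifies its
 * first argument in place and returns the remainder:
 *
 *	uint64_t ns = 3000000001ULL;
 *	uint32_t rem = do_div(ns, 1000000000);
 *	// now ns == 3 (seconds) and rem == 1 (nanoseconds)
 */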

#else /* BITS_PER_LONG == ?? */

# error do_div() does not yet support the C64

#endif /* BITS_PER_LONG */

#endif /* _ASM_GENERIC_DIV64_H */