div64.h, old revision 461a5e51060c93f5844113f4be9dba513cc92830 vs. new revision f682b27c57aec2f0ca8927f9bb7c267c6165ad5a: the new revision pulls the open-coded 128-bit cross product out of the `__div64_const32()` macro into a separate `__arch_xprod_64()` helper with a default C implementation that architectures may override.
```diff
 #ifndef _ASM_GENERIC_DIV64_H
 #define _ASM_GENERIC_DIV64_H
 /*
  * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
  * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
  *
  * Optimization for constant divisors on 32-bit machines:
  * Copyright (C) 2006-2015 Nicolas Pitre

--- 49 unchanged lines hidden ---

 	 * \
 	 * We rely on the fact that most of this code gets optimized \
 	 * away at compile time due to constant propagation and only \
 	 * a few multiplication instructions should remain. \
 	 * Hence this monstrous macro (static inline doesn't always \
 	 * do the trick here). \
 	 */ \
 	uint64_t ___res, ___x, ___t, ___m, ___n = (n); \
-	uint32_t ___p, ___bias, ___m_lo, ___m_hi, ___n_lo, ___n_hi; \
+	uint32_t ___p, ___bias; \
 	\
 	/* determine MSB of b */ \
 	___p = 1 << ilog2(___b); \
 	\
 	/* compute m = ((p << 64) + b - 1) / b */ \
 	___m = (~0ULL / ___b) * ___p; \
 	___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b; \
 	\

--- 58 unchanged lines hidden ---

 	/* \
 	 * Now we have a combination of 2 conditions: \
 	 * \
 	 * 1) whether or not we need to apply a bias, and \
 	 * \
 	 * 2) whether or not there might be an overflow in the cross \
 	 *    product determined by (___m & ((1 << 63) | (1 << 31))). \
 	 * \
-	 * Select the best way to do (m_bias + m * n) / (p << 64). \
+	 * Select the best way to do (m_bias + m * n) / (1 << 64). \
 	 * From now on there will be actual runtime code generated. \
 	 */ \
+	___res = __arch_xprod_64(___m, ___n, ___bias); \
 	\
-	___m_lo = ___m; \
-	___m_hi = ___m >> 32; \
-	___n_lo = ___n; \
-	___n_hi = ___n >> 32; \
-	\
-	if (!___bias) { \
-		___res = ((uint64_t)___m_lo * ___n_lo) >> 32; \
-	} else if (!(___m & ((1ULL << 63) | (1ULL << 31)))) { \
-		___res = (___m + (uint64_t)___m_lo * ___n_lo) >> 32; \
-	} else { \
-		___res = ___m + (uint64_t)___m_lo * ___n_lo; \
-		___t = (___res < ___m) ? (1ULL << 32) : 0; \
-		___res = (___res >> 32) + ___t; \
-	} \
-	\
-	if (!(___m & ((1ULL << 63) | (1ULL << 31)))) { \
-		___res += (uint64_t)___m_lo * ___n_hi; \
-		___res += (uint64_t)___m_hi * ___n_lo; \
-		___res >>= 32; \
-	} else { \
-		___t = ___res += (uint64_t)___m_lo * ___n_hi; \
-		___res += (uint64_t)___m_hi * ___n_lo; \
-		___t = (___res < ___t) ? (1ULL << 32) : 0; \
-		___res = (___res >> 32) + ___t; \
-	} \
-	\
-	___res += (uint64_t)___m_hi * ___n_hi; \
-	\
 	___res /= ___p; \
 })

```
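The `___m` computation above is the 64-bit-only way of evaluating m = ((p << 64) + b - 1) / b: since ~0ULL = b * (~0ULL / b) + (~0ULL % b), multiplying through by p and dividing by b splits the quotient exactly into the two statements shown. A quick way to convince yourself of the identity is a user-space check against a 128-bit reference; this is a minimal sketch assuming a GCC/Clang toolchain where `unsigned __int128` is available, and it is not part of the header:

```c
/* Standalone sanity check, not kernel code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t b = 10000000;				/* example nonzero 32-bit constant divisor */
	uint32_t p = 1u << (63 - __builtin_clzll(b));	/* MSB of b, i.e. 1 << ilog2(b) */

	/* the header's two-step formulation, using only 64-bit arithmetic */
	uint64_t m = (~0ULL / b) * p;
	m += (((~0ULL % b + 1) * p) + b - 1) / b;

	/* direct 128-bit reference: m = ((p << 64) + b - 1) / b */
	unsigned __int128 ref = (((unsigned __int128)p << 64) + b - 1) / b;
	assert(m == (uint64_t)ref);

	printf("b=%u p=%u m=%#llx\n", b, p, (unsigned long long)m);
	return 0;
}
```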
```diff
+#ifndef __arch_xprod_64
+/*
+ * Default C implementation for __arch_xprod_64()
+ *
+ * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
+ * Semantic: retval = ((bias ? m : 0) + m * n) >> 64
+ *
+ * The product is a 128-bit value, scaled down to 64 bits.
+ * Assuming constant propagation to optimize away unused conditional code.
+ * Architectures may provide their own optimized assembly implementation.
+ */
+static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
+{
+	uint32_t m_lo = m;
+	uint32_t m_hi = m >> 32;
+	uint32_t n_lo = n;
+	uint32_t n_hi = n >> 32;
+	uint64_t res, tmp;
+
+	if (!bias) {
+		res = ((uint64_t)m_lo * n_lo) >> 32;
+	} else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
+		/* there can't be any overflow here */
+		res = (m + (uint64_t)m_lo * n_lo) >> 32;
+	} else {
+		res = m + (uint64_t)m_lo * n_lo;
+		tmp = (res < m) ? (1ULL << 32) : 0;
+		res = (res >> 32) + tmp;
+	}
+
+	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
+		/* there can't be any overflow here */
+		res += (uint64_t)m_lo * n_hi;
+		res += (uint64_t)m_hi * n_lo;
+		res >>= 32;
+	} else {
+		tmp = res += (uint64_t)m_lo * n_hi;
+		res += (uint64_t)m_hi * n_lo;
+		tmp = (res < tmp) ? (1ULL << 32) : 0;
+		res = (res >> 32) + tmp;
+	}
+
+	res += (uint64_t)m_hi * n_hi;
+
+	return res;
+}
+#endif
```
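The helper's contract is retval = ((bias ? m : 0) + m * n) >> 64, that is, the upper half of a 128-bit product built from four 32x32 multiplies. The `(1ULL << 63) | (1ULL << 31)` test is what makes the fast paths safe: when both bits are clear, m_hi and m_lo are each below 2^31, so the sum of the two middle partial products plus the carried-in upper half of m_lo * n_lo cannot wrap a 64-bit accumulator and no carry tracking is needed; otherwise the slow paths detect wrap-around and add the carry back in. The sketch below mirrors that decomposition in user space (the bias term is left out for brevity) and checks it against `unsigned __int128`; it is an illustration assuming a GCC/Clang toolchain, not a copy of the kernel helper:

```c
/* Illustrative user-space sketch of the 32x32 cross-product decomposition. */
#include <assert.h>
#include <stdint.h>

static uint64_t xprod_hi64(uint64_t m, uint64_t n)
{
	uint32_t m_lo = m, m_hi = m >> 32;
	uint32_t n_lo = n, n_hi = n >> 32;
	uint64_t lo, mid1, mid2, acc, t, carry;

	lo   = (uint64_t)m_lo * n_lo;	/* bits  0..63 of m * n */
	mid1 = (uint64_t)m_lo * n_hi;	/* bits 32..95 */
	mid2 = (uint64_t)m_hi * n_lo;	/* bits 32..95 */

	/* sum the middle terms plus the top half of 'lo'; the second add
	 * may wrap, which corresponds to a carry into bit 64 */
	acc = (lo >> 32) + mid1;	/* cannot overflow 64 bits */
	t = acc;
	acc += mid2;
	carry = (acc < t) ? (1ULL << 32) : 0;

	/* bits 64..127 of m * n */
	return (uint64_t)m_hi * n_hi + (acc >> 32) + carry;
}

int main(void)
{
	uint64_t m = 0xb424dc35095cd80fULL;	/* arbitrary test values */
	uint64_t n = 0x0123456789abcdefULL;
	unsigned __int128 ref = ((unsigned __int128)m * n) >> 64;

	assert(xprod_hi64(m, n) == (uint64_t)ref);
	return 0;
}
```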
|
```diff
 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
 
 /* The unnecessary pointer compare is there
  * to check for type safety (n must be 64bit)
  */
 # define do_div(n,base) ({ \
 	uint32_t __base = (base); \
 	uint32_t __rem; \

--- 28 unchanged lines hidden ---
```
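For reference, `__div64_32()` and the `do_div()` macro that follows are the classic 64-by-32 division interface: `do_div(n, base)` divides the 64-bit lvalue `n` in place (the quotient is left in `n`) and the macro itself evaluates to the 32-bit remainder. A minimal caller sketch follows; the helper name and surrounding context are invented for illustration, and in-tree code normally reaches this header via `#include <asm/div64.h>` or `<linux/math64.h>`:

```c
#include <linux/types.h>
#include <asm/div64.h>

/* Hypothetical helper: split a nanosecond count into seconds plus remainder. */
static u64 ns_to_secs(u64 ns, u32 *rem)
{
	/*
	 * do_div() modifies its first argument in place: on return 'ns'
	 * holds the quotient and the macro's value is the remainder.
	 * A constant divisor such as 1000000000 lets 32-bit builds take
	 * the __div64_const32() fast path above.
	 */
	*rem = do_div(ns, 1000000000);
	return ns;
}
```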