1 /* 2 * Copyright 2012-15 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dm_services.h" 27 #include "include/fixed31_32.h" 28 29 static inline unsigned long long abs_i64( 30 long long arg) 31 { 32 if (arg > 0) 33 return (unsigned long long)arg; 34 else 35 return (unsigned long long)(-arg); 36 } 37 38 /* 39 * @brief 40 * result = dividend / divisor 41 * *remainder = dividend % divisor 42 */ 43 static inline unsigned long long complete_integer_division_u64( 44 unsigned long long dividend, 45 unsigned long long divisor, 46 unsigned long long *remainder) 47 { 48 unsigned long long result; 49 50 ASSERT(divisor); 51 52 result = div64_u64_rem(dividend, divisor, remainder); 53 54 return result; 55 } 56 57 58 #define FRACTIONAL_PART_MASK \ 59 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) 60 61 #define GET_INTEGER_PART(x) \ 62 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) 63 64 #define GET_FRACTIONAL_PART(x) \ 65 (FRACTIONAL_PART_MASK & (x)) 66 67 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) 68 { 69 struct fixed31_32 res; 70 71 bool arg1_negative = numerator < 0; 72 bool arg2_negative = denominator < 0; 73 74 unsigned long long arg1_value = arg1_negative ? -numerator : numerator; 75 unsigned long long arg2_value = arg2_negative ? -denominator : denominator; 76 77 unsigned long long remainder; 78 79 /* determine integer part */ 80 81 unsigned long long res_value = complete_integer_division_u64( 82 arg1_value, arg2_value, &remainder); 83 84 ASSERT(res_value <= LONG_MAX); 85 86 /* determine fractional part */ 87 { 88 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; 89 90 do { 91 remainder <<= 1; 92 93 res_value <<= 1; 94 95 if (remainder >= arg2_value) { 96 res_value |= 1; 97 remainder -= arg2_value; 98 } 99 } while (--i != 0); 100 } 101 102 /* round up LSB */ 103 { 104 unsigned long long summand = (remainder << 1) >= arg2_value; 105 106 ASSERT(res_value <= LLONG_MAX - summand); 107 108 res_value += summand; 109 } 110 111 res.value = (long long)res_value; 112 113 if (arg1_negative ^ arg2_negative) 114 res.value = -res.value; 115 116 return res; 117 } 118 119 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) 120 { 121 struct fixed31_32 res; 122 123 bool arg1_negative = arg1.value < 0; 124 bool arg2_negative = arg2.value < 0; 125 126 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; 127 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; 128 129 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); 130 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); 131 132 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); 133 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); 134 135 unsigned long long tmp; 136 137 res.value = arg1_int * arg2_int; 138 139 ASSERT(res.value <= LONG_MAX); 140 141 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 142 143 tmp = arg1_int * arg2_fra; 144 145 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 146 147 res.value += tmp; 148 149 tmp = arg2_int * arg1_fra; 150 151 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 152 153 res.value += tmp; 154 155 tmp = arg1_fra * arg2_fra; 156 157 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 158 (tmp >= (unsigned long long)dc_fixpt_half.value); 159 160 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 161 162 res.value += tmp; 163 164 if (arg1_negative ^ arg2_negative) 165 res.value = -res.value; 166 167 return res; 168 } 169 170 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) 171 { 172 struct fixed31_32 res; 173 174 unsigned long long arg_value = abs_i64(arg.value); 175 176 unsigned long long arg_int = GET_INTEGER_PART(arg_value); 177 178 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); 179 180 unsigned long long tmp; 181 182 res.value = arg_int * arg_int; 183 184 ASSERT(res.value <= LONG_MAX); 185 186 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 187 188 tmp = arg_int * arg_fra; 189 190 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 191 192 res.value += tmp; 193 194 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 195 196 res.value += tmp; 197 198 tmp = arg_fra * arg_fra; 199 200 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 201 (tmp >= (unsigned long long)dc_fixpt_half.value); 202 203 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 204 205 res.value += tmp; 206 207 return res; 208 } 209 210 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) 211 { 212 /* 213 * @note 214 * Good idea to use Newton's method 215 */ 216 217 ASSERT(arg.value); 218 219 return dc_fixpt_from_fraction( 220 dc_fixpt_one.value, 221 arg.value); 222 } 223 224 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) 225 { 226 struct fixed31_32 square; 227 228 struct fixed31_32 res = dc_fixpt_one; 229 230 int n = 27; 231 232 struct fixed31_32 arg_norm = arg; 233 234 if (dc_fixpt_le( 235 dc_fixpt_two_pi, 236 dc_fixpt_abs(arg))) { 237 arg_norm = dc_fixpt_sub( 238 arg_norm, 239 dc_fixpt_mul_int( 240 dc_fixpt_two_pi, 241 (int)div64_s64( 242 arg_norm.value, 243 dc_fixpt_two_pi.value))); 244 } 245 246 square = dc_fixpt_sqr(arg_norm); 247 248 do { 249 res = dc_fixpt_sub( 250 dc_fixpt_one, 251 dc_fixpt_div_int( 252 dc_fixpt_mul( 253 square, 254 res), 255 n * (n - 1))); 256 257 n -= 2; 258 } while (n > 2); 259 260 if (arg.value != arg_norm.value) 261 res = dc_fixpt_div( 262 dc_fixpt_mul(res, arg_norm), 263 arg); 264 265 return res; 266 } 267 268 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) 269 { 270 return dc_fixpt_mul( 271 arg, 272 dc_fixpt_sinc(arg)); 273 } 274 275 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) 276 { 277 /* TODO implement argument normalization */ 278 279 const struct fixed31_32 square = dc_fixpt_sqr(arg); 280 281 struct fixed31_32 res = dc_fixpt_one; 282 283 int n = 26; 284 285 do { 286 res = dc_fixpt_sub( 287 dc_fixpt_one, 288 dc_fixpt_div_int( 289 dc_fixpt_mul( 290 square, 291 res), 292 n * (n - 1))); 293 294 n -= 2; 295 } while (n != 0); 296 297 return res; 298 } 299 300 /* 301 * @brief 302 * result = exp(arg), 303 * where abs(arg) < 1 304 * 305 * Calculated as Taylor series. 306 */ 307 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) 308 { 309 unsigned int n = 9; 310 311 struct fixed31_32 res = dc_fixpt_from_fraction( 312 n + 2, 313 n + 1); 314 /* TODO find correct res */ 315 316 ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); 317 318 do 319 res = dc_fixpt_add( 320 dc_fixpt_one, 321 dc_fixpt_div_int( 322 dc_fixpt_mul( 323 arg, 324 res), 325 n)); 326 while (--n != 1); 327 328 return dc_fixpt_add( 329 dc_fixpt_one, 330 dc_fixpt_mul( 331 arg, 332 res)); 333 } 334 335 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) 336 { 337 /* 338 * @brief 339 * Main equation is: 340 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 341 * where m = round(x / ln(2)), r = x - m * ln(2) 342 */ 343 344 if (dc_fixpt_le( 345 dc_fixpt_ln2_div_2, 346 dc_fixpt_abs(arg))) { 347 int m = dc_fixpt_round( 348 dc_fixpt_div( 349 arg, 350 dc_fixpt_ln2)); 351 352 struct fixed31_32 r = dc_fixpt_sub( 353 arg, 354 dc_fixpt_mul_int( 355 dc_fixpt_ln2, 356 m)); 357 358 ASSERT(m != 0); 359 360 ASSERT(dc_fixpt_lt( 361 dc_fixpt_abs(r), 362 dc_fixpt_one)); 363 364 if (m > 0) 365 return dc_fixpt_shl( 366 fixed31_32_exp_from_taylor_series(r), 367 (unsigned char)m); 368 else 369 return dc_fixpt_div_int( 370 fixed31_32_exp_from_taylor_series(r), 371 1LL << -m); 372 } else if (arg.value != 0) 373 return fixed31_32_exp_from_taylor_series(arg); 374 else 375 return dc_fixpt_one; 376 } 377 378 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) 379 { 380 struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one); 381 /* TODO improve 1st estimation */ 382 383 struct fixed31_32 error; 384 385 ASSERT(arg.value > 0); 386 /* TODO if arg is negative, return NaN */ 387 /* TODO if arg is zero, return -INF */ 388 389 do { 390 struct fixed31_32 res1 = dc_fixpt_add( 391 dc_fixpt_sub( 392 res, 393 dc_fixpt_one), 394 dc_fixpt_div( 395 arg, 396 dc_fixpt_exp(res))); 397 398 error = dc_fixpt_sub( 399 res, 400 res1); 401 402 res = res1; 403 /* TODO determine max_allowed_error based on quality of exp() */ 404 } while (abs_i64(error.value) > 100ULL); 405 406 return res; 407 } 408 409 410 /* this function is a generic helper to translate fixed point value to 411 * specified integer format that will consist of integer_bits integer part and 412 * fractional_bits fractional part. For example it is used in 413 * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional 414 * part in 32 bits. It is used in hw programming (scaler) 415 */ 416 417 static inline unsigned int ux_dy( 418 long long value, 419 unsigned int integer_bits, 420 unsigned int fractional_bits) 421 { 422 /* 1. create mask of integer part */ 423 unsigned int result = (1 << integer_bits) - 1; 424 /* 2. mask out fractional part */ 425 unsigned int fractional_part = FRACTIONAL_PART_MASK & value; 426 /* 3. shrink fixed point integer part to be of integer_bits width*/ 427 result &= GET_INTEGER_PART(value); 428 /* 4. make space for fractional part to be filled in after integer */ 429 result <<= fractional_bits; 430 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 431 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; 432 /* 6. merge the result */ 433 return result | fractional_part; 434 } 435 436 static inline unsigned int clamp_ux_dy( 437 long long value, 438 unsigned int integer_bits, 439 unsigned int fractional_bits, 440 unsigned int min_clamp) 441 { 442 unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); 443 444 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) 445 return (1 << (integer_bits + fractional_bits)) - 1; 446 else if (truncated_val > min_clamp) 447 return truncated_val; 448 else 449 return min_clamp; 450 } 451 452 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) 453 { 454 return ux_dy(arg.value, 4, 19); 455 } 456 457 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) 458 { 459 return ux_dy(arg.value, 3, 19); 460 } 461 462 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) 463 { 464 return ux_dy(arg.value, 2, 19); 465 } 466 467 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) 468 { 469 return ux_dy(arg.value, 0, 19); 470 } 471 472 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) 473 { 474 return clamp_ux_dy(arg.value, 0, 14, 1); 475 } 476 477 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) 478 { 479 return clamp_ux_dy(arg.value, 0, 10, 1); 480 } 481 482 int dc_fixpt_s4d19(struct fixed31_32 arg) 483 { 484 if (arg.value < 0) 485 return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19); 486 else 487 return ux_dy(arg.value, 4, 19); 488 } 489