1 /* 2 * Copyright 2012-15 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dm_services.h" 27 #include "include/fixed31_32.h" 28 29 static inline uint64_t abs_i64( 30 int64_t arg) 31 { 32 if (arg > 0) 33 return (uint64_t)arg; 34 else 35 return (uint64_t)(-arg); 36 } 37 38 /* 39 * @brief 40 * result = dividend / divisor 41 * *remainder = dividend % divisor 42 */ 43 static inline uint64_t complete_integer_division_u64( 44 uint64_t dividend, 45 uint64_t divisor, 46 uint64_t *remainder) 47 { 48 uint64_t result; 49 50 ASSERT(divisor); 51 52 result = div64_u64_rem(dividend, divisor, remainder); 53 54 return result; 55 } 56 57 58 #define FRACTIONAL_PART_MASK \ 59 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) 60 61 #define GET_INTEGER_PART(x) \ 62 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) 63 64 #define GET_FRACTIONAL_PART(x) \ 65 (FRACTIONAL_PART_MASK & (x)) 66 67 struct fixed31_32 dal_fixed31_32_from_fraction( 68 int64_t numerator, 69 int64_t denominator) 70 { 71 struct fixed31_32 res; 72 73 bool arg1_negative = numerator < 0; 74 bool arg2_negative = denominator < 0; 75 76 uint64_t arg1_value = arg1_negative ? -numerator : numerator; 77 uint64_t arg2_value = arg2_negative ? -denominator : denominator; 78 79 uint64_t remainder; 80 81 /* determine integer part */ 82 83 uint64_t res_value = complete_integer_division_u64( 84 arg1_value, arg2_value, &remainder); 85 86 ASSERT(res_value <= LONG_MAX); 87 88 /* determine fractional part */ 89 { 90 uint32_t i = FIXED31_32_BITS_PER_FRACTIONAL_PART; 91 92 do { 93 remainder <<= 1; 94 95 res_value <<= 1; 96 97 if (remainder >= arg2_value) { 98 res_value |= 1; 99 remainder -= arg2_value; 100 } 101 } while (--i != 0); 102 } 103 104 /* round up LSB */ 105 { 106 uint64_t summand = (remainder << 1) >= arg2_value; 107 108 ASSERT(res_value <= LLONG_MAX - summand); 109 110 res_value += summand; 111 } 112 113 res.value = (int64_t)res_value; 114 115 if (arg1_negative ^ arg2_negative) 116 res.value = -res.value; 117 118 return res; 119 } 120 121 struct fixed31_32 dal_fixed31_32_from_int_nonconst( 122 int64_t arg) 123 { 124 struct fixed31_32 res; 125 126 ASSERT((LONG_MIN <= arg) && (arg <= LONG_MAX)); 127 128 res.value = arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; 129 130 return res; 131 } 132 133 struct fixed31_32 dal_fixed31_32_shl( 134 struct fixed31_32 arg, 135 uint8_t shift) 136 { 137 struct fixed31_32 res; 138 139 ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || 140 ((arg.value < 0) && (arg.value >= LLONG_MIN >> shift))); 141 142 res.value = arg.value << shift; 143 144 return res; 145 } 146 147 struct fixed31_32 dal_fixed31_32_add( 148 struct fixed31_32 arg1, 149 struct fixed31_32 arg2) 150 { 151 struct fixed31_32 res; 152 153 ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || 154 ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); 155 156 res.value = arg1.value + arg2.value; 157 158 return res; 159 } 160 161 struct fixed31_32 dal_fixed31_32_sub( 162 struct fixed31_32 arg1, 163 struct fixed31_32 arg2) 164 { 165 struct fixed31_32 res; 166 167 ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || 168 ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); 169 170 res.value = arg1.value - arg2.value; 171 172 return res; 173 } 174 175 struct fixed31_32 dal_fixed31_32_mul( 176 struct fixed31_32 arg1, 177 struct fixed31_32 arg2) 178 { 179 struct fixed31_32 res; 180 181 bool arg1_negative = arg1.value < 0; 182 bool arg2_negative = arg2.value < 0; 183 184 uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value; 185 uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value; 186 187 uint64_t arg1_int = GET_INTEGER_PART(arg1_value); 188 uint64_t arg2_int = GET_INTEGER_PART(arg2_value); 189 190 uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value); 191 uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value); 192 193 uint64_t tmp; 194 195 res.value = arg1_int * arg2_int; 196 197 ASSERT(res.value <= LONG_MAX); 198 199 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 200 201 tmp = arg1_int * arg2_fra; 202 203 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 204 205 res.value += tmp; 206 207 tmp = arg2_int * arg1_fra; 208 209 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 210 211 res.value += tmp; 212 213 tmp = arg1_fra * arg2_fra; 214 215 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 216 (tmp >= (uint64_t)dal_fixed31_32_half.value); 217 218 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 219 220 res.value += tmp; 221 222 if (arg1_negative ^ arg2_negative) 223 res.value = -res.value; 224 225 return res; 226 } 227 228 struct fixed31_32 dal_fixed31_32_sqr( 229 struct fixed31_32 arg) 230 { 231 struct fixed31_32 res; 232 233 uint64_t arg_value = abs_i64(arg.value); 234 235 uint64_t arg_int = GET_INTEGER_PART(arg_value); 236 237 uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value); 238 239 uint64_t tmp; 240 241 res.value = arg_int * arg_int; 242 243 ASSERT(res.value <= LONG_MAX); 244 245 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 246 247 tmp = arg_int * arg_fra; 248 249 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 250 251 res.value += tmp; 252 253 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 254 255 res.value += tmp; 256 257 tmp = arg_fra * arg_fra; 258 259 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 260 (tmp >= (uint64_t)dal_fixed31_32_half.value); 261 262 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 263 264 res.value += tmp; 265 266 return res; 267 } 268 269 struct fixed31_32 dal_fixed31_32_recip( 270 struct fixed31_32 arg) 271 { 272 /* 273 * @note 274 * Good idea to use Newton's method 275 */ 276 277 ASSERT(arg.value); 278 279 return dal_fixed31_32_from_fraction( 280 dal_fixed31_32_one.value, 281 arg.value); 282 } 283 284 struct fixed31_32 dal_fixed31_32_sinc( 285 struct fixed31_32 arg) 286 { 287 struct fixed31_32 square; 288 289 struct fixed31_32 res = dal_fixed31_32_one; 290 291 int32_t n = 27; 292 293 struct fixed31_32 arg_norm = arg; 294 295 if (dal_fixed31_32_le( 296 dal_fixed31_32_two_pi, 297 dal_fixed31_32_abs(arg))) { 298 arg_norm = dal_fixed31_32_sub( 299 arg_norm, 300 dal_fixed31_32_mul_int( 301 dal_fixed31_32_two_pi, 302 (int32_t)div64_s64( 303 arg_norm.value, 304 dal_fixed31_32_two_pi.value))); 305 } 306 307 square = dal_fixed31_32_sqr(arg_norm); 308 309 do { 310 res = dal_fixed31_32_sub( 311 dal_fixed31_32_one, 312 dal_fixed31_32_div_int( 313 dal_fixed31_32_mul( 314 square, 315 res), 316 n * (n - 1))); 317 318 n -= 2; 319 } while (n > 2); 320 321 if (arg.value != arg_norm.value) 322 res = dal_fixed31_32_div( 323 dal_fixed31_32_mul(res, arg_norm), 324 arg); 325 326 return res; 327 } 328 329 struct fixed31_32 dal_fixed31_32_sin( 330 struct fixed31_32 arg) 331 { 332 return dal_fixed31_32_mul( 333 arg, 334 dal_fixed31_32_sinc(arg)); 335 } 336 337 struct fixed31_32 dal_fixed31_32_cos( 338 struct fixed31_32 arg) 339 { 340 /* TODO implement argument normalization */ 341 342 const struct fixed31_32 square = dal_fixed31_32_sqr(arg); 343 344 struct fixed31_32 res = dal_fixed31_32_one; 345 346 int32_t n = 26; 347 348 do { 349 res = dal_fixed31_32_sub( 350 dal_fixed31_32_one, 351 dal_fixed31_32_div_int( 352 dal_fixed31_32_mul( 353 square, 354 res), 355 n * (n - 1))); 356 357 n -= 2; 358 } while (n != 0); 359 360 return res; 361 } 362 363 /* 364 * @brief 365 * result = exp(arg), 366 * where abs(arg) < 1 367 * 368 * Calculated as Taylor series. 369 */ 370 static struct fixed31_32 fixed31_32_exp_from_taylor_series( 371 struct fixed31_32 arg) 372 { 373 uint32_t n = 9; 374 375 struct fixed31_32 res = dal_fixed31_32_from_fraction( 376 n + 2, 377 n + 1); 378 /* TODO find correct res */ 379 380 ASSERT(dal_fixed31_32_lt(arg, dal_fixed31_32_one)); 381 382 do 383 res = dal_fixed31_32_add( 384 dal_fixed31_32_one, 385 dal_fixed31_32_div_int( 386 dal_fixed31_32_mul( 387 arg, 388 res), 389 n)); 390 while (--n != 1); 391 392 return dal_fixed31_32_add( 393 dal_fixed31_32_one, 394 dal_fixed31_32_mul( 395 arg, 396 res)); 397 } 398 399 struct fixed31_32 dal_fixed31_32_exp( 400 struct fixed31_32 arg) 401 { 402 /* 403 * @brief 404 * Main equation is: 405 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 406 * where m = round(x / ln(2)), r = x - m * ln(2) 407 */ 408 409 if (dal_fixed31_32_le( 410 dal_fixed31_32_ln2_div_2, 411 dal_fixed31_32_abs(arg))) { 412 int32_t m = dal_fixed31_32_round( 413 dal_fixed31_32_div( 414 arg, 415 dal_fixed31_32_ln2)); 416 417 struct fixed31_32 r = dal_fixed31_32_sub( 418 arg, 419 dal_fixed31_32_mul_int( 420 dal_fixed31_32_ln2, 421 m)); 422 423 ASSERT(m != 0); 424 425 ASSERT(dal_fixed31_32_lt( 426 dal_fixed31_32_abs(r), 427 dal_fixed31_32_one)); 428 429 if (m > 0) 430 return dal_fixed31_32_shl( 431 fixed31_32_exp_from_taylor_series(r), 432 (uint8_t)m); 433 else 434 return dal_fixed31_32_div_int( 435 fixed31_32_exp_from_taylor_series(r), 436 1LL << -m); 437 } else if (arg.value != 0) 438 return fixed31_32_exp_from_taylor_series(arg); 439 else 440 return dal_fixed31_32_one; 441 } 442 443 struct fixed31_32 dal_fixed31_32_log( 444 struct fixed31_32 arg) 445 { 446 struct fixed31_32 res = dal_fixed31_32_neg(dal_fixed31_32_one); 447 /* TODO improve 1st estimation */ 448 449 struct fixed31_32 error; 450 451 ASSERT(arg.value > 0); 452 /* TODO if arg is negative, return NaN */ 453 /* TODO if arg is zero, return -INF */ 454 455 do { 456 struct fixed31_32 res1 = dal_fixed31_32_add( 457 dal_fixed31_32_sub( 458 res, 459 dal_fixed31_32_one), 460 dal_fixed31_32_div( 461 arg, 462 dal_fixed31_32_exp(res))); 463 464 error = dal_fixed31_32_sub( 465 res, 466 res1); 467 468 res = res1; 469 /* TODO determine max_allowed_error based on quality of exp() */ 470 } while (abs_i64(error.value) > 100ULL); 471 472 return res; 473 } 474 475 struct fixed31_32 dal_fixed31_32_pow( 476 struct fixed31_32 arg1, 477 struct fixed31_32 arg2) 478 { 479 return dal_fixed31_32_exp( 480 dal_fixed31_32_mul( 481 dal_fixed31_32_log(arg1), 482 arg2)); 483 } 484 485 int32_t dal_fixed31_32_floor( 486 struct fixed31_32 arg) 487 { 488 uint64_t arg_value = abs_i64(arg.value); 489 490 if (arg.value >= 0) 491 return (int32_t)GET_INTEGER_PART(arg_value); 492 else 493 return -(int32_t)GET_INTEGER_PART(arg_value); 494 } 495 496 int32_t dal_fixed31_32_round( 497 struct fixed31_32 arg) 498 { 499 uint64_t arg_value = abs_i64(arg.value); 500 501 const int64_t summand = dal_fixed31_32_half.value; 502 503 ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); 504 505 arg_value += summand; 506 507 if (arg.value >= 0) 508 return (int32_t)GET_INTEGER_PART(arg_value); 509 else 510 return -(int32_t)GET_INTEGER_PART(arg_value); 511 } 512 513 int32_t dal_fixed31_32_ceil( 514 struct fixed31_32 arg) 515 { 516 uint64_t arg_value = abs_i64(arg.value); 517 518 const int64_t summand = dal_fixed31_32_one.value - 519 dal_fixed31_32_epsilon.value; 520 521 ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); 522 523 arg_value += summand; 524 525 if (arg.value >= 0) 526 return (int32_t)GET_INTEGER_PART(arg_value); 527 else 528 return -(int32_t)GET_INTEGER_PART(arg_value); 529 } 530 531 /* this function is a generic helper to translate fixed point value to 532 * specified integer format that will consist of integer_bits integer part and 533 * fractional_bits fractional part. For example it is used in 534 * dal_fixed31_32_u2d19 to receive 2 bits integer part and 19 bits fractional 535 * part in 32 bits. It is used in hw programming (scaler) 536 */ 537 538 static inline uint32_t ux_dy( 539 int64_t value, 540 uint32_t integer_bits, 541 uint32_t fractional_bits) 542 { 543 /* 1. create mask of integer part */ 544 uint32_t result = (1 << integer_bits) - 1; 545 /* 2. mask out fractional part */ 546 uint32_t fractional_part = FRACTIONAL_PART_MASK & value; 547 /* 3. shrink fixed point integer part to be of integer_bits width*/ 548 result &= GET_INTEGER_PART(value); 549 /* 4. make space for fractional part to be filled in after integer */ 550 result <<= fractional_bits; 551 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 552 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; 553 /* 6. merge the result */ 554 return result | fractional_part; 555 } 556 557 uint32_t dal_fixed31_32_u2d19( 558 struct fixed31_32 arg) 559 { 560 return ux_dy(arg.value, 2, 19); 561 } 562 563 uint32_t dal_fixed31_32_u0d19( 564 struct fixed31_32 arg) 565 { 566 return ux_dy(arg.value, 0, 19); 567 } 568