1 /* 2 * Copyright 2012-15 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dm_services.h" 27 #include "include/fixed31_32.h" 28 29 static inline uint64_t abs_i64( 30 int64_t arg) 31 { 32 if (arg > 0) 33 return (uint64_t)arg; 34 else 35 return (uint64_t)(-arg); 36 } 37 38 /* 39 * @brief 40 * result = dividend / divisor 41 * *remainder = dividend % divisor 42 */ 43 static inline uint64_t complete_integer_division_u64( 44 uint64_t dividend, 45 uint64_t divisor, 46 uint64_t *remainder) 47 { 48 uint64_t result; 49 50 ASSERT(divisor); 51 52 result = div64_u64_rem(dividend, divisor, remainder); 53 54 return result; 55 } 56 57 #define BITS_PER_FRACTIONAL_PART \ 58 32 59 60 #define FRACTIONAL_PART_MASK \ 61 ((1ULL << BITS_PER_FRACTIONAL_PART) - 1) 62 63 #define GET_INTEGER_PART(x) \ 64 ((x) >> BITS_PER_FRACTIONAL_PART) 65 66 #define GET_FRACTIONAL_PART(x) \ 67 (FRACTIONAL_PART_MASK & (x)) 68 69 struct fixed31_32 dal_fixed31_32_from_fraction( 70 int64_t numerator, 71 int64_t denominator) 72 { 73 struct fixed31_32 res; 74 75 bool arg1_negative = numerator < 0; 76 bool arg2_negative = denominator < 0; 77 78 uint64_t arg1_value = arg1_negative ? -numerator : numerator; 79 uint64_t arg2_value = arg2_negative ? -denominator : denominator; 80 81 uint64_t remainder; 82 83 /* determine integer part */ 84 85 uint64_t res_value = complete_integer_division_u64( 86 arg1_value, arg2_value, &remainder); 87 88 ASSERT(res_value <= LONG_MAX); 89 90 /* determine fractional part */ 91 { 92 uint32_t i = BITS_PER_FRACTIONAL_PART; 93 94 do { 95 remainder <<= 1; 96 97 res_value <<= 1; 98 99 if (remainder >= arg2_value) { 100 res_value |= 1; 101 remainder -= arg2_value; 102 } 103 } while (--i != 0); 104 } 105 106 /* round up LSB */ 107 { 108 uint64_t summand = (remainder << 1) >= arg2_value; 109 110 ASSERT(res_value <= LLONG_MAX - summand); 111 112 res_value += summand; 113 } 114 115 res.value = (int64_t)res_value; 116 117 if (arg1_negative ^ arg2_negative) 118 res.value = -res.value; 119 120 return res; 121 } 122 123 struct fixed31_32 dal_fixed31_32_from_int( 124 int64_t arg) 125 { 126 struct fixed31_32 res; 127 128 ASSERT((LONG_MIN <= arg) && (arg <= LONG_MAX)); 129 130 res.value = arg << BITS_PER_FRACTIONAL_PART; 131 132 return res; 133 } 134 135 struct fixed31_32 dal_fixed31_32_neg( 136 struct fixed31_32 arg) 137 { 138 struct fixed31_32 res; 139 140 res.value = -arg.value; 141 142 return res; 143 } 144 145 struct fixed31_32 dal_fixed31_32_abs( 146 struct fixed31_32 arg) 147 { 148 if (arg.value < 0) 149 return dal_fixed31_32_neg(arg); 150 else 151 return arg; 152 } 153 154 bool dal_fixed31_32_lt( 155 struct fixed31_32 arg1, 156 struct fixed31_32 arg2) 157 { 158 return arg1.value < arg2.value; 159 } 160 161 bool dal_fixed31_32_le( 162 struct fixed31_32 arg1, 163 struct fixed31_32 arg2) 164 { 165 return arg1.value <= arg2.value; 166 } 167 168 bool dal_fixed31_32_eq( 169 struct fixed31_32 arg1, 170 struct fixed31_32 arg2) 171 { 172 return arg1.value == arg2.value; 173 } 174 175 struct fixed31_32 dal_fixed31_32_min( 176 struct fixed31_32 arg1, 177 struct fixed31_32 arg2) 178 { 179 if (arg1.value <= arg2.value) 180 return arg1; 181 else 182 return arg2; 183 } 184 185 struct fixed31_32 dal_fixed31_32_max( 186 struct fixed31_32 arg1, 187 struct fixed31_32 arg2) 188 { 189 if (arg1.value <= arg2.value) 190 return arg2; 191 else 192 return arg1; 193 } 194 195 struct fixed31_32 dal_fixed31_32_clamp( 196 struct fixed31_32 arg, 197 struct fixed31_32 min_value, 198 struct fixed31_32 max_value) 199 { 200 if (dal_fixed31_32_le(arg, min_value)) 201 return min_value; 202 else if (dal_fixed31_32_le(max_value, arg)) 203 return max_value; 204 else 205 return arg; 206 } 207 208 struct fixed31_32 dal_fixed31_32_shl( 209 struct fixed31_32 arg, 210 uint8_t shift) 211 { 212 struct fixed31_32 res; 213 214 ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || 215 ((arg.value < 0) && (arg.value >= LLONG_MIN >> shift))); 216 217 res.value = arg.value << shift; 218 219 return res; 220 } 221 222 struct fixed31_32 dal_fixed31_32_shr( 223 struct fixed31_32 arg, 224 uint8_t shift) 225 { 226 struct fixed31_32 res; 227 228 ASSERT(shift < 64); 229 230 res.value = arg.value >> shift; 231 232 return res; 233 } 234 235 struct fixed31_32 dal_fixed31_32_add( 236 struct fixed31_32 arg1, 237 struct fixed31_32 arg2) 238 { 239 struct fixed31_32 res; 240 241 ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || 242 ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); 243 244 res.value = arg1.value + arg2.value; 245 246 return res; 247 } 248 249 struct fixed31_32 dal_fixed31_32_add_int( 250 struct fixed31_32 arg1, 251 int32_t arg2) 252 { 253 return dal_fixed31_32_add( 254 arg1, 255 dal_fixed31_32_from_int(arg2)); 256 } 257 258 struct fixed31_32 dal_fixed31_32_sub_int( 259 struct fixed31_32 arg1, 260 int32_t arg2) 261 { 262 return dal_fixed31_32_sub( 263 arg1, 264 dal_fixed31_32_from_int(arg2)); 265 } 266 267 struct fixed31_32 dal_fixed31_32_sub( 268 struct fixed31_32 arg1, 269 struct fixed31_32 arg2) 270 { 271 struct fixed31_32 res; 272 273 ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || 274 ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); 275 276 res.value = arg1.value - arg2.value; 277 278 return res; 279 } 280 281 struct fixed31_32 dal_fixed31_32_mul_int( 282 struct fixed31_32 arg1, 283 int32_t arg2) 284 { 285 return dal_fixed31_32_mul( 286 arg1, 287 dal_fixed31_32_from_int(arg2)); 288 } 289 290 struct fixed31_32 dal_fixed31_32_mul( 291 struct fixed31_32 arg1, 292 struct fixed31_32 arg2) 293 { 294 struct fixed31_32 res; 295 296 bool arg1_negative = arg1.value < 0; 297 bool arg2_negative = arg2.value < 0; 298 299 uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value; 300 uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value; 301 302 uint64_t arg1_int = GET_INTEGER_PART(arg1_value); 303 uint64_t arg2_int = GET_INTEGER_PART(arg2_value); 304 305 uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value); 306 uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value); 307 308 uint64_t tmp; 309 310 res.value = arg1_int * arg2_int; 311 312 ASSERT(res.value <= LONG_MAX); 313 314 res.value <<= BITS_PER_FRACTIONAL_PART; 315 316 tmp = arg1_int * arg2_fra; 317 318 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 319 320 res.value += tmp; 321 322 tmp = arg2_int * arg1_fra; 323 324 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 325 326 res.value += tmp; 327 328 tmp = arg1_fra * arg2_fra; 329 330 tmp = (tmp >> BITS_PER_FRACTIONAL_PART) + 331 (tmp >= (uint64_t)dal_fixed31_32_half.value); 332 333 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 334 335 res.value += tmp; 336 337 if (arg1_negative ^ arg2_negative) 338 res.value = -res.value; 339 340 return res; 341 } 342 343 struct fixed31_32 dal_fixed31_32_sqr( 344 struct fixed31_32 arg) 345 { 346 struct fixed31_32 res; 347 348 uint64_t arg_value = abs_i64(arg.value); 349 350 uint64_t arg_int = GET_INTEGER_PART(arg_value); 351 352 uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value); 353 354 uint64_t tmp; 355 356 res.value = arg_int * arg_int; 357 358 ASSERT(res.value <= LONG_MAX); 359 360 res.value <<= BITS_PER_FRACTIONAL_PART; 361 362 tmp = arg_int * arg_fra; 363 364 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 365 366 res.value += tmp; 367 368 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 369 370 res.value += tmp; 371 372 tmp = arg_fra * arg_fra; 373 374 tmp = (tmp >> BITS_PER_FRACTIONAL_PART) + 375 (tmp >= (uint64_t)dal_fixed31_32_half.value); 376 377 ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); 378 379 res.value += tmp; 380 381 return res; 382 } 383 384 struct fixed31_32 dal_fixed31_32_div_int( 385 struct fixed31_32 arg1, 386 int64_t arg2) 387 { 388 return dal_fixed31_32_from_fraction( 389 arg1.value, 390 dal_fixed31_32_from_int(arg2).value); 391 } 392 393 struct fixed31_32 dal_fixed31_32_div( 394 struct fixed31_32 arg1, 395 struct fixed31_32 arg2) 396 { 397 return dal_fixed31_32_from_fraction( 398 arg1.value, 399 arg2.value); 400 } 401 402 struct fixed31_32 dal_fixed31_32_recip( 403 struct fixed31_32 arg) 404 { 405 /* 406 * @note 407 * Good idea to use Newton's method 408 */ 409 410 ASSERT(arg.value); 411 412 return dal_fixed31_32_from_fraction( 413 dal_fixed31_32_one.value, 414 arg.value); 415 } 416 417 struct fixed31_32 dal_fixed31_32_sinc( 418 struct fixed31_32 arg) 419 { 420 struct fixed31_32 square; 421 422 struct fixed31_32 res = dal_fixed31_32_one; 423 424 int32_t n = 27; 425 426 struct fixed31_32 arg_norm = arg; 427 428 if (dal_fixed31_32_le( 429 dal_fixed31_32_two_pi, 430 dal_fixed31_32_abs(arg))) { 431 arg_norm = dal_fixed31_32_sub( 432 arg_norm, 433 dal_fixed31_32_mul_int( 434 dal_fixed31_32_two_pi, 435 (int32_t)div64_s64( 436 arg_norm.value, 437 dal_fixed31_32_two_pi.value))); 438 } 439 440 square = dal_fixed31_32_sqr(arg_norm); 441 442 do { 443 res = dal_fixed31_32_sub( 444 dal_fixed31_32_one, 445 dal_fixed31_32_div_int( 446 dal_fixed31_32_mul( 447 square, 448 res), 449 n * (n - 1))); 450 451 n -= 2; 452 } while (n > 2); 453 454 if (arg.value != arg_norm.value) 455 res = dal_fixed31_32_div( 456 dal_fixed31_32_mul(res, arg_norm), 457 arg); 458 459 return res; 460 } 461 462 struct fixed31_32 dal_fixed31_32_sin( 463 struct fixed31_32 arg) 464 { 465 return dal_fixed31_32_mul( 466 arg, 467 dal_fixed31_32_sinc(arg)); 468 } 469 470 struct fixed31_32 dal_fixed31_32_cos( 471 struct fixed31_32 arg) 472 { 473 /* TODO implement argument normalization */ 474 475 const struct fixed31_32 square = dal_fixed31_32_sqr(arg); 476 477 struct fixed31_32 res = dal_fixed31_32_one; 478 479 int32_t n = 26; 480 481 do { 482 res = dal_fixed31_32_sub( 483 dal_fixed31_32_one, 484 dal_fixed31_32_div_int( 485 dal_fixed31_32_mul( 486 square, 487 res), 488 n * (n - 1))); 489 490 n -= 2; 491 } while (n != 0); 492 493 return res; 494 } 495 496 /* 497 * @brief 498 * result = exp(arg), 499 * where abs(arg) < 1 500 * 501 * Calculated as Taylor series. 502 */ 503 static struct fixed31_32 fixed31_32_exp_from_taylor_series( 504 struct fixed31_32 arg) 505 { 506 uint32_t n = 9; 507 508 struct fixed31_32 res = dal_fixed31_32_from_fraction( 509 n + 2, 510 n + 1); 511 /* TODO find correct res */ 512 513 ASSERT(dal_fixed31_32_lt(arg, dal_fixed31_32_one)); 514 515 do 516 res = dal_fixed31_32_add( 517 dal_fixed31_32_one, 518 dal_fixed31_32_div_int( 519 dal_fixed31_32_mul( 520 arg, 521 res), 522 n)); 523 while (--n != 1); 524 525 return dal_fixed31_32_add( 526 dal_fixed31_32_one, 527 dal_fixed31_32_mul( 528 arg, 529 res)); 530 } 531 532 struct fixed31_32 dal_fixed31_32_exp( 533 struct fixed31_32 arg) 534 { 535 /* 536 * @brief 537 * Main equation is: 538 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 539 * where m = round(x / ln(2)), r = x - m * ln(2) 540 */ 541 542 if (dal_fixed31_32_le( 543 dal_fixed31_32_ln2_div_2, 544 dal_fixed31_32_abs(arg))) { 545 int32_t m = dal_fixed31_32_round( 546 dal_fixed31_32_div( 547 arg, 548 dal_fixed31_32_ln2)); 549 550 struct fixed31_32 r = dal_fixed31_32_sub( 551 arg, 552 dal_fixed31_32_mul_int( 553 dal_fixed31_32_ln2, 554 m)); 555 556 ASSERT(m != 0); 557 558 ASSERT(dal_fixed31_32_lt( 559 dal_fixed31_32_abs(r), 560 dal_fixed31_32_one)); 561 562 if (m > 0) 563 return dal_fixed31_32_shl( 564 fixed31_32_exp_from_taylor_series(r), 565 (uint8_t)m); 566 else 567 return dal_fixed31_32_div_int( 568 fixed31_32_exp_from_taylor_series(r), 569 1LL << -m); 570 } else if (arg.value != 0) 571 return fixed31_32_exp_from_taylor_series(arg); 572 else 573 return dal_fixed31_32_one; 574 } 575 576 struct fixed31_32 dal_fixed31_32_log( 577 struct fixed31_32 arg) 578 { 579 struct fixed31_32 res = dal_fixed31_32_neg(dal_fixed31_32_one); 580 /* TODO improve 1st estimation */ 581 582 struct fixed31_32 error; 583 584 ASSERT(arg.value > 0); 585 /* TODO if arg is negative, return NaN */ 586 /* TODO if arg is zero, return -INF */ 587 588 do { 589 struct fixed31_32 res1 = dal_fixed31_32_add( 590 dal_fixed31_32_sub( 591 res, 592 dal_fixed31_32_one), 593 dal_fixed31_32_div( 594 arg, 595 dal_fixed31_32_exp(res))); 596 597 error = dal_fixed31_32_sub( 598 res, 599 res1); 600 601 res = res1; 602 /* TODO determine max_allowed_error based on quality of exp() */ 603 } while (abs_i64(error.value) > 100ULL); 604 605 return res; 606 } 607 608 struct fixed31_32 dal_fixed31_32_pow( 609 struct fixed31_32 arg1, 610 struct fixed31_32 arg2) 611 { 612 return dal_fixed31_32_exp( 613 dal_fixed31_32_mul( 614 dal_fixed31_32_log(arg1), 615 arg2)); 616 } 617 618 int32_t dal_fixed31_32_floor( 619 struct fixed31_32 arg) 620 { 621 uint64_t arg_value = abs_i64(arg.value); 622 623 if (arg.value >= 0) 624 return (int32_t)GET_INTEGER_PART(arg_value); 625 else 626 return -(int32_t)GET_INTEGER_PART(arg_value); 627 } 628 629 int32_t dal_fixed31_32_round( 630 struct fixed31_32 arg) 631 { 632 uint64_t arg_value = abs_i64(arg.value); 633 634 const int64_t summand = dal_fixed31_32_half.value; 635 636 ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); 637 638 arg_value += summand; 639 640 if (arg.value >= 0) 641 return (int32_t)GET_INTEGER_PART(arg_value); 642 else 643 return -(int32_t)GET_INTEGER_PART(arg_value); 644 } 645 646 int32_t dal_fixed31_32_ceil( 647 struct fixed31_32 arg) 648 { 649 uint64_t arg_value = abs_i64(arg.value); 650 651 const int64_t summand = dal_fixed31_32_one.value - 652 dal_fixed31_32_epsilon.value; 653 654 ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); 655 656 arg_value += summand; 657 658 if (arg.value >= 0) 659 return (int32_t)GET_INTEGER_PART(arg_value); 660 else 661 return -(int32_t)GET_INTEGER_PART(arg_value); 662 } 663 664 /* this function is a generic helper to translate fixed point value to 665 * specified integer format that will consist of integer_bits integer part and 666 * fractional_bits fractional part. For example it is used in 667 * dal_fixed31_32_u2d19 to receive 2 bits integer part and 19 bits fractional 668 * part in 32 bits. It is used in hw programming (scaler) 669 */ 670 671 static inline uint32_t ux_dy( 672 int64_t value, 673 uint32_t integer_bits, 674 uint32_t fractional_bits) 675 { 676 /* 1. create mask of integer part */ 677 uint32_t result = (1 << integer_bits) - 1; 678 /* 2. mask out fractional part */ 679 uint32_t fractional_part = FRACTIONAL_PART_MASK & value; 680 /* 3. shrink fixed point integer part to be of integer_bits width*/ 681 result &= GET_INTEGER_PART(value); 682 /* 4. make space for fractional part to be filled in after integer */ 683 result <<= fractional_bits; 684 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 685 fractional_part >>= BITS_PER_FRACTIONAL_PART - fractional_bits; 686 /* 6. merge the result */ 687 return result | fractional_part; 688 } 689 690 uint32_t dal_fixed31_32_u2d19( 691 struct fixed31_32 arg) 692 { 693 return ux_dy(arg.value, 2, 19); 694 } 695 696 uint32_t dal_fixed31_32_u0d19( 697 struct fixed31_32 arg) 698 { 699 return ux_dy(arg.value, 0, 19); 700 } 701