1 /* 2 * Copyright 2012-15 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dm_services.h" 27 #include "include/fixed31_32.h" 28 29 static const struct fixed31_32 dc_fixpt_pi = { 13493037705LL }; 30 static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL }; 31 static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL }; 32 static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL }; 33 34 static inline unsigned long long abs_i64( 35 long long arg) 36 { 37 if (arg > 0) 38 return (unsigned long long)arg; 39 else 40 return (unsigned long long)(-arg); 41 } 42 43 /* 44 * @brief 45 * result = dividend / divisor 46 * *remainder = dividend % divisor 47 */ 48 static inline unsigned long long complete_integer_division_u64( 49 unsigned long long dividend, 50 unsigned long long divisor, 51 unsigned long long *remainder) 52 { 53 unsigned long long result; 54 55 ASSERT(divisor); 56 57 result = div64_u64_rem(dividend, divisor, remainder); 58 59 return result; 60 } 61 62 63 #define FRACTIONAL_PART_MASK \ 64 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) 65 66 #define GET_INTEGER_PART(x) \ 67 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) 68 69 #define GET_FRACTIONAL_PART(x) \ 70 (FRACTIONAL_PART_MASK & (x)) 71 72 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) 73 { 74 struct fixed31_32 res; 75 76 bool arg1_negative = numerator < 0; 77 bool arg2_negative = denominator < 0; 78 79 unsigned long long arg1_value = arg1_negative ? -numerator : numerator; 80 unsigned long long arg2_value = arg2_negative ? -denominator : denominator; 81 82 unsigned long long remainder; 83 84 /* determine integer part */ 85 86 unsigned long long res_value = complete_integer_division_u64( 87 arg1_value, arg2_value, &remainder); 88 89 ASSERT(res_value <= LONG_MAX); 90 91 /* determine fractional part */ 92 { 93 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; 94 95 do { 96 remainder <<= 1; 97 98 res_value <<= 1; 99 100 if (remainder >= arg2_value) { 101 res_value |= 1; 102 remainder -= arg2_value; 103 } 104 } while (--i != 0); 105 } 106 107 /* round up LSB */ 108 { 109 unsigned long long summand = (remainder << 1) >= arg2_value; 110 111 ASSERT(res_value <= LLONG_MAX - summand); 112 113 res_value += summand; 114 } 115 116 res.value = (long long)res_value; 117 118 if (arg1_negative ^ arg2_negative) 119 res.value = -res.value; 120 121 return res; 122 } 123 124 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) 125 { 126 struct fixed31_32 res; 127 128 bool arg1_negative = arg1.value < 0; 129 bool arg2_negative = arg2.value < 0; 130 131 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; 132 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; 133 134 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); 135 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); 136 137 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); 138 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); 139 140 unsigned long long tmp; 141 142 res.value = arg1_int * arg2_int; 143 144 ASSERT(res.value <= LONG_MAX); 145 146 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 147 148 tmp = arg1_int * arg2_fra; 149 150 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 151 152 res.value += tmp; 153 154 tmp = arg2_int * arg1_fra; 155 156 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 157 158 res.value += tmp; 159 160 tmp = arg1_fra * arg2_fra; 161 162 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 163 (tmp >= (unsigned long long)dc_fixpt_half.value); 164 165 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 166 167 res.value += tmp; 168 169 if (arg1_negative ^ arg2_negative) 170 res.value = -res.value; 171 172 return res; 173 } 174 175 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) 176 { 177 struct fixed31_32 res; 178 179 unsigned long long arg_value = abs_i64(arg.value); 180 181 unsigned long long arg_int = GET_INTEGER_PART(arg_value); 182 183 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); 184 185 unsigned long long tmp; 186 187 res.value = arg_int * arg_int; 188 189 ASSERT(res.value <= LONG_MAX); 190 191 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 192 193 tmp = arg_int * arg_fra; 194 195 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 196 197 res.value += tmp; 198 199 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 200 201 res.value += tmp; 202 203 tmp = arg_fra * arg_fra; 204 205 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 206 (tmp >= (unsigned long long)dc_fixpt_half.value); 207 208 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 209 210 res.value += tmp; 211 212 return res; 213 } 214 215 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) 216 { 217 /* 218 * @note 219 * Good idea to use Newton's method 220 */ 221 222 ASSERT(arg.value); 223 224 return dc_fixpt_from_fraction( 225 dc_fixpt_one.value, 226 arg.value); 227 } 228 229 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) 230 { 231 struct fixed31_32 square; 232 233 struct fixed31_32 res = dc_fixpt_one; 234 235 int n = 27; 236 237 struct fixed31_32 arg_norm = arg; 238 239 if (dc_fixpt_le( 240 dc_fixpt_two_pi, 241 dc_fixpt_abs(arg))) { 242 arg_norm = dc_fixpt_sub( 243 arg_norm, 244 dc_fixpt_mul_int( 245 dc_fixpt_two_pi, 246 (int)div64_s64( 247 arg_norm.value, 248 dc_fixpt_two_pi.value))); 249 } 250 251 square = dc_fixpt_sqr(arg_norm); 252 253 do { 254 res = dc_fixpt_sub( 255 dc_fixpt_one, 256 dc_fixpt_div_int( 257 dc_fixpt_mul( 258 square, 259 res), 260 n * (n - 1))); 261 262 n -= 2; 263 } while (n > 2); 264 265 if (arg.value != arg_norm.value) 266 res = dc_fixpt_div( 267 dc_fixpt_mul(res, arg_norm), 268 arg); 269 270 return res; 271 } 272 273 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) 274 { 275 return dc_fixpt_mul( 276 arg, 277 dc_fixpt_sinc(arg)); 278 } 279 280 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) 281 { 282 /* TODO implement argument normalization */ 283 284 const struct fixed31_32 square = dc_fixpt_sqr(arg); 285 286 struct fixed31_32 res = dc_fixpt_one; 287 288 int n = 26; 289 290 do { 291 res = dc_fixpt_sub( 292 dc_fixpt_one, 293 dc_fixpt_div_int( 294 dc_fixpt_mul( 295 square, 296 res), 297 n * (n - 1))); 298 299 n -= 2; 300 } while (n != 0); 301 302 return res; 303 } 304 305 /* 306 * @brief 307 * result = exp(arg), 308 * where abs(arg) < 1 309 * 310 * Calculated as Taylor series. 311 */ 312 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) 313 { 314 unsigned int n = 9; 315 316 struct fixed31_32 res = dc_fixpt_from_fraction( 317 n + 2, 318 n + 1); 319 /* TODO find correct res */ 320 321 ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); 322 323 do 324 res = dc_fixpt_add( 325 dc_fixpt_one, 326 dc_fixpt_div_int( 327 dc_fixpt_mul( 328 arg, 329 res), 330 n)); 331 while (--n != 1); 332 333 return dc_fixpt_add( 334 dc_fixpt_one, 335 dc_fixpt_mul( 336 arg, 337 res)); 338 } 339 340 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) 341 { 342 /* 343 * @brief 344 * Main equation is: 345 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 346 * where m = round(x / ln(2)), r = x - m * ln(2) 347 */ 348 349 if (dc_fixpt_le( 350 dc_fixpt_ln2_div_2, 351 dc_fixpt_abs(arg))) { 352 int m = dc_fixpt_round( 353 dc_fixpt_div( 354 arg, 355 dc_fixpt_ln2)); 356 357 struct fixed31_32 r = dc_fixpt_sub( 358 arg, 359 dc_fixpt_mul_int( 360 dc_fixpt_ln2, 361 m)); 362 363 ASSERT(m != 0); 364 365 ASSERT(dc_fixpt_lt( 366 dc_fixpt_abs(r), 367 dc_fixpt_one)); 368 369 if (m > 0) 370 return dc_fixpt_shl( 371 fixed31_32_exp_from_taylor_series(r), 372 (unsigned char)m); 373 else 374 return dc_fixpt_div_int( 375 fixed31_32_exp_from_taylor_series(r), 376 1LL << -m); 377 } else if (arg.value != 0) 378 return fixed31_32_exp_from_taylor_series(arg); 379 else 380 return dc_fixpt_one; 381 } 382 383 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) 384 { 385 struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one); 386 /* TODO improve 1st estimation */ 387 388 struct fixed31_32 error; 389 390 ASSERT(arg.value > 0); 391 /* TODO if arg is negative, return NaN */ 392 /* TODO if arg is zero, return -INF */ 393 394 do { 395 struct fixed31_32 res1 = dc_fixpt_add( 396 dc_fixpt_sub( 397 res, 398 dc_fixpt_one), 399 dc_fixpt_div( 400 arg, 401 dc_fixpt_exp(res))); 402 403 error = dc_fixpt_sub( 404 res, 405 res1); 406 407 res = res1; 408 /* TODO determine max_allowed_error based on quality of exp() */ 409 } while (abs_i64(error.value) > 100ULL); 410 411 return res; 412 } 413 414 415 /* this function is a generic helper to translate fixed point value to 416 * specified integer format that will consist of integer_bits integer part and 417 * fractional_bits fractional part. For example it is used in 418 * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional 419 * part in 32 bits. It is used in hw programming (scaler) 420 */ 421 422 static inline unsigned int ux_dy( 423 long long value, 424 unsigned int integer_bits, 425 unsigned int fractional_bits) 426 { 427 /* 1. create mask of integer part */ 428 unsigned int result = (1 << integer_bits) - 1; 429 /* 2. mask out fractional part */ 430 unsigned int fractional_part = FRACTIONAL_PART_MASK & value; 431 /* 3. shrink fixed point integer part to be of integer_bits width*/ 432 result &= GET_INTEGER_PART(value); 433 /* 4. make space for fractional part to be filled in after integer */ 434 result <<= fractional_bits; 435 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 436 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; 437 /* 6. merge the result */ 438 return result | fractional_part; 439 } 440 441 static inline unsigned int clamp_ux_dy( 442 long long value, 443 unsigned int integer_bits, 444 unsigned int fractional_bits, 445 unsigned int min_clamp) 446 { 447 unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); 448 449 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) 450 return (1 << (integer_bits + fractional_bits)) - 1; 451 else if (truncated_val > min_clamp) 452 return truncated_val; 453 else 454 return min_clamp; 455 } 456 457 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) 458 { 459 return ux_dy(arg.value, 4, 19); 460 } 461 462 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) 463 { 464 return ux_dy(arg.value, 3, 19); 465 } 466 467 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) 468 { 469 return ux_dy(arg.value, 2, 19); 470 } 471 472 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) 473 { 474 return ux_dy(arg.value, 0, 19); 475 } 476 477 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) 478 { 479 return clamp_ux_dy(arg.value, 0, 14, 1); 480 } 481 482 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) 483 { 484 return clamp_ux_dy(arg.value, 0, 10, 1); 485 } 486 487 int dc_fixpt_s4d19(struct fixed31_32 arg) 488 { 489 if (arg.value < 0) 490 return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19); 491 else 492 return ux_dy(arg.value, 4, 19); 493 } 494