1 /* 2 * Copyright 2012-15 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dm_services.h" 27 #include "include/fixed31_32.h" 28 29 static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL }; 30 static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL }; 31 static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL }; 32 33 static inline unsigned long long abs_i64( 34 long long arg) 35 { 36 if (arg > 0) 37 return (unsigned long long)arg; 38 else 39 return (unsigned long long)(-arg); 40 } 41 42 /* 43 * @brief 44 * result = dividend / divisor 45 * *remainder = dividend % divisor 46 */ 47 static inline unsigned long long complete_integer_division_u64( 48 unsigned long long dividend, 49 unsigned long long divisor, 50 unsigned long long *remainder) 51 { 52 unsigned long long result; 53 54 ASSERT(divisor); 55 56 result = div64_u64_rem(dividend, divisor, remainder); 57 58 return result; 59 } 60 61 62 #define FRACTIONAL_PART_MASK \ 63 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) 64 65 #define GET_INTEGER_PART(x) \ 66 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) 67 68 #define GET_FRACTIONAL_PART(x) \ 69 (FRACTIONAL_PART_MASK & (x)) 70 71 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) 72 { 73 struct fixed31_32 res; 74 75 bool arg1_negative = numerator < 0; 76 bool arg2_negative = denominator < 0; 77 78 unsigned long long arg1_value = arg1_negative ? -numerator : numerator; 79 unsigned long long arg2_value = arg2_negative ? -denominator : denominator; 80 81 unsigned long long remainder; 82 83 /* determine integer part */ 84 85 unsigned long long res_value = complete_integer_division_u64( 86 arg1_value, arg2_value, &remainder); 87 88 ASSERT(res_value <= LONG_MAX); 89 90 /* determine fractional part */ 91 { 92 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; 93 94 do { 95 remainder <<= 1; 96 97 res_value <<= 1; 98 99 if (remainder >= arg2_value) { 100 res_value |= 1; 101 remainder -= arg2_value; 102 } 103 } while (--i != 0); 104 } 105 106 /* round up LSB */ 107 { 108 unsigned long long summand = (remainder << 1) >= arg2_value; 109 110 ASSERT(res_value <= LLONG_MAX - summand); 111 112 res_value += summand; 113 } 114 115 res.value = (long long)res_value; 116 117 if (arg1_negative ^ arg2_negative) 118 res.value = -res.value; 119 120 return res; 121 } 122 123 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) 124 { 125 struct fixed31_32 res; 126 127 bool arg1_negative = arg1.value < 0; 128 bool arg2_negative = arg2.value < 0; 129 130 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; 131 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; 132 133 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); 134 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); 135 136 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); 137 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); 138 139 unsigned long long tmp; 140 141 res.value = arg1_int * arg2_int; 142 143 ASSERT(res.value <= LONG_MAX); 144 145 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 146 147 tmp = arg1_int * arg2_fra; 148 149 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 150 151 res.value += tmp; 152 153 tmp = arg2_int * arg1_fra; 154 155 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 156 157 res.value += tmp; 158 159 tmp = arg1_fra * arg2_fra; 160 161 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 162 (tmp >= (unsigned long long)dc_fixpt_half.value); 163 164 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 165 166 res.value += tmp; 167 168 if (arg1_negative ^ arg2_negative) 169 res.value = -res.value; 170 171 return res; 172 } 173 174 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) 175 { 176 struct fixed31_32 res; 177 178 unsigned long long arg_value = abs_i64(arg.value); 179 180 unsigned long long arg_int = GET_INTEGER_PART(arg_value); 181 182 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); 183 184 unsigned long long tmp; 185 186 res.value = arg_int * arg_int; 187 188 ASSERT(res.value <= LONG_MAX); 189 190 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; 191 192 tmp = arg_int * arg_fra; 193 194 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 195 196 res.value += tmp; 197 198 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 199 200 res.value += tmp; 201 202 tmp = arg_fra * arg_fra; 203 204 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + 205 (tmp >= (unsigned long long)dc_fixpt_half.value); 206 207 ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); 208 209 res.value += tmp; 210 211 return res; 212 } 213 214 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) 215 { 216 /* 217 * @note 218 * Good idea to use Newton's method 219 */ 220 221 ASSERT(arg.value); 222 223 return dc_fixpt_from_fraction( 224 dc_fixpt_one.value, 225 arg.value); 226 } 227 228 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) 229 { 230 struct fixed31_32 square; 231 232 struct fixed31_32 res = dc_fixpt_one; 233 234 int n = 27; 235 236 struct fixed31_32 arg_norm = arg; 237 238 if (dc_fixpt_le( 239 dc_fixpt_two_pi, 240 dc_fixpt_abs(arg))) { 241 arg_norm = dc_fixpt_sub( 242 arg_norm, 243 dc_fixpt_mul_int( 244 dc_fixpt_two_pi, 245 (int)div64_s64( 246 arg_norm.value, 247 dc_fixpt_two_pi.value))); 248 } 249 250 square = dc_fixpt_sqr(arg_norm); 251 252 do { 253 res = dc_fixpt_sub( 254 dc_fixpt_one, 255 dc_fixpt_div_int( 256 dc_fixpt_mul( 257 square, 258 res), 259 n * (n - 1))); 260 261 n -= 2; 262 } while (n > 2); 263 264 if (arg.value != arg_norm.value) 265 res = dc_fixpt_div( 266 dc_fixpt_mul(res, arg_norm), 267 arg); 268 269 return res; 270 } 271 272 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) 273 { 274 return dc_fixpt_mul( 275 arg, 276 dc_fixpt_sinc(arg)); 277 } 278 279 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) 280 { 281 /* TODO implement argument normalization */ 282 283 const struct fixed31_32 square = dc_fixpt_sqr(arg); 284 285 struct fixed31_32 res = dc_fixpt_one; 286 287 int n = 26; 288 289 do { 290 res = dc_fixpt_sub( 291 dc_fixpt_one, 292 dc_fixpt_div_int( 293 dc_fixpt_mul( 294 square, 295 res), 296 n * (n - 1))); 297 298 n -= 2; 299 } while (n != 0); 300 301 return res; 302 } 303 304 /* 305 * @brief 306 * result = exp(arg), 307 * where abs(arg) < 1 308 * 309 * Calculated as Taylor series. 310 */ 311 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) 312 { 313 unsigned int n = 9; 314 315 struct fixed31_32 res = dc_fixpt_from_fraction( 316 n + 2, 317 n + 1); 318 /* TODO find correct res */ 319 320 ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); 321 322 do 323 res = dc_fixpt_add( 324 dc_fixpt_one, 325 dc_fixpt_div_int( 326 dc_fixpt_mul( 327 arg, 328 res), 329 n)); 330 while (--n != 1); 331 332 return dc_fixpt_add( 333 dc_fixpt_one, 334 dc_fixpt_mul( 335 arg, 336 res)); 337 } 338 339 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) 340 { 341 /* 342 * @brief 343 * Main equation is: 344 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), 345 * where m = round(x / ln(2)), r = x - m * ln(2) 346 */ 347 348 if (dc_fixpt_le( 349 dc_fixpt_ln2_div_2, 350 dc_fixpt_abs(arg))) { 351 int m = dc_fixpt_round( 352 dc_fixpt_div( 353 arg, 354 dc_fixpt_ln2)); 355 356 struct fixed31_32 r = dc_fixpt_sub( 357 arg, 358 dc_fixpt_mul_int( 359 dc_fixpt_ln2, 360 m)); 361 362 ASSERT(m != 0); 363 364 ASSERT(dc_fixpt_lt( 365 dc_fixpt_abs(r), 366 dc_fixpt_one)); 367 368 if (m > 0) 369 return dc_fixpt_shl( 370 fixed31_32_exp_from_taylor_series(r), 371 (unsigned char)m); 372 else 373 return dc_fixpt_div_int( 374 fixed31_32_exp_from_taylor_series(r), 375 1LL << -m); 376 } else if (arg.value != 0) 377 return fixed31_32_exp_from_taylor_series(arg); 378 else 379 return dc_fixpt_one; 380 } 381 382 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) 383 { 384 struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one); 385 /* TODO improve 1st estimation */ 386 387 struct fixed31_32 error; 388 389 ASSERT(arg.value > 0); 390 /* TODO if arg is negative, return NaN */ 391 /* TODO if arg is zero, return -INF */ 392 393 do { 394 struct fixed31_32 res1 = dc_fixpt_add( 395 dc_fixpt_sub( 396 res, 397 dc_fixpt_one), 398 dc_fixpt_div( 399 arg, 400 dc_fixpt_exp(res))); 401 402 error = dc_fixpt_sub( 403 res, 404 res1); 405 406 res = res1; 407 /* TODO determine max_allowed_error based on quality of exp() */ 408 } while (abs_i64(error.value) > 100ULL); 409 410 return res; 411 } 412 413 414 /* this function is a generic helper to translate fixed point value to 415 * specified integer format that will consist of integer_bits integer part and 416 * fractional_bits fractional part. For example it is used in 417 * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional 418 * part in 32 bits. It is used in hw programming (scaler) 419 */ 420 421 static inline unsigned int ux_dy( 422 long long value, 423 unsigned int integer_bits, 424 unsigned int fractional_bits) 425 { 426 /* 1. create mask of integer part */ 427 unsigned int result = (1 << integer_bits) - 1; 428 /* 2. mask out fractional part */ 429 unsigned int fractional_part = FRACTIONAL_PART_MASK & value; 430 /* 3. shrink fixed point integer part to be of integer_bits width*/ 431 result &= GET_INTEGER_PART(value); 432 /* 4. make space for fractional part to be filled in after integer */ 433 result <<= fractional_bits; 434 /* 5. shrink fixed point fractional part to of fractional_bits width*/ 435 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; 436 /* 6. merge the result */ 437 return result | fractional_part; 438 } 439 440 static inline unsigned int clamp_ux_dy( 441 long long value, 442 unsigned int integer_bits, 443 unsigned int fractional_bits, 444 unsigned int min_clamp) 445 { 446 unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); 447 448 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) 449 return (1 << (integer_bits + fractional_bits)) - 1; 450 else if (truncated_val > min_clamp) 451 return truncated_val; 452 else 453 return min_clamp; 454 } 455 456 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) 457 { 458 return ux_dy(arg.value, 4, 19); 459 } 460 461 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) 462 { 463 return ux_dy(arg.value, 3, 19); 464 } 465 466 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) 467 { 468 return ux_dy(arg.value, 2, 19); 469 } 470 471 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) 472 { 473 return ux_dy(arg.value, 0, 19); 474 } 475 476 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) 477 { 478 return clamp_ux_dy(arg.value, 0, 14, 1); 479 } 480 481 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) 482 { 483 return clamp_ux_dy(arg.value, 0, 10, 1); 484 } 485 486 int dc_fixpt_s4d19(struct fixed31_32 arg) 487 { 488 if (arg.value < 0) 489 return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19); 490 else 491 return ux_dy(arg.value, 4, 19); 492 } 493