1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 #include "dc.h" 26 #include "reg_helper.h" 27 #include "dcn10_dpp.h" 28 29 #include "dcn10_cm_common.h" 30 #include "custom_float.h" 31 32 #define REG(reg) reg 33 34 #define CTX \ 35 ctx 36 37 #undef FN 38 #define FN(reg_name, field_name) \ 39 reg->shifts.field_name, reg->masks.field_name 40 41 void cm_helper_program_color_matrices( 42 struct dc_context *ctx, 43 const uint16_t *regval, 44 const struct color_matrices_reg *reg) 45 { 46 uint32_t cur_csc_reg; 47 unsigned int i = 0; 48 49 for (cur_csc_reg = reg->csc_c11_c12; 50 cur_csc_reg <= reg->csc_c33_c34; 51 cur_csc_reg++) { 52 53 const uint16_t *regval0 = &(regval[2 * i]); 54 const uint16_t *regval1 = &(regval[(2 * i) + 1]); 55 56 REG_SET_2(cur_csc_reg, 0, 57 csc_c11, *regval0, 58 csc_c12, *regval1); 59 60 i++; 61 } 62 63 } 64 65 void cm_helper_program_xfer_func( 66 struct dc_context *ctx, 67 const struct pwl_params *params, 68 const struct xfer_func_reg *reg) 69 { 70 uint32_t reg_region_cur; 71 unsigned int i = 0; 72 73 REG_SET_2(reg->start_cntl_b, 0, 74 exp_region_start, params->arr_points[0].custom_float_x, 75 exp_resion_start_segment, 0); 76 REG_SET_2(reg->start_cntl_g, 0, 77 exp_region_start, params->arr_points[0].custom_float_x, 78 exp_resion_start_segment, 0); 79 REG_SET_2(reg->start_cntl_r, 0, 80 exp_region_start, params->arr_points[0].custom_float_x, 81 exp_resion_start_segment, 0); 82 83 REG_SET(reg->start_slope_cntl_b, 0, 84 field_region_linear_slope, params->arr_points[0].custom_float_slope); 85 REG_SET(reg->start_slope_cntl_g, 0, 86 field_region_linear_slope, params->arr_points[0].custom_float_slope); 87 REG_SET(reg->start_slope_cntl_r, 0, 88 field_region_linear_slope, params->arr_points[0].custom_float_slope); 89 90 REG_SET(reg->start_end_cntl1_b, 0, 91 field_region_end, params->arr_points[1].custom_float_x); 92 REG_SET_2(reg->start_end_cntl2_b, 0, 93 field_region_end_slope, params->arr_points[1].custom_float_slope, 94 field_region_end_base, params->arr_points[1].custom_float_y); 95 96 REG_SET(reg->start_end_cntl1_g, 0, 97 field_region_end, params->arr_points[1].custom_float_x); 98 REG_SET_2(reg->start_end_cntl2_g, 0, 99 field_region_end_slope, params->arr_points[1].custom_float_slope, 100 field_region_end_base, params->arr_points[1].custom_float_y); 101 102 REG_SET(reg->start_end_cntl1_r, 0, 103 field_region_end, params->arr_points[1].custom_float_x); 104 REG_SET_2(reg->start_end_cntl2_r, 0, 105 field_region_end_slope, params->arr_points[1].custom_float_slope, 106 field_region_end_base, params->arr_points[1].custom_float_y); 107 108 for (reg_region_cur = reg->region_start; 109 reg_region_cur <= reg->region_end; 110 reg_region_cur++) { 111 112 const struct gamma_curve *curve0 = &(params->arr_curve_points[2 * i]); 113 const struct gamma_curve *curve1 = &(params->arr_curve_points[(2 * i) + 1]); 114 115 REG_SET_4(reg_region_cur, 0, 116 exp_region0_lut_offset, curve0->offset, 117 exp_region0_num_segments, curve0->segments_num, 118 exp_region1_lut_offset, curve1->offset, 119 exp_region1_num_segments, curve1->segments_num); 120 121 i++; 122 } 123 124 } 125 126 127 128 bool cm_helper_convert_to_custom_float( 129 struct pwl_result_data *rgb_resulted, 130 struct curve_points *arr_points, 131 uint32_t hw_points_num, 132 bool fixpoint) 133 { 134 struct custom_float_format fmt; 135 136 struct pwl_result_data *rgb = rgb_resulted; 137 138 uint32_t i = 0; 139 140 fmt.exponenta_bits = 6; 141 fmt.mantissa_bits = 12; 142 fmt.sign = false; 143 144 if (!convert_to_custom_float_format(arr_points[0].x, &fmt, 145 &arr_points[0].custom_float_x)) { 146 BREAK_TO_DEBUGGER(); 147 return false; 148 } 149 150 if (!convert_to_custom_float_format(arr_points[0].offset, &fmt, 151 &arr_points[0].custom_float_offset)) { 152 BREAK_TO_DEBUGGER(); 153 return false; 154 } 155 156 if (!convert_to_custom_float_format(arr_points[0].slope, &fmt, 157 &arr_points[0].custom_float_slope)) { 158 BREAK_TO_DEBUGGER(); 159 return false; 160 } 161 162 fmt.mantissa_bits = 10; 163 fmt.sign = false; 164 165 if (!convert_to_custom_float_format(arr_points[1].x, &fmt, 166 &arr_points[1].custom_float_x)) { 167 BREAK_TO_DEBUGGER(); 168 return false; 169 } 170 171 if (fixpoint == true) 172 arr_points[1].custom_float_y = dc_fixpt_clamp_u0d14(arr_points[1].y); 173 else if (!convert_to_custom_float_format(arr_points[1].y, &fmt, 174 &arr_points[1].custom_float_y)) { 175 BREAK_TO_DEBUGGER(); 176 return false; 177 } 178 179 if (!convert_to_custom_float_format(arr_points[1].slope, &fmt, 180 &arr_points[1].custom_float_slope)) { 181 BREAK_TO_DEBUGGER(); 182 return false; 183 } 184 185 if (hw_points_num == 0 || rgb_resulted == NULL || fixpoint == true) 186 return true; 187 188 fmt.mantissa_bits = 12; 189 fmt.sign = true; 190 191 while (i != hw_points_num) { 192 if (!convert_to_custom_float_format(rgb->red, &fmt, 193 &rgb->red_reg)) { 194 BREAK_TO_DEBUGGER(); 195 return false; 196 } 197 198 if (!convert_to_custom_float_format(rgb->green, &fmt, 199 &rgb->green_reg)) { 200 BREAK_TO_DEBUGGER(); 201 return false; 202 } 203 204 if (!convert_to_custom_float_format(rgb->blue, &fmt, 205 &rgb->blue_reg)) { 206 BREAK_TO_DEBUGGER(); 207 return false; 208 } 209 210 if (!convert_to_custom_float_format(rgb->delta_red, &fmt, 211 &rgb->delta_red_reg)) { 212 BREAK_TO_DEBUGGER(); 213 return false; 214 } 215 216 if (!convert_to_custom_float_format(rgb->delta_green, &fmt, 217 &rgb->delta_green_reg)) { 218 BREAK_TO_DEBUGGER(); 219 return false; 220 } 221 222 if (!convert_to_custom_float_format(rgb->delta_blue, &fmt, 223 &rgb->delta_blue_reg)) { 224 BREAK_TO_DEBUGGER(); 225 return false; 226 } 227 228 ++rgb; 229 ++i; 230 } 231 232 return true; 233 } 234 235 /* driver uses 32 regions or less, but DCN HW has 34, extra 2 are set to 0 */ 236 #define MAX_REGIONS_NUMBER 34 237 #define MAX_LOW_POINT 25 238 #define NUMBER_REGIONS 32 239 #define NUMBER_SW_SEGMENTS 16 240 241 bool cm_helper_translate_curve_to_hw_format( 242 const struct dc_transfer_func *output_tf, 243 struct pwl_params *lut_params, bool fixpoint) 244 { 245 struct curve_points *arr_points; 246 struct pwl_result_data *rgb_resulted; 247 struct pwl_result_data *rgb; 248 struct pwl_result_data *rgb_plus_1; 249 struct fixed31_32 y_r; 250 struct fixed31_32 y_g; 251 struct fixed31_32 y_b; 252 struct fixed31_32 y1_min; 253 struct fixed31_32 y3_max; 254 255 int32_t region_start, region_end; 256 int32_t i; 257 uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; 258 259 if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) 260 return false; 261 262 PERF_TRACE(); 263 264 arr_points = lut_params->arr_points; 265 rgb_resulted = lut_params->rgb_resulted; 266 hw_points = 0; 267 268 memset(lut_params, 0, sizeof(struct pwl_params)); 269 memset(seg_distr, 0, sizeof(seg_distr)); 270 271 if (output_tf->tf == TRANSFER_FUNCTION_PQ) { 272 /* 32 segments 273 * segments are from 2^-25 to 2^7 274 */ 275 for (i = 0; i < NUMBER_REGIONS ; i++) 276 seg_distr[i] = 3; 277 278 region_start = -MAX_LOW_POINT; 279 region_end = NUMBER_REGIONS - MAX_LOW_POINT; 280 } else { 281 /* 10 segments 282 * segment is from 2^-10 to 2^0 283 * There are less than 256 points, for optimization 284 */ 285 seg_distr[0] = 3; 286 seg_distr[1] = 4; 287 seg_distr[2] = 4; 288 seg_distr[3] = 4; 289 seg_distr[4] = 4; 290 seg_distr[5] = 4; 291 seg_distr[6] = 4; 292 seg_distr[7] = 4; 293 seg_distr[8] = 4; 294 seg_distr[9] = 4; 295 296 region_start = -10; 297 region_end = 0; 298 } 299 300 for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) 301 seg_distr[i] = -1; 302 303 for (k = 0; k < MAX_REGIONS_NUMBER; k++) { 304 if (seg_distr[k] != -1) 305 hw_points += (1 << seg_distr[k]); 306 } 307 308 j = 0; 309 for (k = 0; k < (region_end - region_start); k++) { 310 increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); 311 start_index = (region_start + k + MAX_LOW_POINT) * 312 NUMBER_SW_SEGMENTS; 313 for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; 314 i += increment) { 315 if (j == hw_points - 1) 316 break; 317 rgb_resulted[j].red = output_tf->tf_pts.red[i]; 318 rgb_resulted[j].green = output_tf->tf_pts.green[i]; 319 rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; 320 j++; 321 } 322 } 323 324 /* last point */ 325 start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; 326 rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; 327 rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; 328 rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; 329 330 arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), 331 dc_fixpt_from_int(region_start)); 332 arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), 333 dc_fixpt_from_int(region_end)); 334 335 y_r = rgb_resulted[0].red; 336 y_g = rgb_resulted[0].green; 337 y_b = rgb_resulted[0].blue; 338 339 y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); 340 341 arr_points[0].y = y1_min; 342 arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); 343 y_r = rgb_resulted[hw_points - 1].red; 344 y_g = rgb_resulted[hw_points - 1].green; 345 y_b = rgb_resulted[hw_points - 1].blue; 346 347 /* see comment above, m_arrPoints[1].y should be the Y value for the 348 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) 349 */ 350 y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); 351 352 arr_points[1].y = y3_max; 353 354 arr_points[1].slope = dc_fixpt_zero; 355 356 if (output_tf->tf == TRANSFER_FUNCTION_PQ) { 357 /* for PQ, we want to have a straight line from last HW X point, 358 * and the slope to be such that we hit 1.0 at 10000 nits. 359 */ 360 const struct fixed31_32 end_value = 361 dc_fixpt_from_int(125); 362 363 arr_points[1].slope = dc_fixpt_div( 364 dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), 365 dc_fixpt_sub(end_value, arr_points[1].x)); 366 } 367 368 lut_params->hw_points_num = hw_points; 369 370 k = 0; 371 for (i = 1; i < MAX_REGIONS_NUMBER; i++) { 372 if (seg_distr[k] != -1) { 373 lut_params->arr_curve_points[k].segments_num = 374 seg_distr[k]; 375 lut_params->arr_curve_points[i].offset = 376 lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); 377 } 378 k++; 379 } 380 381 if (seg_distr[k] != -1) 382 lut_params->arr_curve_points[k].segments_num = seg_distr[k]; 383 384 rgb = rgb_resulted; 385 rgb_plus_1 = rgb_resulted + 1; 386 387 i = 1; 388 while (i != hw_points + 1) { 389 if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) 390 rgb_plus_1->red = rgb->red; 391 if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) 392 rgb_plus_1->green = rgb->green; 393 if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) 394 rgb_plus_1->blue = rgb->blue; 395 396 rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); 397 rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); 398 rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); 399 400 if (fixpoint == true) { 401 rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red); 402 rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green); 403 rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue); 404 rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red); 405 rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green); 406 rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue); 407 } 408 409 ++rgb_plus_1; 410 ++rgb; 411 ++i; 412 } 413 cm_helper_convert_to_custom_float(rgb_resulted, 414 lut_params->arr_points, 415 hw_points, fixpoint); 416 417 return true; 418 } 419 420 #define NUM_DEGAMMA_REGIONS 12 421 422 423 bool cm_helper_translate_curve_to_degamma_hw_format( 424 const struct dc_transfer_func *output_tf, 425 struct pwl_params *lut_params) 426 { 427 struct curve_points *arr_points; 428 struct pwl_result_data *rgb_resulted; 429 struct pwl_result_data *rgb; 430 struct pwl_result_data *rgb_plus_1; 431 struct fixed31_32 y_r; 432 struct fixed31_32 y_g; 433 struct fixed31_32 y_b; 434 struct fixed31_32 y1_min; 435 struct fixed31_32 y3_max; 436 437 int32_t region_start, region_end; 438 int32_t i; 439 uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; 440 441 if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) 442 return false; 443 444 PERF_TRACE(); 445 446 arr_points = lut_params->arr_points; 447 rgb_resulted = lut_params->rgb_resulted; 448 hw_points = 0; 449 450 memset(lut_params, 0, sizeof(struct pwl_params)); 451 memset(seg_distr, 0, sizeof(seg_distr)); 452 453 region_start = -NUM_DEGAMMA_REGIONS; 454 region_end = 0; 455 456 457 for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) 458 seg_distr[i] = -1; 459 /* 12 segments 460 * segments are from 2^-12 to 0 461 */ 462 for (i = 0; i < NUM_DEGAMMA_REGIONS ; i++) 463 seg_distr[i] = 4; 464 465 for (k = 0; k < MAX_REGIONS_NUMBER; k++) { 466 if (seg_distr[k] != -1) 467 hw_points += (1 << seg_distr[k]); 468 } 469 470 j = 0; 471 for (k = 0; k < (region_end - region_start); k++) { 472 increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); 473 start_index = (region_start + k + MAX_LOW_POINT) * 474 NUMBER_SW_SEGMENTS; 475 for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; 476 i += increment) { 477 if (j == hw_points - 1) 478 break; 479 rgb_resulted[j].red = output_tf->tf_pts.red[i]; 480 rgb_resulted[j].green = output_tf->tf_pts.green[i]; 481 rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; 482 j++; 483 } 484 } 485 486 /* last point */ 487 start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; 488 rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; 489 rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; 490 rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; 491 492 arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), 493 dc_fixpt_from_int(region_start)); 494 arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), 495 dc_fixpt_from_int(region_end)); 496 497 y_r = rgb_resulted[0].red; 498 y_g = rgb_resulted[0].green; 499 y_b = rgb_resulted[0].blue; 500 501 y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); 502 503 arr_points[0].y = y1_min; 504 arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); 505 y_r = rgb_resulted[hw_points - 1].red; 506 y_g = rgb_resulted[hw_points - 1].green; 507 y_b = rgb_resulted[hw_points - 1].blue; 508 509 /* see comment above, m_arrPoints[1].y should be the Y value for the 510 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) 511 */ 512 y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); 513 514 arr_points[1].y = y3_max; 515 516 arr_points[1].slope = dc_fixpt_zero; 517 518 if (output_tf->tf == TRANSFER_FUNCTION_PQ) { 519 /* for PQ, we want to have a straight line from last HW X point, 520 * and the slope to be such that we hit 1.0 at 10000 nits. 521 */ 522 const struct fixed31_32 end_value = 523 dc_fixpt_from_int(125); 524 525 arr_points[1].slope = dc_fixpt_div( 526 dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), 527 dc_fixpt_sub(end_value, arr_points[1].x)); 528 } 529 530 lut_params->hw_points_num = hw_points; 531 532 k = 0; 533 for (i = 1; i < MAX_REGIONS_NUMBER; i++) { 534 if (seg_distr[k] != -1) { 535 lut_params->arr_curve_points[k].segments_num = 536 seg_distr[k]; 537 lut_params->arr_curve_points[i].offset = 538 lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); 539 } 540 k++; 541 } 542 543 if (seg_distr[k] != -1) 544 lut_params->arr_curve_points[k].segments_num = seg_distr[k]; 545 546 rgb = rgb_resulted; 547 rgb_plus_1 = rgb_resulted + 1; 548 549 i = 1; 550 while (i != hw_points + 1) { 551 if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) 552 rgb_plus_1->red = rgb->red; 553 if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) 554 rgb_plus_1->green = rgb->green; 555 if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) 556 rgb_plus_1->blue = rgb->blue; 557 558 rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); 559 rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); 560 rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); 561 562 ++rgb_plus_1; 563 ++rgb; 564 ++i; 565 } 566 cm_helper_convert_to_custom_float(rgb_resulted, 567 lut_params->arr_points, 568 hw_points, false); 569 570 return true; 571 } 572