1 2 /* 3 * Copyright 2017 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: AMD 24 * 25 */ 26 #include <drm/drm_dsc.h> 27 28 #include "os_types.h" 29 #include "rc_calc.h" 30 #include "qp_tables.h" 31 32 #define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min) 33 34 #define MODE_SELECT(val444, val422, val420) \ 35 (cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420)) 36 37 38 #define TABLE_CASE(mode, bpc, max) case (table_hash(mode, BPC_##bpc, max)): \ 39 table = qp_table_##mode##_##bpc##bpc_##max; \ 40 table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \ 41 break 42 43 44 static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, 45 enum max_min max_min, float bpp) 46 { 47 int mode = MODE_SELECT(444, 422, 420); 48 int sel = table_hash(mode, bpc, max_min); 49 int table_size = 0; 50 int index; 51 const struct qp_entry *table = 0L; 52 53 // alias enum 54 enum { min = DAL_MM_MIN, max = DAL_MM_MAX }; 55 switch (sel) { 56 TABLE_CASE(444, 8, max); 57 TABLE_CASE(444, 8, min); 58 TABLE_CASE(444, 10, max); 59 TABLE_CASE(444, 10, min); 60 TABLE_CASE(444, 12, max); 61 TABLE_CASE(444, 12, min); 62 TABLE_CASE(422, 8, max); 63 TABLE_CASE(422, 8, min); 64 TABLE_CASE(422, 10, max); 65 TABLE_CASE(422, 10, min); 66 TABLE_CASE(422, 12, max); 67 TABLE_CASE(422, 12, min); 68 TABLE_CASE(420, 8, max); 69 TABLE_CASE(420, 8, min); 70 TABLE_CASE(420, 10, max); 71 TABLE_CASE(420, 10, min); 72 TABLE_CASE(420, 12, max); 73 TABLE_CASE(420, 12, min); 74 } 75 76 if (table == 0) 77 return; 78 79 index = (bpp - table[0].bpp) * 2; 80 81 /* requested size is bigger than the table */ 82 if (index >= table_size) { 83 dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n"); 84 return; 85 } 86 87 memcpy(qps, table[index].qps, sizeof(qp_set)); 88 } 89 90 static double dsc_roundf(double num) 91 { 92 if (num < 0.0) 93 num = num - 0.5; 94 else 95 num = num + 0.5; 96 97 return (int)(num); 98 } 99 100 static double dsc_ceil(double num) 101 { 102 double retval = (int)num; 103 104 if (retval != num && num > 0) 105 retval = num + 1; 106 107 return (int)retval; 108 } 109 110 static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) 111 { 112 int *p = ofs; 113 114 if (mode == CM_444 || mode == CM_RGB) { 115 *p++ = (bpp <= 6) ? (0) : ((((bpp >= 8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (2 + dsc_roundf((bpp - 12) * (8 / 3.0)))))); 116 *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (8 / 3.0)))))); 117 *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp - 12) * (6 / 3.0)))))); 118 *p++ = (bpp <= 6) ? (-4) : ((((bpp >= 8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp - 12) * (6 / 3.0)))))); 119 *p++ = (bpp <= 6) ? (-6) : ((((bpp >= 8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf((bpp - 6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp - 12) * (6 / 3.0)))))); 120 *p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf((bpp - 12) * (6 / 3.0)))); 121 *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf((bpp - 12) * (6 / 3.0)))); 122 *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf((bpp - 12) * (4 / 3.0)))); 123 *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf((bpp - 12) * (2 / 3.0)))); 124 *p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf((bpp - 12) * (2 / 3.0)))); 125 *p++ = -10; 126 *p++ = (bpp <= 6) ? (-12) : ((bpp >= 8) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2 / 2.0)))); 127 *p++ = -12; 128 *p++ = -12; 129 *p++ = -12; 130 } else if (mode == CM_422) { 131 *p++ = (bpp <= 8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf((bpp - 8) * (8 / 2.0)))); 132 *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf((bpp - 8) * (8 / 2.0)))); 133 *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 134 *p++ = (bpp <= 8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 135 *p++ = (bpp <= 8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 136 *p++ = (bpp <= 8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 137 *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf((bpp - 8) * (6 / 2.0)))); 138 *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf((bpp - 8) * (4 / 2.0)))); 139 *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf((bpp - 8) * (2 / 2.0)))); 140 *p++ = (bpp <= 8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf((bpp - 8) * (2 / 2.0)))); 141 *p++ = -10; 142 *p++ = (bpp <= 6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf((bpp - 6) * (2.0 / 1)))); 143 *p++ = -12; 144 *p++ = -12; 145 *p++ = -12; 146 } else { 147 *p++ = (bpp <= 6) ? (2) : ((bpp >= 8) ? (10) : (2 + dsc_roundf((bpp - 6) * (8 / 2.0)))); 148 *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (8) : (0 + dsc_roundf((bpp - 6) * (8 / 2.0)))); 149 *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (6) : (0 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 150 *p++ = (bpp <= 6) ? (-2) : ((bpp >= 8) ? (4) : (-2 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 151 *p++ = (bpp <= 6) ? (-4) : ((bpp >= 8) ? (2) : (-4 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 152 *p++ = (bpp <= 6) ? (-6) : ((bpp >= 8) ? (0) : (-6 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 153 *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-2) : (-8 + dsc_roundf((bpp - 6) * (6 / 2.0)))); 154 *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-4) : (-8 + dsc_roundf((bpp - 6) * (4 / 2.0)))); 155 *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-6) : (-8 + dsc_roundf((bpp - 6) * (2 / 2.0)))); 156 *p++ = (bpp <= 6) ? (-10) : ((bpp >= 8) ? (-8) : (-10 + dsc_roundf((bpp - 6) * (2 / 2.0)))); 157 *p++ = -10; 158 *p++ = (bpp <= 4) ? (-12) : ((bpp >= 5) ? (-10) : (-12 + dsc_roundf((bpp - 4) * (2 / 1.0)))); 159 *p++ = -12; 160 *p++ = -12; 161 *p++ = -12; 162 } 163 } 164 165 static int median3(int a, int b, int c) 166 { 167 if (a > b) 168 swap(a, b); 169 if (b > c) 170 swap(b, c); 171 if (a > b) 172 swap(b, c); 173 174 return b; 175 } 176 177 static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm, 178 enum bits_per_comp bpc, u16 drm_bpp, 179 bool is_navite_422_or_420, 180 int slice_width, int slice_height, 181 int minor_version) 182 { 183 float bpp; 184 float bpp_group; 185 float initial_xmit_delay_factor; 186 int padding_pixels; 187 int i; 188 189 bpp = ((float)drm_bpp / 16.0); 190 /* in native_422 or native_420 modes, the bits_per_pixel is double the 191 * target bpp (the latter is what calc_rc_params expects) 192 */ 193 if (is_navite_422_or_420) 194 bpp /= 2.0; 195 196 rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 197 rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 198 199 bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0); 200 201 switch (cm) { 202 case CM_420: 203 rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf((bpp - 4) * (512))) : (5632 - dsc_roundf((bpp - 5) * (3584))))); 204 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 3) - (3 * bpp_group))); 205 rc->second_line_bpg_offset = median3(0, 12, (int)((3 * bpc * 3) - (3 * bpp_group))); 206 break; 207 case CM_422: 208 rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf((bpp - 7) * (3584)))); 209 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)((3 * bpc * 4) - (3 * bpp_group))); 210 rc->second_line_bpg_offset = 0; 211 break; 212 case CM_444: 213 case CM_RGB: 214 rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf((bpp - 8) * (512 / 2))) : (5632 - dsc_roundf((bpp - 10) * (3584 / 2))))); 215 rc->first_line_bpg_offset = median3(0, (12 + (int) (0.09 * min(34, slice_height - 8))), (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group))); 216 rc->second_line_bpg_offset = 0; 217 break; 218 } 219 220 initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0; 221 rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/initial_xmit_delay_factor); 222 223 if (cm == CM_422 || cm == CM_420) 224 slice_width /= 2; 225 226 padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0; 227 if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) { 228 if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1) 229 rc->initial_xmit_delay++; 230 } 231 232 rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 233 rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); 234 rc->flatness_det_thresh = 2 << (bpc - 8); 235 236 get_qp_set(rc->qp_min, cm, bpc, DAL_MM_MIN, bpp); 237 get_qp_set(rc->qp_max, cm, bpc, DAL_MM_MAX, bpp); 238 if (cm == CM_444 && minor_version == 1) { 239 for (i = 0; i < QP_SET_SIZE; ++i) { 240 rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0; 241 rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0; 242 } 243 } 244 get_ofs_set(rc->ofs, cm, bpp); 245 246 /* fixed parameters */ 247 rc->rc_model_size = 8192; 248 rc->rc_edge_factor = 6; 249 rc->rc_tgt_offset_hi = 3; 250 rc->rc_tgt_offset_lo = 3; 251 252 rc->rc_buf_thresh[0] = 896; 253 rc->rc_buf_thresh[1] = 1792; 254 rc->rc_buf_thresh[2] = 2688; 255 rc->rc_buf_thresh[3] = 3584; 256 rc->rc_buf_thresh[4] = 4480; 257 rc->rc_buf_thresh[5] = 5376; 258 rc->rc_buf_thresh[6] = 6272; 259 rc->rc_buf_thresh[7] = 6720; 260 rc->rc_buf_thresh[8] = 7168; 261 rc->rc_buf_thresh[9] = 7616; 262 rc->rc_buf_thresh[10] = 7744; 263 rc->rc_buf_thresh[11] = 7872; 264 rc->rc_buf_thresh[12] = 8000; 265 rc->rc_buf_thresh[13] = 8064; 266 } 267 268 static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp, 269 bool is_navite_422_or_420) 270 { 271 float bpp; 272 u32 bytes_per_pixel; 273 double d_bytes_per_pixel; 274 275 bpp = ((float)drm_bpp / 16.0); 276 d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width; 277 // TODO: Make sure the formula for calculating this is precise (ceiling 278 // vs. floor, and at what point they should be applied) 279 if (is_navite_422_or_420) 280 d_bytes_per_pixel /= 2; 281 282 bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000); 283 284 return bytes_per_pixel; 285 } 286 287 static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, 288 u32 bpp_increment_div) 289 { 290 u32 dsc_target_bpp_x16; 291 float f_dsc_target_bpp; 292 float f_stream_bandwidth_100bps; 293 // bpp_increment_div is actually precision 294 u32 precision = bpp_increment_div; 295 296 f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; 297 f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; 298 299 // Round down to the nearest precision stop to bring it into DSC spec 300 // range 301 dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision); 302 dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; 303 304 return dsc_target_bpp_x16; 305 } 306 307 /** 308 * calc_rc_params - reads the user's cmdline mode 309 * @rc: DC internal DSC parameters 310 * @pps: DRM struct with all required DSC values 311 * 312 * This function expects a drm_dsc_config data struct with all the required DSC 313 * values previously filled out by our driver and based on this information it 314 * computes some of the DSC values. 315 * 316 * @note This calculation requires float point operation, most of it executes 317 * under kernel_fpu_{begin,end}. 318 */ 319 void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps) 320 { 321 enum colour_mode mode; 322 enum bits_per_comp bpc; 323 bool is_navite_422_or_420; 324 u16 drm_bpp = pps->bits_per_pixel; 325 int slice_width = pps->slice_width; 326 int slice_height = pps->slice_height; 327 328 mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 : 329 (pps->native_422 ? CM_422 : 330 pps->native_420 ? CM_420 : CM_444)); 331 bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10) 332 ? BPC_10 : BPC_12; 333 334 is_navite_422_or_420 = pps->native_422 || pps->native_420; 335 336 DC_FP_START(); 337 _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420, 338 slice_width, slice_height, 339 pps->dsc_version_minor); 340 DC_FP_END(); 341 } 342 343 /** 344 * calc_dsc_bytes_per_pixel - calculate bytes per pixel 345 * @pps: DRM struct with all required DSC values 346 * 347 * Based on the information inside drm_dsc_config, this function calculates the 348 * total of bytes per pixel. 349 * 350 * @note This calculation requires float point operation, most of it executes 351 * under kernel_fpu_{begin,end}. 352 * 353 * Return: 354 * Return the number of bytes per pixel 355 */ 356 u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) 357 358 { 359 u32 ret; 360 u16 drm_bpp = pps->bits_per_pixel; 361 int slice_width = pps->slice_width; 362 bool is_navite_422_or_420 = pps->native_422 || pps->native_420; 363 364 DC_FP_START(); 365 ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp, 366 is_navite_422_or_420); 367 DC_FP_END(); 368 return ret; 369 } 370 371 /** 372 * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel 373 * @stream_bandwidth_kbps: 374 * @pix_clk_100hz: 375 * @bpp_increment_div: 376 * 377 * Calculate the total of bits per pixel for DSC configuration. 378 * 379 * @note This calculation requires float point operation, most of it executes 380 * under kernel_fpu_{begin,end}. 381 */ 382 u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, 383 u32 bpp_increment_div) 384 { 385 u32 dsc_bpp; 386 387 DC_FP_START(); 388 dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz, 389 bpp_increment_div); 390 DC_FP_END(); 391 return dsc_bpp; 392 } 393