1 /* 2 * Copyright 2020 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 27 #include "dm_services.h" 28 #include "reg_helper.h" 29 #include "dcn30_hubbub.h" 30 31 32 #define CTX \ 33 hubbub1->base.ctx 34 #define DC_LOGGER \ 35 hubbub1->base.ctx->logger 36 #define REG(reg)\ 37 hubbub1->regs->reg 38 39 #undef FN 40 #define FN(reg_name, field_name) \ 41 hubbub1->shifts->field_name, hubbub1->masks->field_name 42 43 #ifdef NUM_VMID 44 #undef NUM_VMID 45 #endif 46 #define NUM_VMID 16 47 48 49 static uint32_t convert_and_clamp( 50 uint32_t wm_ns, 51 uint32_t refclk_mhz, 52 uint32_t clamp_value) 53 { 54 uint32_t ret_val = 0; 55 ret_val = wm_ns * refclk_mhz; 56 ret_val /= 1000; 57 58 if (ret_val > clamp_value) 59 ret_val = clamp_value; 60 61 return ret_val; 62 } 63 64 int hubbub3_init_dchub_sys_ctx(struct hubbub *hubbub, 65 struct dcn_hubbub_phys_addr_config *pa_config) 66 { 67 struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); 68 struct dcn_vmid_page_table_config phys_config; 69 70 REG_SET(DCN_VM_FB_LOCATION_BASE, 0, 71 FB_BASE, pa_config->system_aperture.fb_base >> 24); 72 REG_SET(DCN_VM_FB_LOCATION_TOP, 0, 73 FB_TOP, pa_config->system_aperture.fb_top >> 24); 74 REG_SET(DCN_VM_FB_OFFSET, 0, 75 FB_OFFSET, pa_config->system_aperture.fb_offset >> 24); 76 REG_SET(DCN_VM_AGP_BOT, 0, 77 AGP_BOT, pa_config->system_aperture.agp_bot >> 24); 78 REG_SET(DCN_VM_AGP_TOP, 0, 79 AGP_TOP, pa_config->system_aperture.agp_top >> 24); 80 REG_SET(DCN_VM_AGP_BASE, 0, 81 AGP_BASE, pa_config->system_aperture.agp_base >> 24); 82 83 if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) { 84 phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12; 85 phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12; 86 phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr; 87 phys_config.depth = 0; 88 phys_config.block_size = 0; 89 // Init VMID 0 based on PA config 90 dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config); 91 } 92 93 return NUM_VMID; 94 } 95 96 bool hubbub3_program_watermarks( 97 struct hubbub *hubbub, 98 struct dcn_watermark_set *watermarks, 99 unsigned int refclk_mhz, 100 bool safe_to_lower) 101 { 102 struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); 103 bool wm_pending = false; 104 105 if (hubbub21_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) 106 wm_pending = true; 107 108 if (hubbub21_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) 109 wm_pending = true; 110 111 if (hubbub21_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) 112 wm_pending = true; 113 114 /* 115 * The DCHub arbiter has a mechanism to dynamically rate limit the DCHub request stream to the fabric. 116 * If the memory controller is fully utilized and the DCHub requestors are 117 * well ahead of their amortized schedule, then it is safe to prevent the next winner 118 * from being committed and sent to the fabric. 119 * The utilization of the memory controller is approximated by ensuring that 120 * the number of outstanding requests is greater than a threshold specified 121 * by the ARB_MIN_REQ_OUTSTANDING. To determine that the DCHub requestors are well ahead of the amortized schedule, 122 * the slack of the next winner is compared with the ARB_SAT_LEVEL in DLG RefClk cycles. 123 * 124 * TODO: Revisit request limit after figure out right number. request limit for Renoir isn't decided yet, set maximum value (0x1FF) 125 * to turn off it for now. 126 */ 127 REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0, 128 DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz); 129 REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, 130 DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF); 131 132 hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); 133 134 return wm_pending; 135 } 136 137 bool hubbub3_dcc_support_swizzle( 138 enum swizzle_mode_values swizzle, 139 unsigned int bytes_per_element, 140 enum segment_order *segment_order_horz, 141 enum segment_order *segment_order_vert) 142 { 143 bool standard_swizzle = false; 144 bool display_swizzle = false; 145 bool render_swizzle = false; 146 147 switch (swizzle) { 148 case DC_SW_4KB_S: 149 case DC_SW_64KB_S: 150 case DC_SW_VAR_S: 151 case DC_SW_4KB_S_X: 152 case DC_SW_64KB_S_X: 153 case DC_SW_VAR_S_X: 154 standard_swizzle = true; 155 break; 156 case DC_SW_4KB_R: 157 case DC_SW_64KB_R: 158 case DC_SW_VAR_R: 159 case DC_SW_4KB_R_X: 160 case DC_SW_64KB_R_X: 161 case DC_SW_VAR_R_X: 162 render_swizzle = true; 163 break; 164 case DC_SW_4KB_D: 165 case DC_SW_64KB_D: 166 case DC_SW_VAR_D: 167 case DC_SW_4KB_D_X: 168 case DC_SW_64KB_D_X: 169 case DC_SW_VAR_D_X: 170 display_swizzle = true; 171 break; 172 default: 173 break; 174 } 175 176 if (standard_swizzle) { 177 if (bytes_per_element == 1) { 178 *segment_order_horz = segment_order__contiguous; 179 *segment_order_vert = segment_order__na; 180 return true; 181 } 182 if (bytes_per_element == 2) { 183 *segment_order_horz = segment_order__non_contiguous; 184 *segment_order_vert = segment_order__contiguous; 185 return true; 186 } 187 if (bytes_per_element == 4) { 188 *segment_order_horz = segment_order__non_contiguous; 189 *segment_order_vert = segment_order__contiguous; 190 return true; 191 } 192 if (bytes_per_element == 8) { 193 *segment_order_horz = segment_order__na; 194 *segment_order_vert = segment_order__contiguous; 195 return true; 196 } 197 } 198 if (render_swizzle) { 199 if (bytes_per_element == 1) { 200 *segment_order_horz = segment_order__contiguous; 201 *segment_order_vert = segment_order__na; 202 return true; 203 } 204 if (bytes_per_element == 2) { 205 *segment_order_horz = segment_order__non_contiguous; 206 *segment_order_vert = segment_order__contiguous; 207 return true; 208 } 209 if (bytes_per_element == 4) { 210 *segment_order_horz = segment_order__contiguous; 211 *segment_order_vert = segment_order__non_contiguous; 212 return true; 213 } 214 if (bytes_per_element == 8) { 215 *segment_order_horz = segment_order__contiguous; 216 *segment_order_vert = segment_order__non_contiguous; 217 return true; 218 } 219 } 220 if (display_swizzle && bytes_per_element == 8) { 221 *segment_order_horz = segment_order__contiguous; 222 *segment_order_vert = segment_order__non_contiguous; 223 return true; 224 } 225 226 return false; 227 } 228 229 static void hubbub3_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height, 230 unsigned int bytes_per_element) 231 { 232 /* copied from DML. might want to refactor DML to leverage from DML */ 233 /* DML : get_blk256_size */ 234 if (bytes_per_element == 1) { 235 *blk256_width = 16; 236 *blk256_height = 16; 237 } else if (bytes_per_element == 2) { 238 *blk256_width = 16; 239 *blk256_height = 8; 240 } else if (bytes_per_element == 4) { 241 *blk256_width = 8; 242 *blk256_height = 8; 243 } else if (bytes_per_element == 8) { 244 *blk256_width = 8; 245 *blk256_height = 4; 246 } 247 } 248 249 static void hubbub3_det_request_size( 250 unsigned int detile_buf_size, 251 unsigned int height, 252 unsigned int width, 253 unsigned int bpe, 254 bool *req128_horz_wc, 255 bool *req128_vert_wc) 256 { 257 unsigned int blk256_height = 0; 258 unsigned int blk256_width = 0; 259 unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc; 260 261 hubbub3_get_blk256_size(&blk256_width, &blk256_height, bpe); 262 263 swath_bytes_horz_wc = width * blk256_height * bpe; 264 swath_bytes_vert_wc = height * blk256_width * bpe; 265 266 *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ? 267 false : /* full 256B request */ 268 true; /* half 128b request */ 269 270 *req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ? 271 false : /* full 256B request */ 272 true; /* half 128b request */ 273 } 274 275 bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub, 276 const struct dc_dcc_surface_param *input, 277 struct dc_surface_dcc_cap *output) 278 { 279 struct dc *dc = hubbub->ctx->dc; 280 /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */ 281 enum dcc_control dcc_control; 282 unsigned int bpe; 283 enum segment_order segment_order_horz, segment_order_vert; 284 bool req128_horz_wc, req128_vert_wc; 285 286 memset(output, 0, sizeof(*output)); 287 288 if (dc->debug.disable_dcc == DCC_DISABLE) 289 return false; 290 291 if (!hubbub->funcs->dcc_support_pixel_format(input->format, 292 &bpe)) 293 return false; 294 295 if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe, 296 &segment_order_horz, &segment_order_vert)) 297 return false; 298 299 hubbub3_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, 300 input->surface_size.height, input->surface_size.width, 301 bpe, &req128_horz_wc, &req128_vert_wc); 302 303 if (!req128_horz_wc && !req128_vert_wc) { 304 dcc_control = dcc_control__256_256_xxx; 305 } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { 306 if (!req128_horz_wc) 307 dcc_control = dcc_control__256_256_xxx; 308 else if (segment_order_horz == segment_order__contiguous) 309 dcc_control = dcc_control__128_128_xxx; 310 else 311 dcc_control = dcc_control__256_64_64; 312 } else if (input->scan == SCAN_DIRECTION_VERTICAL) { 313 if (!req128_vert_wc) 314 dcc_control = dcc_control__256_256_xxx; 315 else if (segment_order_vert == segment_order__contiguous) 316 dcc_control = dcc_control__128_128_xxx; 317 else 318 dcc_control = dcc_control__256_64_64; 319 } else { 320 if ((req128_horz_wc && 321 segment_order_horz == segment_order__non_contiguous) || 322 (req128_vert_wc && 323 segment_order_vert == segment_order__non_contiguous)) 324 /* access_dir not known, must use most constraining */ 325 dcc_control = dcc_control__256_64_64; 326 else 327 /* reg128 is true for either horz and vert 328 * but segment_order is contiguous 329 */ 330 dcc_control = dcc_control__128_128_xxx; 331 } 332 333 /* Exception for 64KB_R_X */ 334 if ((bpe == 2) && (input->swizzle_mode == DC_SW_64KB_R_X)) 335 dcc_control = dcc_control__128_128_xxx; 336 337 if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE && 338 dcc_control != dcc_control__256_256_xxx) 339 return false; 340 341 switch (dcc_control) { 342 case dcc_control__256_256_xxx: 343 output->grph.rgb.max_uncompressed_blk_size = 256; 344 output->grph.rgb.max_compressed_blk_size = 256; 345 output->grph.rgb.independent_64b_blks = false; 346 output->grph.rgb.dcc_controls.dcc_256_256_unconstrained = 1; 347 output->grph.rgb.dcc_controls.dcc_256_128_128 = 1; 348 break; 349 case dcc_control__128_128_xxx: 350 output->grph.rgb.max_uncompressed_blk_size = 128; 351 output->grph.rgb.max_compressed_blk_size = 128; 352 output->grph.rgb.independent_64b_blks = false; 353 output->grph.rgb.dcc_controls.dcc_128_128_uncontrained = 1; 354 output->grph.rgb.dcc_controls.dcc_256_128_128 = 1; 355 break; 356 case dcc_control__256_64_64: 357 output->grph.rgb.max_uncompressed_blk_size = 256; 358 output->grph.rgb.max_compressed_blk_size = 64; 359 output->grph.rgb.independent_64b_blks = true; 360 output->grph.rgb.dcc_controls.dcc_256_64_64 = 1; 361 break; 362 case dcc_control__256_128_128: 363 output->grph.rgb.max_uncompressed_blk_size = 256; 364 output->grph.rgb.max_compressed_blk_size = 128; 365 output->grph.rgb.independent_64b_blks = false; 366 output->grph.rgb.dcc_controls.dcc_256_128_128 = 1; 367 break; 368 } 369 output->capable = true; 370 output->const_color_support = true; 371 372 return true; 373 } 374 375 void hubbub3_force_wm_propagate_to_pipes(struct hubbub *hubbub) 376 { 377 struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); 378 uint32_t refclk_mhz = hubbub->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; 379 uint32_t prog_wm_value = convert_and_clamp(hubbub1->watermarks.a.urgent_ns, 380 refclk_mhz, 0x1fffff); 381 382 REG_SET_2(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, 383 DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value, 384 DCHUBBUB_ARB_VM_ROW_URGENCY_WATERMARK_A, prog_wm_value); 385 } 386 387 void hubbub3_force_pstate_change_control(struct hubbub *hubbub, 388 bool force, bool allow) 389 { 390 struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); 391 392 REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL, 393 DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, allow, 394 DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, force); 395 } 396 397 /* Copy values from WM set A to all other sets */ 398 void hubbub3_init_watermarks(struct hubbub *hubbub) 399 { 400 struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); 401 uint32_t reg; 402 403 reg = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A); 404 REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, reg); 405 REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, reg); 406 REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, reg); 407 408 reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A); 409 REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, reg); 410 REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C, reg); 411 REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D, reg); 412 413 reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A); 414 REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, reg); 415 REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_C, reg); 416 REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, reg); 417 418 reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A); 419 REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, reg); 420 REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, reg); 421 REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, reg); 422 423 reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A); 424 REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, reg); 425 REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, reg); 426 REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, reg); 427 428 reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A); 429 REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, reg); 430 REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, reg); 431 REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, reg); 432 433 reg = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A); 434 REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, reg); 435 REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, reg); 436 REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, reg); 437 } 438 439 static const struct hubbub_funcs hubbub30_funcs = { 440 .update_dchub = hubbub2_update_dchub, 441 .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, 442 .init_vm_ctx = hubbub2_init_vm_ctx, 443 .dcc_support_swizzle = hubbub3_dcc_support_swizzle, 444 .dcc_support_pixel_format = hubbub2_dcc_support_pixel_format, 445 .get_dcc_compression_cap = hubbub3_get_dcc_compression_cap, 446 .wm_read_state = hubbub21_wm_read_state, 447 .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, 448 .program_watermarks = hubbub3_program_watermarks, 449 .allow_self_refresh_control = hubbub1_allow_self_refresh_control, 450 .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, 451 .force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes, 452 .force_pstate_change_control = hubbub3_force_pstate_change_control, 453 .init_watermarks = hubbub3_init_watermarks, 454 }; 455 456 void hubbub3_construct(struct dcn20_hubbub *hubbub3, 457 struct dc_context *ctx, 458 const struct dcn_hubbub_registers *hubbub_regs, 459 const struct dcn_hubbub_shift *hubbub_shift, 460 const struct dcn_hubbub_mask *hubbub_mask) 461 { 462 hubbub3->base.ctx = ctx; 463 hubbub3->base.funcs = &hubbub30_funcs; 464 hubbub3->regs = hubbub_regs; 465 hubbub3->shifts = hubbub_shift; 466 hubbub3->masks = hubbub_mask; 467 468 hubbub3->debug_test_index_pstate = 0xB; 469 hubbub3->detile_buf_size = 184 * 1024; /* 184KB for DCN3 */ 470 } 471 472