1 /*
2  * Copyright 2020 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 
26 
27 #include "dm_services.h"
28 #include "reg_helper.h"
29 #include "dcn30_hubbub.h"
30 
31 
/*
 * Shorthand used by the functions in this file.  Every macro below expands
 * to an expression on a local variable named "hubbub1", so it is only usable
 * inside a function that declares one.
 */
#define CTX hubbub1->base.ctx
#define DC_LOGGER hubbub1->base.ctx->logger
#define REG(reg) hubbub1->regs->reg

/* Expands to "shift, mask" for a field; the reg_name argument is unused. */
#undef FN
#define FN(reg_name, field_name) \
	hubbub1->shifts->field_name, hubbub1->masks->field_name

/* Replace any earlier definition; returned by hubbub3_init_dchub_sys_ctx(). */
#undef NUM_VMID
#define NUM_VMID 16
47 
48 
/*
 * Convert a watermark given in nanoseconds into reference clock cycles,
 * i.e. floor(wm_ns * refclk_mhz / 1000), saturated at clamp_value.
 *
 * @wm_ns:       watermark in nanoseconds
 * @refclk_mhz:  reference clock in MHz
 * @clamp_value: largest value that may be returned
 *
 * A plain 32-bit "wm_ns * refclk_mhz" wraps around for large watermarks and
 * would then yield a small bogus cycle count instead of the clamp.  The
 * computation is therefore split so that no intermediate value can exceed
 * 32 bits; 64-bit division is deliberately not used.
 */
static uint32_t convert_and_clamp(
	uint32_t wm_ns,
	uint32_t refclk_mhz,
	uint32_t clamp_value)
{
	uint32_t whole_us = wm_ns / 1000;
	uint32_t rem_ns = wm_ns % 1000;
	uint32_t cycles;
	uint32_t frac_cycles;

	if (refclk_mhz == 0)
		return 0;

	/* The whole-microsecond part alone already exceeds the clamp. */
	if (whole_us > clamp_value / refclk_mhz)
		return clamp_value;

	/* Bounded by clamp_value thanks to the check above: cannot wrap. */
	cycles = whole_us * refclk_mhz;

	/*
	 * floor(rem_ns * refclk_mhz / 1000), split on refclk_mhz so that each
	 * product stays below 2^32 (rem_ns is at most 999).
	 */
	frac_cycles = rem_ns * (refclk_mhz / 1000) +
			rem_ns * (refclk_mhz % 1000) / 1000;

	/* Saturating add; cycles <= clamp_value so this cannot underflow. */
	if (frac_cycles > clamp_value - cycles)
		return clamp_value;

	return cycles + frac_cycles;
}
63 
64 int hubbub3_init_dchub_sys_ctx(struct hubbub *hubbub,
65 		struct dcn_hubbub_phys_addr_config *pa_config)
66 {
67 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
68 	struct dcn_vmid_page_table_config phys_config;
69 
70 	REG_SET(DCN_VM_FB_LOCATION_BASE, 0,
71 			FB_BASE, pa_config->system_aperture.fb_base >> 24);
72 	REG_SET(DCN_VM_FB_LOCATION_TOP, 0,
73 			FB_TOP, pa_config->system_aperture.fb_top >> 24);
74 	REG_SET(DCN_VM_FB_OFFSET, 0,
75 			FB_OFFSET, pa_config->system_aperture.fb_offset >> 24);
76 	REG_SET(DCN_VM_AGP_BOT, 0,
77 			AGP_BOT, pa_config->system_aperture.agp_bot >> 24);
78 	REG_SET(DCN_VM_AGP_TOP, 0,
79 			AGP_TOP, pa_config->system_aperture.agp_top >> 24);
80 	REG_SET(DCN_VM_AGP_BASE, 0,
81 			AGP_BASE, pa_config->system_aperture.agp_base >> 24);
82 
83 	if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) {
84 		phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12;
85 		phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12;
86 		phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
87 		phys_config.depth = 0;
88 		phys_config.block_size = 0;
89 		// Init VMID 0 based on PA config
90 		dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config);
91 	}
92 
93 	return NUM_VMID;
94 }
95 
96 bool hubbub3_program_watermarks(
97 		struct hubbub *hubbub,
98 		struct dcn_watermark_set *watermarks,
99 		unsigned int refclk_mhz,
100 		bool safe_to_lower)
101 {
102 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
103 	bool wm_pending = false;
104 
105 	if (hubbub21_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
106 		wm_pending = true;
107 
108 	if (hubbub21_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
109 		wm_pending = true;
110 
111 	if (hubbub21_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
112 		wm_pending = true;
113 
114 	/*
115 	 * The DCHub arbiter has a mechanism to dynamically rate limit the DCHub request stream to the fabric.
116 	 * If the memory controller is fully utilized and the DCHub requestors are
117 	 * well ahead of their amortized schedule, then it is safe to prevent the next winner
118 	 * from being committed and sent to the fabric.
119 	 * The utilization of the memory controller is approximated by ensuring that
120 	 * the number of outstanding requests is greater than a threshold specified
121 	 * by the ARB_MIN_REQ_OUTSTANDING. To determine that the DCHub requestors are well ahead of the amortized schedule,
122 	 * the slack of the next winner is compared with the ARB_SAT_LEVEL in DLG RefClk cycles.
123 	 *
124 	 * TODO: Revisit request limit after figure out right number. request limit for Renoir isn't decided yet, set maximum value (0x1FF)
125 	 * to turn off it for now.
126 	 */
127 	REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0,
128 			DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
129 	REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
130 			DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);
131 
132 	hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
133 
134 	return wm_pending;
135 }
136 
137 bool hubbub3_dcc_support_swizzle(
138 		enum swizzle_mode_values swizzle,
139 		unsigned int bytes_per_element,
140 		enum segment_order *segment_order_horz,
141 		enum segment_order *segment_order_vert)
142 {
143 	bool standard_swizzle = false;
144 	bool display_swizzle = false;
145 	bool render_swizzle = false;
146 
147 	switch (swizzle) {
148 	case DC_SW_4KB_S:
149 	case DC_SW_64KB_S:
150 	case DC_SW_VAR_S:
151 	case DC_SW_4KB_S_X:
152 	case DC_SW_64KB_S_X:
153 	case DC_SW_VAR_S_X:
154 		standard_swizzle = true;
155 		break;
156 	case DC_SW_4KB_R:
157 	case DC_SW_64KB_R:
158 	case DC_SW_VAR_R:
159 	case DC_SW_4KB_R_X:
160 	case DC_SW_64KB_R_X:
161 	case DC_SW_VAR_R_X:
162 		render_swizzle = true;
163 		break;
164 	case DC_SW_4KB_D:
165 	case DC_SW_64KB_D:
166 	case DC_SW_VAR_D:
167 	case DC_SW_4KB_D_X:
168 	case DC_SW_64KB_D_X:
169 	case DC_SW_VAR_D_X:
170 		display_swizzle = true;
171 		break;
172 	default:
173 		break;
174 	}
175 
176 	if (standard_swizzle) {
177 		if (bytes_per_element == 1) {
178 			*segment_order_horz = segment_order__contiguous;
179 			*segment_order_vert = segment_order__na;
180 			return true;
181 		}
182 		if (bytes_per_element == 2) {
183 			*segment_order_horz = segment_order__non_contiguous;
184 			*segment_order_vert = segment_order__contiguous;
185 			return true;
186 		}
187 		if (bytes_per_element == 4) {
188 			*segment_order_horz = segment_order__non_contiguous;
189 			*segment_order_vert = segment_order__contiguous;
190 			return true;
191 		}
192 		if (bytes_per_element == 8) {
193 			*segment_order_horz = segment_order__na;
194 			*segment_order_vert = segment_order__contiguous;
195 			return true;
196 		}
197 	}
198 	if (render_swizzle) {
199 		if (bytes_per_element == 1) {
200 			*segment_order_horz = segment_order__contiguous;
201 			*segment_order_vert = segment_order__na;
202 			return true;
203 		}
204 		if (bytes_per_element == 2) {
205 			*segment_order_horz = segment_order__non_contiguous;
206 			*segment_order_vert = segment_order__contiguous;
207 			return true;
208 		}
209 		if (bytes_per_element == 4) {
210 			*segment_order_horz = segment_order__contiguous;
211 			*segment_order_vert = segment_order__non_contiguous;
212 			return true;
213 		}
214 		if (bytes_per_element == 8) {
215 			*segment_order_horz = segment_order__contiguous;
216 			*segment_order_vert = segment_order__non_contiguous;
217 			return true;
218 		}
219 	}
220 	if (display_swizzle && bytes_per_element == 8) {
221 		*segment_order_horz = segment_order__contiguous;
222 		*segment_order_vert = segment_order__non_contiguous;
223 		return true;
224 	}
225 
226 	return false;
227 }
228 
/*
 * Report the width and height of a 256-byte block for the given element
 * size (copied from DML: get_blk256_size; might be worth refactoring so
 * that the DML version is used directly).
 *
 * Element sizes other than 1, 2, 4 and 8 bytes leave both outputs untouched.
 */
static void hubbub3_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height,
		unsigned int bytes_per_element)
{
	switch (bytes_per_element) {
	case 1:
		*blk256_width = 16;
		*blk256_height = 16;
		break;
	case 2:
		*blk256_width = 16;
		*blk256_height = 8;
		break;
	case 4:
		*blk256_width = 8;
		*blk256_height = 8;
		break;
	case 8:
		*blk256_width = 8;
		*blk256_height = 4;
		break;
	default:
		break;
	}
}
248 
/*
 * Decide, separately for horizontal and vertical access, whether half
 * (128 byte) requests are needed instead of full 256 byte requests.
 *
 * A direction gets full requests (output false) when twice its swath size
 * in bytes fits in detile_buf_size, and half requests (output true)
 * otherwise.
 */
static void hubbub3_det_request_size(
		unsigned int detile_buf_size,
		unsigned int height,
		unsigned int width,
		unsigned int bpe,
		bool *req128_horz_wc,
		bool *req128_vert_wc)
{
	unsigned int blk_w = 0;
	unsigned int blk_h = 0;
	unsigned int horz_swath_bytes;
	unsigned int vert_swath_bytes;

	/* blk_w/blk_h stay 0 when the helper does not know this bpe. */
	hubbub3_get_blk256_size(&blk_w, &blk_h, bpe);

	horz_swath_bytes = width * blk_h * bpe;
	vert_swath_bytes = height * blk_w * bpe;

	/* true: half 128B request, false: full 256B request */
	*req128_horz_wc = 2 * horz_swath_bytes > detile_buf_size;
	*req128_vert_wc = 2 * vert_swath_bytes > detile_buf_size;
}
274 
275 bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
276 		const struct dc_dcc_surface_param *input,
277 		struct dc_surface_dcc_cap *output)
278 {
279 	struct dc *dc = hubbub->ctx->dc;
280 	/* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */
281 	enum dcc_control dcc_control;
282 	unsigned int bpe;
283 	enum segment_order segment_order_horz, segment_order_vert;
284 	bool req128_horz_wc, req128_vert_wc;
285 
286 	memset(output, 0, sizeof(*output));
287 
288 	if (dc->debug.disable_dcc == DCC_DISABLE)
289 		return false;
290 
291 	if (!hubbub->funcs->dcc_support_pixel_format(input->format,
292 			&bpe))
293 		return false;
294 
295 	if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe,
296 			&segment_order_horz, &segment_order_vert))
297 		return false;
298 
299 	hubbub3_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size,
300 			input->surface_size.height,  input->surface_size.width,
301 			bpe, &req128_horz_wc, &req128_vert_wc);
302 
303 	if (!req128_horz_wc && !req128_vert_wc) {
304 		dcc_control = dcc_control__256_256_xxx;
305 	} else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
306 		if (!req128_horz_wc)
307 			dcc_control = dcc_control__256_256_xxx;
308 		else if (segment_order_horz == segment_order__contiguous)
309 			dcc_control = dcc_control__128_128_xxx;
310 		else
311 			dcc_control = dcc_control__256_64_64;
312 	} else if (input->scan == SCAN_DIRECTION_VERTICAL) {
313 		if (!req128_vert_wc)
314 			dcc_control = dcc_control__256_256_xxx;
315 		else if (segment_order_vert == segment_order__contiguous)
316 			dcc_control = dcc_control__128_128_xxx;
317 		else
318 			dcc_control = dcc_control__256_64_64;
319 	} else {
320 		if ((req128_horz_wc &&
321 			segment_order_horz == segment_order__non_contiguous) ||
322 			(req128_vert_wc &&
323 			segment_order_vert == segment_order__non_contiguous))
324 			/* access_dir not known, must use most constraining */
325 			dcc_control = dcc_control__256_64_64;
326 		else
327 			/* reg128 is true for either horz and vert
328 			 * but segment_order is contiguous
329 			 */
330 			dcc_control = dcc_control__128_128_xxx;
331 	}
332 
333 	/* Exception for 64KB_R_X */
334 	if ((bpe == 2) && (input->swizzle_mode == DC_SW_64KB_R_X))
335 		dcc_control = dcc_control__128_128_xxx;
336 
337 	if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
338 		dcc_control != dcc_control__256_256_xxx)
339 		return false;
340 
341 	switch (dcc_control) {
342 	case dcc_control__256_256_xxx:
343 		output->grph.rgb.max_uncompressed_blk_size = 256;
344 		output->grph.rgb.max_compressed_blk_size = 256;
345 		output->grph.rgb.independent_64b_blks = false;
346 		output->grph.rgb.dcc_controls.dcc_256_256_unconstrained = 1;
347 		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
348 		break;
349 	case dcc_control__128_128_xxx:
350 		output->grph.rgb.max_uncompressed_blk_size = 128;
351 		output->grph.rgb.max_compressed_blk_size = 128;
352 		output->grph.rgb.independent_64b_blks = false;
353 		output->grph.rgb.dcc_controls.dcc_128_128_uncontrained = 1;
354 		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
355 		break;
356 	case dcc_control__256_64_64:
357 		output->grph.rgb.max_uncompressed_blk_size = 256;
358 		output->grph.rgb.max_compressed_blk_size = 64;
359 		output->grph.rgb.independent_64b_blks = true;
360 		output->grph.rgb.dcc_controls.dcc_256_64_64 = 1;
361 		break;
362 	case dcc_control__256_128_128:
363 		output->grph.rgb.max_uncompressed_blk_size = 256;
364 		output->grph.rgb.max_compressed_blk_size = 128;
365 		output->grph.rgb.independent_64b_blks = false;
366 		output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
367 		break;
368 	}
369 	output->capable = true;
370 	output->const_color_support = true;
371 
372 	return true;
373 }
374 
375 void hubbub3_force_wm_propagate_to_pipes(struct hubbub *hubbub)
376 {
377 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
378 	uint32_t refclk_mhz = hubbub->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000;
379 	uint32_t prog_wm_value = convert_and_clamp(hubbub1->watermarks.a.urgent_ns,
380 			refclk_mhz, 0x1fffff);
381 
382 	REG_SET_2(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0,
383 			DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value,
384 			DCHUBBUB_ARB_VM_ROW_URGENCY_WATERMARK_A, prog_wm_value);
385 }
386 
387 void hubbub3_force_pstate_change_control(struct hubbub *hubbub,
388 		bool force, bool allow)
389 {
390 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
391 
392 	REG_UPDATE_2(DCHUBBUB_ARB_DRAM_STATE_CNTL,
393 			DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, allow,
394 			DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, force);
395 }
396 
397 /* Copy values from WM set A to all other sets */
398 void hubbub3_init_watermarks(struct hubbub *hubbub)
399 {
400 	struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
401 	uint32_t reg;
402 
403 	reg = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A);
404 	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, reg);
405 	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, reg);
406 	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, reg);
407 
408 	reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A);
409 	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, reg);
410 	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C, reg);
411 	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D, reg);
412 
413 	reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A);
414 	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, reg);
415 	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_C, reg);
416 	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, reg);
417 
418 	reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A);
419 	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, reg);
420 	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, reg);
421 	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, reg);
422 
423 	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
424 	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, reg);
425 	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, reg);
426 	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, reg);
427 
428 	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
429 	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, reg);
430 	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, reg);
431 	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, reg);
432 
433 	reg = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
434 	REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, reg);
435 	REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, reg);
436 	REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, reg);
437 }
438 
439 static const struct hubbub_funcs hubbub30_funcs = {
440 	.update_dchub = hubbub2_update_dchub,
441 	.init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx,
442 	.init_vm_ctx = hubbub2_init_vm_ctx,
443 	.dcc_support_swizzle = hubbub3_dcc_support_swizzle,
444 	.dcc_support_pixel_format = hubbub2_dcc_support_pixel_format,
445 	.get_dcc_compression_cap = hubbub3_get_dcc_compression_cap,
446 	.wm_read_state = hubbub21_wm_read_state,
447 	.get_dchub_ref_freq = hubbub2_get_dchub_ref_freq,
448 	.program_watermarks = hubbub3_program_watermarks,
449 	.allow_self_refresh_control = hubbub1_allow_self_refresh_control,
450 	.is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
451 	.force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes,
452 	.force_pstate_change_control = hubbub3_force_pstate_change_control,
453 	.init_watermarks = hubbub3_init_watermarks,
454 };
455 
456 void hubbub3_construct(struct dcn20_hubbub *hubbub3,
457 	struct dc_context *ctx,
458 	const struct dcn_hubbub_registers *hubbub_regs,
459 	const struct dcn_hubbub_shift *hubbub_shift,
460 	const struct dcn_hubbub_mask *hubbub_mask)
461 {
462 	hubbub3->base.ctx = ctx;
463 	hubbub3->base.funcs = &hubbub30_funcs;
464 	hubbub3->regs = hubbub_regs;
465 	hubbub3->shifts = hubbub_shift;
466 	hubbub3->masks = hubbub_mask;
467 
468 	hubbub3->debug_test_index_pstate = 0xB;
469 	hubbub3->detile_buf_size = 184 * 1024; /* 184KB for DCN3 */
470 }
471 
472