/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15_common.h"
#include "vega10_enum.h"

#include "gc/gc_9_4_3_offset.h"
#include "gc/gc_9_4_3_sh_mask.h"

#include "gfx_v9_4_3.h"

#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L

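/*
 * Read the free-running GPU clock counter. Writing
 * regRLC_CAPTURE_GPU_CLOCK_COUNT latches the 64-bit counter into the
 * LSB/MSB registers; GFXOFF is disabled around the access so the RLC
 * registers stay reachable, and gpu_clock_mutex serializes the
 * two-register read.
 */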
static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	amdgpu_gfx_off_ctrl(adev, true);

	return clock;
}

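/*
 * Steer subsequent register accesses to a given shader engine (SE),
 * shader array (SH) and instance via GRBM_GFX_INDEX. Passing
 * 0xffffffff for an argument selects broadcast writes at that level
 * instead of indexing a single unit.
 */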
static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
				    u32 se_num,
				    u32 sh_num,
				    u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
				     SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
				     SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data);
}

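/* Read a single wave-state dword via the SQ_IND_INDEX/SQ_IND_DATA pair. */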
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

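/*
 * Bulk-read consecutive per-wave registers for one SIMD/wave/thread.
 * AUTO_INCR advances the index on every SQ_IND_DATA read, so a single
 * index write covers the whole transfer.
 */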
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

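/*
 * Snapshot the architectural state of one wave for debugfs. The
 * leading "1" identifies the layout of the fields that follow to the
 * consumer.
 */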
static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
				      uint32_t simd, uint32_t wave,
				      uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}

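/* The two helpers below dump a range of SGPRs or VGPRs for one wave. */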
static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				       uint32_t wave, uint32_t start,
				       uint32_t size, uint32_t *dst)
{
	wave_read_regs(adev, simd, wave, 0,
		       start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				       uint32_t wave, uint32_t thread,
				       uint32_t start, uint32_t size,
				       uint32_t *dst)
{
	wave_read_regs(adev, simd, wave, thread,
		       start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
					u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm);
}

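/* Report whether the RLC F32 core is currently enabled. */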
static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_setting;

	/* the RLC is considered disabled when the F32 enable bit is clear */
	rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return false;

	return true;
}

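/*
 * Enter RLC safe mode: post the CMD/MESSAGE pair and poll until the
 * RLC acknowledges by clearing the CMD bit. The exit path below posts
 * the command without waiting.
 */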
static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RLC_SAFE_MODE__CMD_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;

	data = RLC_SAFE_MODE__CMD_MASK;
	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
}

static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
{
	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}

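/*
 * Poll until the RLC serdes CU masters (per SE/SH) and the non-CU
 * masters report idle; used when stopping the RLC to let in-flight
 * serdes traffic drain.
 */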
static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v9_4_3_select_se_sh(adev, 0xffffffff,
							0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

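/* Toggle the CP ring-0 busy/empty (and, with GFX rings, idle) interrupts. */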
static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						 bool enable)
{
	u32 tmp;

	/* These interrupts should be enabled to drive DS clock */

	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	if (adev->gfx.num_gfx_rings)
		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
}

static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
	gfx_v9_4_3_enable_gui_idle_interrupt(adev, false);
	gfx_v9_4_3_wait_for_rlc_serdes(adev);
}

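/* Pulse the RLC soft reset through GRBM, with settling delays. */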
static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif

	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);

	/* APUs (e.g. carrizo) enable the CP interrupt only after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU)) {
		gfx_v9_4_3_enable_gui_idle_interrupt(adev, true);
		udelay(50);
	}

#ifdef AMDGPU_RLC_DEBUG_RETRY
	/* RLC_GPM_GENERAL_6 : RLC ucode version */
	rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6);
	if (rlc_ucode_ver == 0x108) {
		dev_info(adev->dev,
			 "Using rlc debug ucode. regRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
			 rlc_ucode_ver, adev->gfx.rlc_fw_version);
		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
		 * default is 0x9C4 to create a 100us interval */
		WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4);
		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 (256) */
		WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100);
	}
#endif
}

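/*
 * Legacy (non-PSP) RLC microcode load: stream the image dword by dword
 * through regRLC_GPM_UCODE_ADDR/DATA, then write the firmware version
 * to regRLC_GPM_UCODE_ADDR as the final step.
 */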
static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
			RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++) {
		if (amdgpu_emu_mode == 1 && i % 100 == 0) {
			dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i);
			msleep(1);
		}
		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	}
	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

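/*
 * Bring the RLC up: halt it, disable CGCG/CGLS clock gating, load the
 * microcode directly when firmware loading is not handled by the PSP,
 * then restart the F32 core.
 */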
static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	adev->gfx.rlc.funcs->stop(adev);

	/* disable CG */
	WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);

	/* TODO: revisit pg function */
	/* gfx_v9_4_3_init_pg(adev);*/

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy rlc firmware loading */
		r = gfx_v9_4_3_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}

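/*
 * Program the VMID used by the streaming performance monitor (SPM).
 * With SR-IOV in one-VF mode the register is accessed directly rather
 * than through the KIQ.
 */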
static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
	u32 reg, data;

	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
	if (amdgpu_sriov_is_pp_one_vf(adev))
		data = RREG32_NO_KIQ(reg);
	else
		data = RREG32(reg);

	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;

	if (amdgpu_sriov_is_pp_one_vf(adev))
		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
	else
		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
}

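/*
 * Registers that must go through the RLCG interface when the GPU runs
 * under SR-IOV; is_rlcg_access_range() below matches an offset against
 * this list.
 */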
static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
	{SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)},
};

static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
					uint32_t offset,
					const struct soc15_reg_rlcg *entries, int arr_size)
{
	int i;
	uint32_t reg;

	if (!entries)
		return false;

	for (i = 0; i < arr_size; i++) {
		const struct soc15_reg_rlcg *entry;

		entry = &entries[i];
		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
		if (offset == reg)
			return true;
	}

	return false;
}

static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
{
	return gfx_v9_4_3_check_rlcg_range(adev, offset,
					rlcg_access_gc_9_4_3,
					ARRAY_SIZE(rlcg_access_gc_9_4_3));
}

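/* GFX helpers exposed to the rest of the driver via adev->gfx.funcs. */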
const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_4_3_select_se_sh,
	.read_wave_data = &gfx_v9_4_3_read_wave_data,
	.read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
};

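/* RLC control hooks wired into the common amdgpu_rlc helpers. */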
const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
	.is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled,
	.set_safe_mode = gfx_v9_4_3_set_safe_mode,
	.unset_safe_mode = gfx_v9_4_3_unset_safe_mode,
	.init = gfx_v9_4_3_rlc_init,
	.resume = gfx_v9_4_3_rlc_resume,
	.stop = gfx_v9_4_3_rlc_stop,
	.reset = gfx_v9_4_3_rlc_reset,
	.start = gfx_v9_4_3_rlc_start,
	.update_spm_vmid = gfx_v9_4_3_update_spm_vmid,
	.is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
};