xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c (revision 023e41632e065d49bcbe31b3c4b336217f96a271)
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ucode.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

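/* Dequeue request types written to CP_HQD_DEQUEUE_REQUEST in kgd_hqd_destroy() */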
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config,
		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
		uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
		unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
		uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because REG_GET_FIELD() is used, this function lives in the
 * ASIC-specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;
	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

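/*
 * kfd2kgd interface: the function table exported to the KFD driver for
 * GFX v8 (VI) ASICs.
 */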
static const struct kfd2kgd_calls kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.get_fw_version = get_fw_version,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.invalidate_tlbs = invalidate_tlbs,
	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
};

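/*
 * Entry point used by the common amdgpu_amdkfd code to obtain this ASIC's
 * function table. Illustrative (not verbatim) caller-side usage:
 *
 *	const struct kfd2kgd_calls *f = amdgpu_amdkfd_gfx_8_0_get_functions();
 *	f->hqd_load(kgd, mqd, pipe_id, queue_id, wptr, wptr_shift, wptr_mask, mm);
 */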
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

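/*
 * Select the MEC/pipe/queue/VMID instance that subsequent indexed CP and
 * SH_MEM register accesses refer to. SRBM_GFX_CNTL is a global selector,
 * so it stays protected by adev->srbm_mutex until unlock_srbm() resets it.
 */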
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Map vmid to pasid for the IH block as well */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

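/* MMIO offset of a given SDMA engine/queue's RLC register block */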
static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	pr_debug("kfd: sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct vi_mqd *get_mqd(void *mqd)
{
	return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct vi_sdma_mqd *)mqd;
}

static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, wptr_val, data;
	bool valid_wptr = false;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* The HIQ is set up during driver init with vmid set to 0 */
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
		value = RREG32(mmRLC_CP_SCHEDULERS);
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32(mmRLC_CP_SCHEDULERS, value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
	}

	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Copy userspace write pointer value to register.
	 * Activate doorbell logic to monitor subsequent changes.
	 */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* read_user_wptr() may take the mm->mmap_sem.
	 * Release the srbm_mutex to avoid a circular lock dependency:
	 * srbm_mutex -> mmap_sem -> reservation_ww_class_mutex -> srbm_mutex.
	 */
	release_queue(kgd);
	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
	acquire_queue(kgd, pipe_id, queue_id);
	if (valid_wptr)
		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(mmCP_HQD_ACTIVE, data);

	release_queue(kgd);

	return 0;
}

static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

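/*
 * Restore an SDMA RLC queue from its MQD: disable the ring buffer, wait for
 * the engine context to go idle, clear RESUME_CTX, restore the ring, doorbell
 * and read-pointer state, then re-enable the ring buffer.
 */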
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	unsigned long end_jiffies;
	uint32_t sdma_base_addr;
	uint32_t data;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}
	if (m->sdma_engine_id) {
		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
	} else {
		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
	}

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
				m->sdmax_rlcx_virtual_addr);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;
	struct vi_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	/* Workaround: If the IQ timer is active and the wait time is close to
	 * or equal to 0, dequeueing is not safe. Wait until either the wait
	 * time is larger or the timer is cleared. Also ensure that IQ_REQ_PEND
	 * is cleared before continuing, and that wait times are set to at
	 * least 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

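/*
 * Address-watch is not implemented for GFX v8 through this interface;
 * the following callbacks are no-op stubs.
 */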
699 {
700 	return 0;
701 }
702 
703 static int kgd_address_watch_execute(struct kgd_dev *kgd,
704 					unsigned int watch_point_id,
705 					uint32_t cntl_val,
706 					uint32_t addr_hi,
707 					uint32_t addr_lo)
708 {
709 	return 0;
710 }
711 
712 static int kgd_wave_control_execute(struct kgd_dev *kgd,
713 					uint32_t gfx_index_val,
714 					uint32_t sq_cmd)
715 {
716 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
717 	uint32_t data = 0;
718 
719 	mutex_lock(&adev->grbm_idx_mutex);
720 
721 	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
722 	WREG32(mmSQ_CMD, sq_cmd);
723 
724 	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
725 		INSTANCE_BROADCAST_WRITES, 1);
726 	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
727 		SH_BROADCAST_WRITES, 1);
728 	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
729 		SE_BROADCAST_WRITES, 1);
730 
731 	WREG32(mmGRBM_GFX_INDEX, data);
732 	mutex_unlock(&adev->grbm_idx_mutex);
733 
734 	return 0;
735 }
736 
737 static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
738 					unsigned int watch_point_id,
739 					unsigned int reg_offset)
740 {
741 	return 0;
742 }
743 
744 static void set_scratch_backing_va(struct kgd_dev *kgd,
745 					uint64_t va, uint32_t vmid)
746 {
747 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
748 
749 	lock_srbm(kgd, 0, 0, 0, vmid);
750 	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
751 	unlock_srbm(kgd);
752 }
753 
754 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
755 {
756 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
757 	const union amdgpu_firmware_header *hdr;
758 
759 	switch (type) {
760 	case KGD_ENGINE_PFP:
761 		hdr = (const union amdgpu_firmware_header *)
762 						adev->gfx.pfp_fw->data;
763 		break;
764 
765 	case KGD_ENGINE_ME:
766 		hdr = (const union amdgpu_firmware_header *)
767 						adev->gfx.me_fw->data;
768 		break;
769 
770 	case KGD_ENGINE_CE:
771 		hdr = (const union amdgpu_firmware_header *)
772 						adev->gfx.ce_fw->data;
773 		break;
774 
775 	case KGD_ENGINE_MEC1:
776 		hdr = (const union amdgpu_firmware_header *)
777 						adev->gfx.mec_fw->data;
778 		break;
779 
780 	case KGD_ENGINE_MEC2:
781 		hdr = (const union amdgpu_firmware_header *)
782 						adev->gfx.mec2_fw->data;
783 		break;
784 
785 	case KGD_ENGINE_RLC:
786 		hdr = (const union amdgpu_firmware_header *)
787 						adev->gfx.rlc_fw->data;
788 		break;
789 
790 	case KGD_ENGINE_SDMA1:
791 		hdr = (const union amdgpu_firmware_header *)
792 						adev->sdma.instance[0].fw->data;
793 		break;
794 
795 	case KGD_ENGINE_SDMA2:
796 		hdr = (const union amdgpu_firmware_header *)
797 						adev->sdma.instance[1].fw->data;
798 		break;
799 
800 	default:
801 		return 0;
802 	}
803 
804 	if (hdr == NULL)
805 		return 0;
806 
	/* Only 12 bits are in use */
	return hdr->common.ucode_version;
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
			lower_32_bits(page_table_base));
}

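/*
 * Find the KFD VMID currently mapped to @pasid via the ATC mapping registers
 * and flush its TLB through VM_INVALIDATE_REQUEST.
 */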
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	unsigned int tmp;

	if (adev->in_gpu_reset)
		return -EIO;

	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;

		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
			RREG32(mmVM_INVALIDATE_RESPONSE);
			break;
		}
	}

	return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("non kfd vmid %d\n", vmid);
		return -EINVAL;
	}

	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
	RREG32(mmVM_INVALIDATE_RESPONSE);
	return 0;
}