xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c (revision 2612e3bbc0386368a850140a6c9b990cd496a5ec)
13e205a08SOak Zeng /*
23e205a08SOak Zeng  * Copyright 2019 Advanced Micro Devices, Inc.
33e205a08SOak Zeng  *
43e205a08SOak Zeng  * Permission is hereby granted, free of charge, to any person obtaining a
53e205a08SOak Zeng  * copy of this software and associated documentation files (the "Software"),
63e205a08SOak Zeng  * to deal in the Software without restriction, including without limitation
73e205a08SOak Zeng  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
83e205a08SOak Zeng  * and/or sell copies of the Software, and to permit persons to whom the
93e205a08SOak Zeng  * Software is furnished to do so, subject to the following conditions:
103e205a08SOak Zeng  *
113e205a08SOak Zeng  * The above copyright notice and this permission notice shall be included in
123e205a08SOak Zeng  * all copies or substantial portions of the Software.
133e205a08SOak Zeng  *
143e205a08SOak Zeng  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
153e205a08SOak Zeng  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
163e205a08SOak Zeng  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
173e205a08SOak Zeng  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
183e205a08SOak Zeng  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
193e205a08SOak Zeng  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
203e205a08SOak Zeng  * OTHER DEALINGS IN THE SOFTWARE.
213e205a08SOak Zeng  */
223e205a08SOak Zeng #include <linux/module.h>
233e205a08SOak Zeng #include <linux/fdtable.h>
243e205a08SOak Zeng #include <linux/uaccess.h>
253e205a08SOak Zeng #include <linux/firmware.h>
263e205a08SOak Zeng #include "amdgpu.h"
273e205a08SOak Zeng #include "amdgpu_amdkfd.h"
285f5cb2afSSouptick Joarder #include "amdgpu_amdkfd_arcturus.h"
2901f64820SJonathan Kim #include "amdgpu_reset.h"
303e205a08SOak Zeng #include "sdma0/sdma0_4_2_2_offset.h"
313e205a08SOak Zeng #include "sdma0/sdma0_4_2_2_sh_mask.h"
323e205a08SOak Zeng #include "sdma1/sdma1_4_2_2_offset.h"
333e205a08SOak Zeng #include "sdma1/sdma1_4_2_2_sh_mask.h"
343e205a08SOak Zeng #include "sdma2/sdma2_4_2_2_offset.h"
353e205a08SOak Zeng #include "sdma2/sdma2_4_2_2_sh_mask.h"
363e205a08SOak Zeng #include "sdma3/sdma3_4_2_2_offset.h"
373e205a08SOak Zeng #include "sdma3/sdma3_4_2_2_sh_mask.h"
383e205a08SOak Zeng #include "sdma4/sdma4_4_2_2_offset.h"
393e205a08SOak Zeng #include "sdma4/sdma4_4_2_2_sh_mask.h"
403e205a08SOak Zeng #include "sdma5/sdma5_4_2_2_offset.h"
413e205a08SOak Zeng #include "sdma5/sdma5_4_2_2_sh_mask.h"
423e205a08SOak Zeng #include "sdma6/sdma6_4_2_2_offset.h"
433e205a08SOak Zeng #include "sdma6/sdma6_4_2_2_sh_mask.h"
443e205a08SOak Zeng #include "sdma7/sdma7_4_2_2_offset.h"
453e205a08SOak Zeng #include "sdma7/sdma7_4_2_2_sh_mask.h"
463e205a08SOak Zeng #include "v9_structs.h"
473e205a08SOak Zeng #include "soc15.h"
483e205a08SOak Zeng #include "soc15d.h"
493e205a08SOak Zeng #include "amdgpu_amdkfd_gfx_v9.h"
50ad5901dfSYong Zhao #include "gfxhub_v1_0.h"
51ad5901dfSYong Zhao #include "mmhub_v9_4.h"
5201f64820SJonathan Kim #include "gc/gc_9_0_offset.h"
5301f64820SJonathan Kim #include "gc/gc_9_0_sh_mask.h"
543e205a08SOak Zeng 
/* Default register count for HQD dumps; re-defined per dump routine below. */
#define HQD_N_REGS 56
/*
 * Append one (register offset << 2, current value) pair to the caller's
 * dump array.  Relies on 'i' (running index) and 'dump' (output array)
 * from the enclosing scope; stops with a one-time warning if the array
 * would overflow HQD_N_REGS entries.
 */
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)
623e205a08SOak Zeng 
/* View an opaque MQD pointer as the SDMA v9 MQD layout it holds. */
static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	struct v9_sdma_mqd *m = mqd;

	return m;
}
673e205a08SOak Zeng 
get_sdma_rlc_reg_offset(struct amdgpu_device * adev,unsigned int engine_id,unsigned int queue_id)68b55a8b8bSYong Zhao static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
693e205a08SOak Zeng 				unsigned int engine_id,
703e205a08SOak Zeng 				unsigned int queue_id)
713e205a08SOak Zeng {
72a434b94cSYong Zhao 	uint32_t sdma_engine_reg_base = 0;
73a434b94cSYong Zhao 	uint32_t sdma_rlc_reg_offset;
743e205a08SOak Zeng 
75a434b94cSYong Zhao 	switch (engine_id) {
76a434b94cSYong Zhao 	default:
77a434b94cSYong Zhao 		dev_warn(adev->dev,
78a434b94cSYong Zhao 			 "Invalid sdma engine id (%d), using engine id 0\n",
79a434b94cSYong Zhao 			 engine_id);
802541f95cSJoe Perches 		fallthrough;
81a434b94cSYong Zhao 	case 0:
82a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
83a434b94cSYong Zhao 				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
84a434b94cSYong Zhao 		break;
85a434b94cSYong Zhao 	case 1:
86a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
87a434b94cSYong Zhao 				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
88a434b94cSYong Zhao 		break;
89a434b94cSYong Zhao 	case 2:
90a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
91a434b94cSYong Zhao 				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
92a434b94cSYong Zhao 		break;
93a434b94cSYong Zhao 	case 3:
94a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
95a434b94cSYong Zhao 				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
96a434b94cSYong Zhao 		break;
97a434b94cSYong Zhao 	case 4:
98a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
99a434b94cSYong Zhao 				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
100a434b94cSYong Zhao 		break;
101a434b94cSYong Zhao 	case 5:
102a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
103a434b94cSYong Zhao 				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
104a434b94cSYong Zhao 		break;
105a434b94cSYong Zhao 	case 6:
106a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
107a434b94cSYong Zhao 				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
108a434b94cSYong Zhao 		break;
109a434b94cSYong Zhao 	case 7:
110a434b94cSYong Zhao 		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
111a434b94cSYong Zhao 				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
112a434b94cSYong Zhao 		break;
113a434b94cSYong Zhao 	}
114a434b94cSYong Zhao 
115a434b94cSYong Zhao 	sdma_rlc_reg_offset = sdma_engine_reg_base
116b55a8b8bSYong Zhao 		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
1173e205a08SOak Zeng 
118b55a8b8bSYong Zhao 	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
119a434b94cSYong Zhao 			queue_id, sdma_rlc_reg_offset);
1203e205a08SOak Zeng 
121a434b94cSYong Zhao 	return sdma_rlc_reg_offset;
1223e205a08SOak Zeng }
1233e205a08SOak Zeng 
/*
 * Load an SDMA user-mode (RLC) queue onto the hardware from its MQD.
 *
 * @adev: device whose SDMA registers are programmed
 * @mqd:  v9_sdma_mqd snapshot holding the queue state to restore
 * @wptr: user-space pointer to the 64-bit ring write pointer (may be
 *        unreadable; a fallback is applied below)
 * @mm:   address space used to safely read *wptr
 *
 * Return: 0 on success, -ETIME if the queue context never reported idle.
 */
int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	/* Disable the ring buffer before reprogramming the queue. */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	/* Poll up to 2 seconds for the context to report idle. */
	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	/* Re-enable the doorbell, then restore the saved read pointer. */
	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	/*
	 * Write the wptr inside a MINOR_PTR_UPDATE=1 window.  If the
	 * user-space wptr cannot be read, fall back to the saved rptr
	 * (wptr == rptr presumably restarts the queue with no pending
	 * work — NOTE(review): confirm against the SDMA programming docs).
	 */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	/* Restore the ring base and the rptr write-back addresses. */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	/* Finally re-enable the ring buffer. */
	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}
1923e205a08SOak Zeng 
/*
 * Snapshot the RLC queue registers of one SDMA queue.
 *
 * Allocates *dump as an array of (register offset << 2, value) pairs;
 * the caller owns the allocation and must kfree() it.  *n_regs receives
 * the number of entries written.
 *
 * Return: 0 on success, -ENOMEM if the dump array cannot be allocated.
 */
int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
	/* Re-define HQD_N_REGS to the exact total of the four register
	 * ranges dumped below (19 + 6 + 7 + 10 entries). */
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	/* The four ranges together must fill the array exactly. */
	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
2233e205a08SOak Zeng 
kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device * adev,void * mqd)224420185fdSGraham Sider bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
225420185fdSGraham Sider 				void *mqd)
2263e205a08SOak Zeng {
2273e205a08SOak Zeng 	struct v9_sdma_mqd *m;
228b55a8b8bSYong Zhao 	uint32_t sdma_rlc_reg_offset;
2293e205a08SOak Zeng 	uint32_t sdma_rlc_rb_cntl;
2303e205a08SOak Zeng 
2313e205a08SOak Zeng 	m = get_sdma_mqd(mqd);
232b55a8b8bSYong Zhao 	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
2333e205a08SOak Zeng 					    m->sdma_queue_id);
2343e205a08SOak Zeng 
235b55a8b8bSYong Zhao 	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
2363e205a08SOak Zeng 
2373e205a08SOak Zeng 	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
2383e205a08SOak Zeng 		return true;
2393e205a08SOak Zeng 
2403e205a08SOak Zeng 	return false;
2413e205a08SOak Zeng }
2423e205a08SOak Zeng 
/*
 * Tear down an SDMA user-mode queue: disable the ring buffer, wait for
 * the context to drain, then save the final read pointer back into the
 * MQD so the queue can later be reloaded where it left off.
 *
 * @utimeout: drain timeout in milliseconds.
 *
 * Return: 0 on success, -ETIME if the context never reported idle.
 */
int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	/* Clear RB_ENABLE so the engine stops processing this queue. */
	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	/* Poll until the context reports idle or the timeout expires. */
	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	/* Disable the doorbell, then set RB_ENABLE again after the drain —
	 * NOTE(review): presumably part of the required teardown sequence;
	 * confirm against the SDMA programming documentation. */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	/* Preserve the final read pointer in the MQD for a later reload. */
	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}
2813e205a08SOak Zeng 
/*
 * Helper used to suspend/resume gfx pipe for image post process work to set
 * barrier behaviour.
 *
 * @suspend: true stops every compute-ring scheduler and drains its fences,
 *           then waits for the GFX IP block to idle; false restarts them.
 *
 * Return: 0 on success; a negative errno if draining or the idle wait
 * fails (on failure during suspend, already-stopped rings are NOT
 * restarted here — the caller resumes via a second call with false).
 */
static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
{
	int i, r = 0;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* Skip rings without an instantiated scheduler thread. */
		if (!(ring && ring->sched.thread))
			continue;

		/* stop scheduler and drain ring. */
		if (suspend) {
			drm_sched_stop(&ring->sched, NULL);
			r = amdgpu_fence_wait_empty(ring);
			if (r)
				goto out;
		} else {
			drm_sched_start(&ring->sched, false);
		}
	}

out:
	/* return on resume or failure to drain rings. */
	if (!suspend || r)
		return r;

	/* Successful suspend: additionally wait for GFX to go idle. */
	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}
31401f64820SJonathan Kim 
/*
 * Record the requested barrier wait-count behaviour and, when possible,
 * program SQ_CONFIG.DISABLE_BARRIER_WAITCNT to match while all compute
 * users (KFD queues and kernel compute schedulers) are quiesced.
 */
static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
{
	uint32_t data;

	/* Publish the new mode for concurrent readers of the flag. */
	WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);

	/* If the reset-domain lock is unavailable (presumably a GPU reset
	 * is in flight), skip the register update; the flag above still
	 * records the requested behaviour. */
	if (!down_read_trylock(&adev->reset_domain->sem))
		return;

	/* Quiesce KFD user queues around the SQ_CONFIG update. */
	amdgpu_amdkfd_suspend(adev, false);

	/* Also stop and drain the kernel compute schedulers; on failure
	 * skip the register write but still resume everything below. */
	if (suspend_resume_compute_scheduler(adev, true))
		goto out;

	/* DISABLE_BARRIER_WAITCNT is the inverse of enable_waitcnt. */
	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
	data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
						!enable_waitcnt);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);

out:
	suspend_resume_compute_scheduler(adev, false);

	amdgpu_amdkfd_resume(adev, false);

	up_read(&adev->reset_domain->sem);
}
34101f64820SJonathan Kim 
/*
 * restore_dbg_registers is ignored here but is a general interface requirement
 * for devices that support GFXOFF and where the RLC save/restore list
 * does not support hw registers for debugging i.e. the driver has to manually
 * initialize the debug mode registers after it has disabled GFX off during the
 * debug session.
 *
 * Returns 0 (no error conditions are generated here).
 */
static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
				bool restore_dbg_registers,
				uint32_t vmid)
{
	mutex_lock(&adev->grbm_idx_mutex);

	/* Stall wave launches for this VMID while reconfiguring. */
	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	/* Enable barrier auto wait-count for the debug session. */
	set_barrier_auto_waitcnt(adev, true);

	/* Clear the trap mask register. */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	/* Release the wave-launch stall. */
	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
36701f64820SJonathan Kim 
/*
 * keep_trap_enabled is ignored here but is a general interface requirement
 * for devices that support multi-process debugging where the performance
 * overhead from trap temporary setup needs to be bypassed when the debug
 * session has ended.
 *
 * Returns 0 (no error conditions are generated here).
 */
static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
					bool keep_trap_enabled,
					uint32_t vmid)
{

	mutex_lock(&adev->grbm_idx_mutex);

	/* Stall wave launches for this VMID while reconfiguring. */
	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	/* Restore default barrier wait-count behaviour after the session. */
	set_barrier_auto_waitcnt(adev, false);

	/* Clear the trap mask register. */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	/* Release the wave-launch stall. */
	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
/*
 * KFD -> KGD callback table for Arcturus.  The SDMA queue operations use
 * the Arcturus-specific implementations defined in this file; all other
 * entries reuse the common GFX v9 implementations.
 */
const struct kfd2kgd_calls arcturus_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
				kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base =
				kgd_gfx_v9_set_vm_context_page_table_base,
	.enable_debug_trap = kgd_arcturus_enable_debug_trap,
	.disable_debug_trap = kgd_arcturus_disable_debug_trap,
	.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
	.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
	.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
	.set_address_watch = kgd_gfx_v9_set_address_watch,
	.clear_address_watch = kgd_gfx_v9_clear_address_watch,
	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
};
423