/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "amdgpu_reset.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
#include "sdma1/sdma1_4_2_2_sh_mask.h"
#include "sdma2/sdma2_4_2_2_offset.h"
#include "sdma2/sdma2_4_2_2_sh_mask.h"
#include "sdma3/sdma3_4_2_2_offset.h"
#include "sdma3/sdma3_4_2_2_sh_mask.h"
#include "sdma4/sdma4_4_2_2_offset.h"
#include "sdma4/sdma4_4_2_2_sh_mask.h"
#include "sdma5/sdma5_4_2_2_offset.h"
#include "sdma5/sdma5_4_2_2_sh_mask.h"
#include "sdma6/sdma6_4_2_2_offset.h"
#include "sdma6/sdma6_4_2_2_sh_mask.h"
#include "sdma7/sdma7_4_2_2_offset.h"
#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
543e205a08SOak Zeng
553e205a08SOak Zeng #define HQD_N_REGS 56
563e205a08SOak Zeng #define DUMP_REG(addr) do { \
573e205a08SOak Zeng if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
583e205a08SOak Zeng break; \
593e205a08SOak Zeng (*dump)[i][0] = (addr) << 2; \
603e205a08SOak Zeng (*dump)[i++][1] = RREG32(addr); \
613e205a08SOak Zeng } while (0)
623e205a08SOak Zeng
get_sdma_mqd(void * mqd)633e205a08SOak Zeng static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
643e205a08SOak Zeng {
653e205a08SOak Zeng return (struct v9_sdma_mqd *)mqd;
663e205a08SOak Zeng }
673e205a08SOak Zeng
get_sdma_rlc_reg_offset(struct amdgpu_device * adev,unsigned int engine_id,unsigned int queue_id)68b55a8b8bSYong Zhao static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
693e205a08SOak Zeng unsigned int engine_id,
703e205a08SOak Zeng unsigned int queue_id)
713e205a08SOak Zeng {
72a434b94cSYong Zhao uint32_t sdma_engine_reg_base = 0;
73a434b94cSYong Zhao uint32_t sdma_rlc_reg_offset;
743e205a08SOak Zeng
75a434b94cSYong Zhao switch (engine_id) {
76a434b94cSYong Zhao default:
77a434b94cSYong Zhao dev_warn(adev->dev,
78a434b94cSYong Zhao "Invalid sdma engine id (%d), using engine id 0\n",
79a434b94cSYong Zhao engine_id);
802541f95cSJoe Perches fallthrough;
81a434b94cSYong Zhao case 0:
82a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
83a434b94cSYong Zhao mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
84a434b94cSYong Zhao break;
85a434b94cSYong Zhao case 1:
86a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
87a434b94cSYong Zhao mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
88a434b94cSYong Zhao break;
89a434b94cSYong Zhao case 2:
90a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
91a434b94cSYong Zhao mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
92a434b94cSYong Zhao break;
93a434b94cSYong Zhao case 3:
94a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
95a434b94cSYong Zhao mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
96a434b94cSYong Zhao break;
97a434b94cSYong Zhao case 4:
98a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
99a434b94cSYong Zhao mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
100a434b94cSYong Zhao break;
101a434b94cSYong Zhao case 5:
102a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
103a434b94cSYong Zhao mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
104a434b94cSYong Zhao break;
105a434b94cSYong Zhao case 6:
106a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
107a434b94cSYong Zhao mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
108a434b94cSYong Zhao break;
109a434b94cSYong Zhao case 7:
110a434b94cSYong Zhao sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
111a434b94cSYong Zhao mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
112a434b94cSYong Zhao break;
113a434b94cSYong Zhao }
114a434b94cSYong Zhao
115a434b94cSYong Zhao sdma_rlc_reg_offset = sdma_engine_reg_base
116b55a8b8bSYong Zhao + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
1173e205a08SOak Zeng
118b55a8b8bSYong Zhao pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
119a434b94cSYong Zhao queue_id, sdma_rlc_reg_offset);
1203e205a08SOak Zeng
121a434b94cSYong Zhao return sdma_rlc_reg_offset;
1223e205a08SOak Zeng }
1233e205a08SOak Zeng
kgd_arcturus_hqd_sdma_load(struct amdgpu_device * adev,void * mqd,uint32_t __user * wptr,struct mm_struct * mm)124420185fdSGraham Sider int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
1253e205a08SOak Zeng uint32_t __user *wptr, struct mm_struct *mm)
1263e205a08SOak Zeng {
1273e205a08SOak Zeng struct v9_sdma_mqd *m;
128b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset;
1293e205a08SOak Zeng unsigned long end_jiffies;
1303e205a08SOak Zeng uint32_t data;
1313e205a08SOak Zeng uint64_t data64;
1323e205a08SOak Zeng uint64_t __user *wptr64 = (uint64_t __user *)wptr;
1333e205a08SOak Zeng
1343e205a08SOak Zeng m = get_sdma_mqd(mqd);
135b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
1363e205a08SOak Zeng m->sdma_queue_id);
1373e205a08SOak Zeng
138b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
1393e205a08SOak Zeng m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
1403e205a08SOak Zeng
1413e205a08SOak Zeng end_jiffies = msecs_to_jiffies(2000) + jiffies;
1423e205a08SOak Zeng while (true) {
143b55a8b8bSYong Zhao data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
1443e205a08SOak Zeng if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
1453e205a08SOak Zeng break;
146812330ebSYong Zhao if (time_after(jiffies, end_jiffies)) {
147812330ebSYong Zhao pr_err("SDMA RLC not idle in %s\n", __func__);
1483e205a08SOak Zeng return -ETIME;
149812330ebSYong Zhao }
1503e205a08SOak Zeng usleep_range(500, 1000);
1513e205a08SOak Zeng }
1523e205a08SOak Zeng
153b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
1543e205a08SOak Zeng m->sdmax_rlcx_doorbell_offset);
1553e205a08SOak Zeng
1563e205a08SOak Zeng data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
1573e205a08SOak Zeng ENABLE, 1);
158b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
159b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
160b55a8b8bSYong Zhao m->sdmax_rlcx_rb_rptr);
161b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
1623e205a08SOak Zeng m->sdmax_rlcx_rb_rptr_hi);
1633e205a08SOak Zeng
164b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
1653e205a08SOak Zeng if (read_user_wptr(mm, wptr64, data64)) {
166b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
1673e205a08SOak Zeng lower_32_bits(data64));
168b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
1693e205a08SOak Zeng upper_32_bits(data64));
1703e205a08SOak Zeng } else {
171b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
1723e205a08SOak Zeng m->sdmax_rlcx_rb_rptr);
173b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
1743e205a08SOak Zeng m->sdmax_rlcx_rb_rptr_hi);
1753e205a08SOak Zeng }
176b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
1773e205a08SOak Zeng
178b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
179b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
1803e205a08SOak Zeng m->sdmax_rlcx_rb_base_hi);
181b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
1823e205a08SOak Zeng m->sdmax_rlcx_rb_rptr_addr_lo);
183b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
1843e205a08SOak Zeng m->sdmax_rlcx_rb_rptr_addr_hi);
1853e205a08SOak Zeng
1863e205a08SOak Zeng data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
1873e205a08SOak Zeng RB_ENABLE, 1);
188b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
1893e205a08SOak Zeng
1903e205a08SOak Zeng return 0;
1913e205a08SOak Zeng }
1923e205a08SOak Zeng
kgd_arcturus_hqd_sdma_dump(struct amdgpu_device * adev,uint32_t engine_id,uint32_t queue_id,uint32_t (** dump)[2],uint32_t * n_regs)193420185fdSGraham Sider int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
1943e205a08SOak Zeng uint32_t engine_id, uint32_t queue_id,
1953e205a08SOak Zeng uint32_t (**dump)[2], uint32_t *n_regs)
1963e205a08SOak Zeng {
197b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
198b55a8b8bSYong Zhao engine_id, queue_id);
1993e205a08SOak Zeng uint32_t i = 0, reg;
2003e205a08SOak Zeng #undef HQD_N_REGS
2013e205a08SOak Zeng #define HQD_N_REGS (19+6+7+10)
2023e205a08SOak Zeng
2033e205a08SOak Zeng *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
2043e205a08SOak Zeng if (*dump == NULL)
2053e205a08SOak Zeng return -ENOMEM;
2063e205a08SOak Zeng
2073e205a08SOak Zeng for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
208b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg);
2093e205a08SOak Zeng for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
210b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg);
2113e205a08SOak Zeng for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
2123e205a08SOak Zeng reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
213b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg);
2143e205a08SOak Zeng for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
2153e205a08SOak Zeng reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
216b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg);
2173e205a08SOak Zeng
2183e205a08SOak Zeng WARN_ON_ONCE(i != HQD_N_REGS);
2193e205a08SOak Zeng *n_regs = i;
2203e205a08SOak Zeng
2213e205a08SOak Zeng return 0;
2223e205a08SOak Zeng }
2233e205a08SOak Zeng
kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device * adev,void * mqd)224420185fdSGraham Sider bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
225420185fdSGraham Sider void *mqd)
2263e205a08SOak Zeng {
2273e205a08SOak Zeng struct v9_sdma_mqd *m;
228b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset;
2293e205a08SOak Zeng uint32_t sdma_rlc_rb_cntl;
2303e205a08SOak Zeng
2313e205a08SOak Zeng m = get_sdma_mqd(mqd);
232b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
2333e205a08SOak Zeng m->sdma_queue_id);
2343e205a08SOak Zeng
235b55a8b8bSYong Zhao sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
2363e205a08SOak Zeng
2373e205a08SOak Zeng if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
2383e205a08SOak Zeng return true;
2393e205a08SOak Zeng
2403e205a08SOak Zeng return false;
2413e205a08SOak Zeng }
2423e205a08SOak Zeng
kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device * adev,void * mqd,unsigned int utimeout)243420185fdSGraham Sider int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
2443e205a08SOak Zeng unsigned int utimeout)
2453e205a08SOak Zeng {
2463e205a08SOak Zeng struct v9_sdma_mqd *m;
247b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset;
2483e205a08SOak Zeng uint32_t temp;
2493e205a08SOak Zeng unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
2503e205a08SOak Zeng
2513e205a08SOak Zeng m = get_sdma_mqd(mqd);
252b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
2533e205a08SOak Zeng m->sdma_queue_id);
2543e205a08SOak Zeng
255b55a8b8bSYong Zhao temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
2563e205a08SOak Zeng temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
257b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
2583e205a08SOak Zeng
2593e205a08SOak Zeng while (true) {
260b55a8b8bSYong Zhao temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
2613e205a08SOak Zeng if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
2623e205a08SOak Zeng break;
263812330ebSYong Zhao if (time_after(jiffies, end_jiffies)) {
264812330ebSYong Zhao pr_err("SDMA RLC not idle in %s\n", __func__);
2653e205a08SOak Zeng return -ETIME;
266812330ebSYong Zhao }
2673e205a08SOak Zeng usleep_range(500, 1000);
2683e205a08SOak Zeng }
2693e205a08SOak Zeng
270b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
271b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
272b55a8b8bSYong Zhao RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
2733e205a08SOak Zeng SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
2743e205a08SOak Zeng
275b55a8b8bSYong Zhao m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
2763e205a08SOak Zeng m->sdmax_rlcx_rb_rptr_hi =
277b55a8b8bSYong Zhao RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
2783e205a08SOak Zeng
2793e205a08SOak Zeng return 0;
2803e205a08SOak Zeng }
2813e205a08SOak Zeng
/*
 * Helper used to suspend/resume the gfx pipe for image post-process work to
 * set barrier behaviour.
 */
suspend_resume_compute_scheduler(struct amdgpu_device * adev,bool suspend)28601f64820SJonathan Kim static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
28701f64820SJonathan Kim {
28801f64820SJonathan Kim int i, r = 0;
28901f64820SJonathan Kim
29001f64820SJonathan Kim for (i = 0; i < adev->gfx.num_compute_rings; i++) {
29101f64820SJonathan Kim struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
29201f64820SJonathan Kim
29301f64820SJonathan Kim if (!(ring && ring->sched.thread))
29401f64820SJonathan Kim continue;
29501f64820SJonathan Kim
29601f64820SJonathan Kim /* stop secheduler and drain ring. */
29701f64820SJonathan Kim if (suspend) {
29801f64820SJonathan Kim drm_sched_stop(&ring->sched, NULL);
29901f64820SJonathan Kim r = amdgpu_fence_wait_empty(ring);
30001f64820SJonathan Kim if (r)
30101f64820SJonathan Kim goto out;
30201f64820SJonathan Kim } else {
30301f64820SJonathan Kim drm_sched_start(&ring->sched, false);
30401f64820SJonathan Kim }
30501f64820SJonathan Kim }
30601f64820SJonathan Kim
30701f64820SJonathan Kim out:
30801f64820SJonathan Kim /* return on resume or failure to drain rings. */
30901f64820SJonathan Kim if (!suspend || r)
31001f64820SJonathan Kim return r;
31101f64820SJonathan Kim
312*9bd443cbSJonathan Kim return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
31301f64820SJonathan Kim }
31401f64820SJonathan Kim
set_barrier_auto_waitcnt(struct amdgpu_device * adev,bool enable_waitcnt)31501f64820SJonathan Kim static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
31601f64820SJonathan Kim {
31701f64820SJonathan Kim uint32_t data;
31801f64820SJonathan Kim
31901f64820SJonathan Kim WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
32001f64820SJonathan Kim
32101f64820SJonathan Kim if (!down_read_trylock(&adev->reset_domain->sem))
32201f64820SJonathan Kim return;
32301f64820SJonathan Kim
32401f64820SJonathan Kim amdgpu_amdkfd_suspend(adev, false);
32501f64820SJonathan Kim
32601f64820SJonathan Kim if (suspend_resume_compute_scheduler(adev, true))
32701f64820SJonathan Kim goto out;
32801f64820SJonathan Kim
32901f64820SJonathan Kim data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
33001f64820SJonathan Kim data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
33101f64820SJonathan Kim !enable_waitcnt);
33201f64820SJonathan Kim WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
33301f64820SJonathan Kim
33401f64820SJonathan Kim out:
33501f64820SJonathan Kim suspend_resume_compute_scheduler(adev, false);
33601f64820SJonathan Kim
33701f64820SJonathan Kim amdgpu_amdkfd_resume(adev, false);
33801f64820SJonathan Kim
33901f64820SJonathan Kim up_read(&adev->reset_domain->sem);
34001f64820SJonathan Kim }
34101f64820SJonathan Kim
/*
 * restore_dbg_registers is ignored here but is a general interface requirement
 * for devices that support GFXOFF and where the RLC save/restore list
 * does not support hw registers for debugging i.e. the driver has to manually
 * initialize the debug mode registers after it has disabled GFX off during the
 * debug session.
 */
kgd_arcturus_enable_debug_trap(struct amdgpu_device * adev,bool restore_dbg_registers,uint32_t vmid)34901f64820SJonathan Kim static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
35001f64820SJonathan Kim bool restore_dbg_registers,
35101f64820SJonathan Kim uint32_t vmid)
35201f64820SJonathan Kim {
35301f64820SJonathan Kim mutex_lock(&adev->grbm_idx_mutex);
35401f64820SJonathan Kim
35501f64820SJonathan Kim kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
35601f64820SJonathan Kim
35701f64820SJonathan Kim set_barrier_auto_waitcnt(adev, true);
35801f64820SJonathan Kim
35901f64820SJonathan Kim WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
36001f64820SJonathan Kim
36101f64820SJonathan Kim kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
36201f64820SJonathan Kim
36301f64820SJonathan Kim mutex_unlock(&adev->grbm_idx_mutex);
36401f64820SJonathan Kim
36501f64820SJonathan Kim return 0;
36601f64820SJonathan Kim }
36701f64820SJonathan Kim
/*
 * keep_trap_enabled is ignored here but is a general interface requirement
 * for devices that support multi-process debugging where the performance
 * overhead from trap temporary setup needs to be bypassed when the debug
 * session has ended.
 */
kgd_arcturus_disable_debug_trap(struct amdgpu_device * adev,bool keep_trap_enabled,uint32_t vmid)37401f64820SJonathan Kim static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
37501f64820SJonathan Kim bool keep_trap_enabled,
37601f64820SJonathan Kim uint32_t vmid)
37701f64820SJonathan Kim {
37801f64820SJonathan Kim
37901f64820SJonathan Kim mutex_lock(&adev->grbm_idx_mutex);
38001f64820SJonathan Kim
38101f64820SJonathan Kim kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
38201f64820SJonathan Kim
38301f64820SJonathan Kim set_barrier_auto_waitcnt(adev, false);
38401f64820SJonathan Kim
38501f64820SJonathan Kim WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
38601f64820SJonathan Kim
38701f64820SJonathan Kim kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
38801f64820SJonathan Kim
38901f64820SJonathan Kim mutex_unlock(&adev->grbm_idx_mutex);
39001f64820SJonathan Kim
39101f64820SJonathan Kim return 0;
39201f64820SJonathan Kim }
393e392c887SYong Zhao const struct kfd2kgd_calls arcturus_kfd2kgd = {
3943e205a08SOak Zeng .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
3953e205a08SOak Zeng .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
3963e205a08SOak Zeng .init_interrupts = kgd_gfx_v9_init_interrupts,
3973e205a08SOak Zeng .hqd_load = kgd_gfx_v9_hqd_load,
39835cd89d5SAaron Liu .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
3995073506cSJonathan Kim .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
4003e205a08SOak Zeng .hqd_dump = kgd_gfx_v9_hqd_dump,
4015073506cSJonathan Kim .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
4023e205a08SOak Zeng .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
4035073506cSJonathan Kim .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
4043e205a08SOak Zeng .hqd_destroy = kgd_gfx_v9_hqd_destroy,
4055073506cSJonathan Kim .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
4063e205a08SOak Zeng .wave_control_execute = kgd_gfx_v9_wave_control_execute,
40756fc40abSYong Zhao .get_atc_vmid_pasid_mapping_info =
40856fc40abSYong Zhao kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
4099fb1506eSOak Zeng .set_vm_context_page_table_base =
4109fb1506eSOak Zeng kgd_gfx_v9_set_vm_context_page_table_base,
41101f64820SJonathan Kim .enable_debug_trap = kgd_arcturus_enable_debug_trap,
41201f64820SJonathan Kim .disable_debug_trap = kgd_arcturus_disable_debug_trap,
413101827e1SJonathan Kim .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
414101827e1SJonathan Kim .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
415aea1b473SJonathan Kim .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
416e0f85f46SJonathan Kim .set_address_watch = kgd_gfx_v9_set_address_watch,
417e0f85f46SJonathan Kim .clear_address_watch = kgd_gfx_v9_clear_address_watch,
4187cee6a68SJonathan Kim .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
4197cee6a68SJonathan Kim .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
420f270921aSMukul Joshi .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
421f270921aSMukul Joshi .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
4223e205a08SOak Zeng };
423