xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c (revision 7e24a55b2122746c2eef192296fc84624354f895)
1157e72e8SLikun Gao /*
2157e72e8SLikun Gao  * Copyright 2019 Advanced Micro Devices, Inc.
3157e72e8SLikun Gao  *
4157e72e8SLikun Gao  * Permission is hereby granted, free of charge, to any person obtaining a
5157e72e8SLikun Gao  * copy of this software and associated documentation files (the "Software"),
6157e72e8SLikun Gao  * to deal in the Software without restriction, including without limitation
7157e72e8SLikun Gao  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8157e72e8SLikun Gao  * and/or sell copies of the Software, and to permit persons to whom the
9157e72e8SLikun Gao  * Software is furnished to do so, subject to the following conditions:
10157e72e8SLikun Gao  *
11157e72e8SLikun Gao  * The above copyright notice and this permission notice shall be included in
12157e72e8SLikun Gao  * all copies or substantial portions of the Software.
13157e72e8SLikun Gao  *
14157e72e8SLikun Gao  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15157e72e8SLikun Gao  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16157e72e8SLikun Gao  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17157e72e8SLikun Gao  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18157e72e8SLikun Gao  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19157e72e8SLikun Gao  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20157e72e8SLikun Gao  * OTHER DEALINGS IN THE SOFTWARE.
21157e72e8SLikun Gao  *
22157e72e8SLikun Gao  */
23157e72e8SLikun Gao 
24157e72e8SLikun Gao #include <linux/delay.h>
25157e72e8SLikun Gao #include <linux/firmware.h>
26157e72e8SLikun Gao #include <linux/module.h>
27157e72e8SLikun Gao #include <linux/pci.h>
28157e72e8SLikun Gao 
29157e72e8SLikun Gao #include "amdgpu.h"
30157e72e8SLikun Gao #include "amdgpu_ucode.h"
31157e72e8SLikun Gao #include "amdgpu_trace.h"
32157e72e8SLikun Gao 
33157e72e8SLikun Gao #include "gc/gc_10_3_0_offset.h"
34157e72e8SLikun Gao #include "gc/gc_10_3_0_sh_mask.h"
35157e72e8SLikun Gao #include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
36157e72e8SLikun Gao #include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
37157e72e8SLikun Gao #include "ivsrcid/sdma2/irqsrcs_sdma2_5_0.h"
38157e72e8SLikun Gao #include "ivsrcid/sdma3/irqsrcs_sdma3_5_0.h"
39157e72e8SLikun Gao 
40157e72e8SLikun Gao #include "soc15_common.h"
41157e72e8SLikun Gao #include "soc15.h"
42157e72e8SLikun Gao #include "navi10_sdma_pkt_open.h"
43157e72e8SLikun Gao #include "nbio_v2_3.h"
44157e72e8SLikun Gao #include "sdma_common.h"
45157e72e8SLikun Gao #include "sdma_v5_2.h"
46157e72e8SLikun Gao 
47157e72e8SLikun Gao MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
48df2d15dfSJiansong Chen MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
4901069226STao Zhou MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin");
508760403eSChengming Gui MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
51157e72e8SLikun Gao 
5254c98eacSHuang Rui MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
53e88d68e1SAaron Liu MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
5493afe158SYifan Zhang MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
55967af863SPrike Liang MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
5654c98eacSHuang Rui 
57157e72e8SLikun Gao #define SDMA1_REG_OFFSET 0x600
58157e72e8SLikun Gao #define SDMA3_REG_OFFSET 0x400
59157e72e8SLikun Gao #define SDMA0_HYP_DEC_REG_START 0x5880
60157e72e8SLikun Gao #define SDMA0_HYP_DEC_REG_END 0x5893
61157e72e8SLikun Gao #define SDMA1_HYP_DEC_REG_OFFSET 0x20
62157e72e8SLikun Gao 
63157e72e8SLikun Gao static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
64157e72e8SLikun Gao static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
65157e72e8SLikun Gao static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
66157e72e8SLikun Gao static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
67157e72e8SLikun Gao 
/*
 * sdma_v5_2_get_reg_offset - translate a per-instance internal register
 * offset into an absolute GC register offset.
 *
 * HYP_DEC registers live in their own aperture with a fixed per-instance
 * stride; the remaining registers are split across two GC apertures, one
 * for instances 0/1 and one for instances 2/3.
 */
static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
	u32 base;
	bool hyp_dec = (internal_offset >= SDMA0_HYP_DEC_REG_START &&
			internal_offset <= SDMA0_HYP_DEC_REG_END);

	if (hyp_dec) {
		base = adev->reg_offset[GC_HWIP][0][1];
		/* stride is per instance; adds 0 for instance 0 */
		internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
	} else if (instance < 2) {
		base = adev->reg_offset[GC_HWIP][0][0];
		if (instance == 1)
			internal_offset += SDMA1_REG_OFFSET;
	} else {
		base = adev->reg_offset[GC_HWIP][0][2];
		if (instance == 3)
			internal_offset += SDMA3_REG_OFFSET;
	}

	return base + internal_offset;
}
91157e72e8SLikun Gao 
sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring * ring)92157e72e8SLikun Gao static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
93157e72e8SLikun Gao {
94157e72e8SLikun Gao 	unsigned ret;
95157e72e8SLikun Gao 
96157e72e8SLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
97157e72e8SLikun Gao 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
98157e72e8SLikun Gao 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
99157e72e8SLikun Gao 	amdgpu_ring_write(ring, 1);
100157e72e8SLikun Gao 	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
101157e72e8SLikun Gao 	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
102157e72e8SLikun Gao 
103157e72e8SLikun Gao 	return ret;
104157e72e8SLikun Gao }
105157e72e8SLikun Gao 
sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring * ring,unsigned offset)106157e72e8SLikun Gao static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
107157e72e8SLikun Gao 					   unsigned offset)
108157e72e8SLikun Gao {
109157e72e8SLikun Gao 	unsigned cur;
110157e72e8SLikun Gao 
111157e72e8SLikun Gao 	BUG_ON(offset > ring->buf_mask);
112157e72e8SLikun Gao 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
113157e72e8SLikun Gao 
114157e72e8SLikun Gao 	cur = (ring->wptr - 1) & ring->buf_mask;
115157e72e8SLikun Gao 	if (cur > offset)
116157e72e8SLikun Gao 		ring->ring[offset] = cur - offset;
117157e72e8SLikun Gao 	else
118157e72e8SLikun Gao 		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
119157e72e8SLikun Gao }
120157e72e8SLikun Gao 
121157e72e8SLikun Gao /**
122157e72e8SLikun Gao  * sdma_v5_2_ring_get_rptr - get the current read pointer
123157e72e8SLikun Gao  *
124157e72e8SLikun Gao  * @ring: amdgpu ring pointer
125157e72e8SLikun Gao  *
126157e72e8SLikun Gao  * Get the current rptr from the hardware (NAVI10+).
127157e72e8SLikun Gao  */
sdma_v5_2_ring_get_rptr(struct amdgpu_ring * ring)128157e72e8SLikun Gao static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
129157e72e8SLikun Gao {
130157e72e8SLikun Gao 	u64 *rptr;
131157e72e8SLikun Gao 
132157e72e8SLikun Gao 	/* XXX check if swapping is necessary on BE */
1333748424bSJack Xiao 	rptr = (u64 *)ring->rptr_cpu_addr;
134157e72e8SLikun Gao 
135157e72e8SLikun Gao 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
136157e72e8SLikun Gao 	return ((*rptr) >> 2);
137157e72e8SLikun Gao }
138157e72e8SLikun Gao 
139157e72e8SLikun Gao /**
140157e72e8SLikun Gao  * sdma_v5_2_ring_get_wptr - get the current write pointer
141157e72e8SLikun Gao  *
142157e72e8SLikun Gao  * @ring: amdgpu ring pointer
143157e72e8SLikun Gao  *
144157e72e8SLikun Gao  * Get the current wptr from the hardware (NAVI10+).
145157e72e8SLikun Gao  */
sdma_v5_2_ring_get_wptr(struct amdgpu_ring * ring)146157e72e8SLikun Gao static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
147157e72e8SLikun Gao {
148157e72e8SLikun Gao 	struct amdgpu_device *adev = ring->adev;
14987d6883bSXiaojie Yuan 	u64 wptr;
150157e72e8SLikun Gao 
151157e72e8SLikun Gao 	if (ring->use_doorbell) {
152157e72e8SLikun Gao 		/* XXX check if swapping is necessary on BE */
1533748424bSJack Xiao 		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
15487d6883bSXiaojie Yuan 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
155157e72e8SLikun Gao 	} else {
15687d6883bSXiaojie Yuan 		wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
15787d6883bSXiaojie Yuan 		wptr = wptr << 32;
15887d6883bSXiaojie Yuan 		wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
15987d6883bSXiaojie Yuan 		DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
160157e72e8SLikun Gao 	}
161157e72e8SLikun Gao 
16287d6883bSXiaojie Yuan 	return wptr >> 2;
163157e72e8SLikun Gao }
164157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (NAVI10+).
 */
static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");
	if (ring->use_doorbell) {
		DRM_DEBUG("Using doorbell -- "
				"wptr_offs == 0x%08x "
				"lower_32_bits(ring->wptr << 2) == 0x%08x "
				"upper_32_bits(ring->wptr << 2) == 0x%08x\n",
				ring->wptr_offs,
				lower_32_bits(ring->wptr << 2),
				upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		/* publish the new wptr (in bytes) to the shadow before
		 * ringing the doorbell so the engine polls a current value
		 */
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr << 2);
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
				ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
		/* SDMA seems to miss doorbells sometimes when powergating kicks in.
		 * Updating the wptr directly will wake it. This is only safe because
		 * we disallow gfxoff in begin_use() and then allow it again in end_use().
		 */
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
		       lower_32_bits(ring->wptr << 2));
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
		       upper_32_bits(ring->wptr << 2));
	} else {
		/* no doorbell: program the RB_WPTR registers directly */
		DRM_DEBUG("Not using doorbell -- "
				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
				ring->me,
				lower_32_bits(ring->wptr << 2),
				ring->me,
				upper_32_bits(ring->wptr << 2));
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
			lower_32_bits(ring->wptr << 2));
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
			upper_32_bits(ring->wptr << 2));
	}
}
213157e72e8SLikun Gao 
/*
 * sdma_v5_2_ring_insert_nop - pad the ring with 'count' NOP dwords.
 *
 * When the instance supports burst NOP, the first packet's COUNT field
 * covers the whole run; the remaining dwords are still written so the
 * ring advances by exactly 'count' entries either way.
 */
static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	int i;

	for (i = 0; i < count; i++) {
		if (i == 0 && sdma && sdma->burst_nop)
			amdgpu_ring_write(ring, ring->funcs->nop |
					  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
	}
}
226157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring.
 */
static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	/* CSA address for context save/restore; 0 when not applicable */
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on a 8 DW boundary--the next dword
	 * must be on a 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in
	 * modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is,
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below, is a solution of x.
	 */
	sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}
264157e72e8SLikun Gao 
265157e72e8SLikun Gao /**
266b45fdeabSJinzhou Su  * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse
267b45fdeabSJinzhou Su  *
268b45fdeabSJinzhou Su  * @ring: amdgpu ring pointer
269b45fdeabSJinzhou Su  *
270b45fdeabSJinzhou Su  * flush the IB by graphics cache rinse.
271b45fdeabSJinzhou Su  */
sdma_v5_2_ring_emit_mem_sync(struct amdgpu_ring * ring)272b45fdeabSJinzhou Su static void sdma_v5_2_ring_emit_mem_sync(struct amdgpu_ring *ring)
273b45fdeabSJinzhou Su {
274e8ba4922SColin Ian King 	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB |
275e8ba4922SColin Ian King 			    SDMA_GCR_GLM_INV | SDMA_GCR_GL1_INV |
276e8ba4922SColin Ian King 			    SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
277b45fdeabSJinzhou Su 			    SDMA_GCR_GLI_INV(1);
278b45fdeabSJinzhou Su 
279b45fdeabSJinzhou Su 	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
280b45fdeabSJinzhou Su 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
281b45fdeabSJinzhou Su 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
282b45fdeabSJinzhou Su 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
283b45fdeabSJinzhou Su 			SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
284b45fdeabSJinzhou Su 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
285b45fdeabSJinzhou Su 			SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
286b45fdeabSJinzhou Su 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
287b45fdeabSJinzhou Su 			SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
288b45fdeabSJinzhou Su }
289b45fdeabSJinzhou Su 
290b45fdeabSJinzhou Su /**
291157e72e8SLikun Gao  * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
292157e72e8SLikun Gao  *
293157e72e8SLikun Gao  * @ring: amdgpu ring pointer
294157e72e8SLikun Gao  *
295157e72e8SLikun Gao  * Emit an hdp flush packet on the requested DMA ring.
296157e72e8SLikun Gao  */
sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring * ring)297157e72e8SLikun Gao static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
298157e72e8SLikun Gao {
299157e72e8SLikun Gao 	struct amdgpu_device *adev = ring->adev;
300157e72e8SLikun Gao 	u32 ref_and_mask = 0;
301157e72e8SLikun Gao 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
302157e72e8SLikun Gao 
303b33d7aaaSAlex Deucher 	if (ring->me > 1) {
304b33d7aaaSAlex Deucher 		amdgpu_asic_flush_hdp(adev, ring);
305b33d7aaaSAlex Deucher 	} else {
3061f5d9cadSLikun Gao 		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
307157e72e8SLikun Gao 
308157e72e8SLikun Gao 		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
309157e72e8SLikun Gao 				  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
310157e72e8SLikun Gao 				  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
311157e72e8SLikun Gao 		amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
312157e72e8SLikun Gao 		amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
313157e72e8SLikun Gao 		amdgpu_ring_write(ring, ref_and_mask); /* reference */
314157e72e8SLikun Gao 		amdgpu_ring_write(ring, ref_and_mask); /* mask */
315157e72e8SLikun Gao 		amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
316157e72e8SLikun Gao 				  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
317157e72e8SLikun Gao 	}
318b33d7aaaSAlex Deucher }
319157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.
 */
static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		/* second FENCE packet targets the adjacent dword for seq[63:32] */
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if ((flags & AMDGPU_FENCE_FLAG_INT)) {
		/* MES-owned queues tag the trap context with their hw queue id */
		uint32_t ctx = ring->is_mes_queue ?
			(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
	}
}
365157e72e8SLikun Gao 
36641782d70SGuchun Chen 
367157e72e8SLikun Gao /**
368157e72e8SLikun Gao  * sdma_v5_2_gfx_stop - stop the gfx async dma engines
369157e72e8SLikun Gao  *
370157e72e8SLikun Gao  * @adev: amdgpu_device pointer
371157e72e8SLikun Gao  *
372157e72e8SLikun Gao  * Stop the gfx async dma ring buffers.
373157e72e8SLikun Gao  */
sdma_v5_2_gfx_stop(struct amdgpu_device * adev)374157e72e8SLikun Gao static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
375157e72e8SLikun Gao {
376157e72e8SLikun Gao 	u32 rb_cntl, ib_cntl;
377157e72e8SLikun Gao 	int i;
378157e72e8SLikun Gao 
379571c0536SAlex Deucher 	amdgpu_sdma_unset_buffer_funcs_helper(adev);
380157e72e8SLikun Gao 
381157e72e8SLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
382cf2a22e4SRohit Khaire 		rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
383157e72e8SLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
384cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
385cf2a22e4SRohit Khaire 		ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
386157e72e8SLikun Gao 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
387cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
388157e72e8SLikun Gao 	}
389157e72e8SLikun Gao }
390157e72e8SLikun Gao 
/**
 * sdma_v5_2_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues.
 */
static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo: intentionally empty for now */
}
402157e72e8SLikun Gao 
/**
 * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 *
 * Halt or unhalt the async dma engines context switch.
 */
static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl, phase_quantum = 0;
	int i;

	if (amdgpu_sdma_phase_quantum) {
		unsigned value = amdgpu_sdma_phase_quantum;
		unsigned unit = 0;

		/* encode the module-parameter quantum as VALUE << UNIT:
		 * halve VALUE (rounding up) until it fits its register field
		 */
		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
			value = (value + 1) >> 1;
			unit++;
		}
		/* if even the largest UNIT cannot represent it, clamp to max */
		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
			WARN_ONCE(1,
			"clamping sdma_phase_quantum to %uK clock cycles\n",
				  value << unit);
		}
		phase_quantum =
			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		/* quantum registers are only programmed when enabling and a
		 * non-zero quantum was requested
		 */
		if (enable && amdgpu_sdma_phase_quantum) {
			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
			       phase_quantum);
			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
			       phase_quantum);
			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
			       phase_quantum);
		}

		/* SDMA0_CNTL writes are skipped under SR-IOV */
		if (!amdgpu_sriov_vf(adev)) {
			f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, enable ? 1 : 0);
			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
		}
	}

}
45941782d70SGuchun Chen 
460b992a190SHaohui Mai /**
46141782d70SGuchun Chen  * sdma_v5_2_enable - stop the async dma engines
462b992a190SHaohui Mai  *
463b992a190SHaohui Mai  * @adev: amdgpu_device pointer
46441782d70SGuchun Chen  * @enable: enable/disable the DMA MEs.
465b992a190SHaohui Mai  *
46641782d70SGuchun Chen  * Halt or unhalt the async dma engines.
467b992a190SHaohui Mai  */
sdma_v5_2_enable(struct amdgpu_device * adev,bool enable)46841782d70SGuchun Chen static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
469b992a190SHaohui Mai {
470b992a190SHaohui Mai 	u32 f32_cntl;
47141782d70SGuchun Chen 	int i;
472b992a190SHaohui Mai 
47341782d70SGuchun Chen 	if (!enable) {
474157e72e8SLikun Gao 		sdma_v5_2_gfx_stop(adev);
475157e72e8SLikun Gao 		sdma_v5_2_rlc_stop(adev);
47641782d70SGuchun Chen 	}
477157e72e8SLikun Gao 
478b18ff692SBokun Zhang 	if (!amdgpu_sriov_vf(adev)) {
479157e72e8SLikun Gao 		for (i = 0; i < adev->sdma.num_instances; i++) {
480157e72e8SLikun Gao 			f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
48141782d70SGuchun Chen 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
482157e72e8SLikun Gao 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
483157e72e8SLikun Gao 		}
484157e72e8SLikun Gao 	}
485b18ff692SBokun Zhang }
486157e72e8SLikun Gao 
487157e72e8SLikun Gao /**
488157e72e8SLikun Gao  * sdma_v5_2_gfx_resume - setup and start the async dma engines
489157e72e8SLikun Gao  *
490157e72e8SLikun Gao  * @adev: amdgpu_device pointer
491157e72e8SLikun Gao  *
492157e72e8SLikun Gao  * Set up the gfx DMA ring buffers and enable them.
493157e72e8SLikun Gao  * Returns 0 for success, error for failure.
494157e72e8SLikun Gao  */
sdma_v5_2_gfx_resume(struct amdgpu_device * adev)495157e72e8SLikun Gao static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
496157e72e8SLikun Gao {
497157e72e8SLikun Gao 	struct amdgpu_ring *ring;
498157e72e8SLikun Gao 	u32 rb_cntl, ib_cntl;
499157e72e8SLikun Gao 	u32 rb_bufsz;
500157e72e8SLikun Gao 	u32 doorbell;
501157e72e8SLikun Gao 	u32 doorbell_offset;
502157e72e8SLikun Gao 	u32 temp;
503157e72e8SLikun Gao 	u32 wptr_poll_cntl;
504157e72e8SLikun Gao 	u64 wptr_gpu_addr;
505157e72e8SLikun Gao 	int i, r;
506157e72e8SLikun Gao 
507157e72e8SLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
508157e72e8SLikun Gao 		ring = &adev->sdma.instance[i].ring;
509157e72e8SLikun Gao 
510b18ff692SBokun Zhang 		if (!amdgpu_sriov_vf(adev))
511cf2a22e4SRohit Khaire 			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
512157e72e8SLikun Gao 
513157e72e8SLikun Gao 		/* Set ring buffer size in dwords */
514157e72e8SLikun Gao 		rb_bufsz = order_base_2(ring->ring_size / 4);
515cf2a22e4SRohit Khaire 		rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
516157e72e8SLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
517157e72e8SLikun Gao #ifdef __BIG_ENDIAN
518157e72e8SLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
519157e72e8SLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
520157e72e8SLikun Gao 					RPTR_WRITEBACK_SWAP_ENABLE, 1);
521157e72e8SLikun Gao #endif
522cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
523157e72e8SLikun Gao 
524157e72e8SLikun Gao 		/* Initialize the ring buffer's read and write pointers */
525cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
526cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
527cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
528cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
529157e72e8SLikun Gao 
530157e72e8SLikun Gao 		/* setup the wptr shadow polling */
5313748424bSJack Xiao 		wptr_gpu_addr = ring->wptr_gpu_addr;
532cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
533157e72e8SLikun Gao 		       lower_32_bits(wptr_gpu_addr));
534cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
535157e72e8SLikun Gao 		       upper_32_bits(wptr_gpu_addr));
536cf2a22e4SRohit Khaire 		wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
537157e72e8SLikun Gao 							 mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
538157e72e8SLikun Gao 		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
539157e72e8SLikun Gao 					       SDMA0_GFX_RB_WPTR_POLL_CNTL,
540157e72e8SLikun Gao 					       F32_POLL_ENABLE, 1);
541cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
542157e72e8SLikun Gao 		       wptr_poll_cntl);
543157e72e8SLikun Gao 
544157e72e8SLikun Gao 		/* set the wb address whether it's enabled or not */
545cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
5463748424bSJack Xiao 		       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
547cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
5483748424bSJack Xiao 		       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
549157e72e8SLikun Gao 
550157e72e8SLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
551157e72e8SLikun Gao 
552cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
553cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
554157e72e8SLikun Gao 
555157e72e8SLikun Gao 		ring->wptr = 0;
556157e72e8SLikun Gao 
557157e72e8SLikun Gao 		/* before programing wptr to a less value, need set minor_ptr_update first */
558cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
559157e72e8SLikun Gao 
560157e72e8SLikun Gao 		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
5617dba6e83SHaohui Mai 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
5627dba6e83SHaohui Mai 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
563157e72e8SLikun Gao 		}
564157e72e8SLikun Gao 
565cf2a22e4SRohit Khaire 		doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
566cf2a22e4SRohit Khaire 		doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
567157e72e8SLikun Gao 
568157e72e8SLikun Gao 		if (ring->use_doorbell) {
569157e72e8SLikun Gao 			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
570157e72e8SLikun Gao 			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
571157e72e8SLikun Gao 					OFFSET, ring->doorbell_index);
572157e72e8SLikun Gao 		} else {
573157e72e8SLikun Gao 			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
574157e72e8SLikun Gao 		}
575cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
576cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
577157e72e8SLikun Gao 
578157e72e8SLikun Gao 		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
5799822ac8fSYong Zhao 						      ring->doorbell_index,
5809822ac8fSYong Zhao 						      adev->doorbell_index.sdma_doorbell_range);
581157e72e8SLikun Gao 
582157e72e8SLikun Gao 		if (amdgpu_sriov_vf(adev))
583157e72e8SLikun Gao 			sdma_v5_2_ring_set_wptr(ring);
584157e72e8SLikun Gao 
585157e72e8SLikun Gao 		/* set minor_ptr_update to 0 after wptr programed */
586b18ff692SBokun Zhang 
587cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
588157e72e8SLikun Gao 
589b18ff692SBokun Zhang 		/* SRIOV VF has no control of any of registers below */
590b18ff692SBokun Zhang 		if (!amdgpu_sriov_vf(adev)) {
591157e72e8SLikun Gao 			/* set utc l1 enable flag always to 1 */
592157e72e8SLikun Gao 			temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
593157e72e8SLikun Gao 			temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
594157e72e8SLikun Gao 
595157e72e8SLikun Gao 			/* enable MCBP */
596157e72e8SLikun Gao 			temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
597157e72e8SLikun Gao 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
598157e72e8SLikun Gao 
599157e72e8SLikun Gao 			/* Set up RESP_MODE to non-copy addresses */
600cf2a22e4SRohit Khaire 			temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
601157e72e8SLikun Gao 			temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
602157e72e8SLikun Gao 			temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
603cf2a22e4SRohit Khaire 			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
604157e72e8SLikun Gao 
605157e72e8SLikun Gao 			/* program default cache read and write policy */
606cf2a22e4SRohit Khaire 			temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
607157e72e8SLikun Gao 			/* clean read policy and write policy bits */
608157e72e8SLikun Gao 			temp &= 0xFF0FFF;
609157e72e8SLikun Gao 			temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
610157e72e8SLikun Gao 				 (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
6114005809bSLikun Gao 				 SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
612cf2a22e4SRohit Khaire 			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
613157e72e8SLikun Gao 
614157e72e8SLikun Gao 			/* unhalt engine */
615157e72e8SLikun Gao 			temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
616157e72e8SLikun Gao 			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
617157e72e8SLikun Gao 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
618157e72e8SLikun Gao 		}
619157e72e8SLikun Gao 
620157e72e8SLikun Gao 		/* enable DMA RB */
621157e72e8SLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
622cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
623157e72e8SLikun Gao 
624cf2a22e4SRohit Khaire 		ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
625157e72e8SLikun Gao 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
626157e72e8SLikun Gao #ifdef __BIG_ENDIAN
627157e72e8SLikun Gao 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
628157e72e8SLikun Gao #endif
629157e72e8SLikun Gao 		/* enable DMA IBs */
630cf2a22e4SRohit Khaire 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
631157e72e8SLikun Gao 
63241782d70SGuchun Chen 		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
63341782d70SGuchun Chen 			sdma_v5_2_ctx_switch_enable(adev, true);
63441782d70SGuchun Chen 			sdma_v5_2_enable(adev, true);
63541782d70SGuchun Chen 		}
636157e72e8SLikun Gao 
63793ab59acSGuchun Chen 		r = amdgpu_ring_test_helper(ring);
63893ab59acSGuchun Chen 		if (r)
639157e72e8SLikun Gao 			return r;
64061c31b8bSGuchun Chen 
641157e72e8SLikun Gao 		if (adev->mman.buffer_funcs_ring == ring)
642157e72e8SLikun Gao 			amdgpu_ttm_set_buffer_funcs_status(adev, true);
643157e72e8SLikun Gao 	}
644157e72e8SLikun Gao 
645157e72e8SLikun Gao 	return 0;
646157e72e8SLikun Gao }
647157e72e8SLikun Gao 
/**
 * sdma_v5_2_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them.
 * Currently a no-op: nothing is programmed here and 0 is always
 * returned.
 */
static int sdma_v5_2_rlc_resume(struct amdgpu_device *adev)
{
	return 0;
}
660157e72e8SLikun Gao 
661157e72e8SLikun Gao /**
662157e72e8SLikun Gao  * sdma_v5_2_load_microcode - load the sDMA ME ucode
663157e72e8SLikun Gao  *
664157e72e8SLikun Gao  * @adev: amdgpu_device pointer
665157e72e8SLikun Gao  *
666157e72e8SLikun Gao  * Loads the sDMA0/1/2/3 ucode.
667157e72e8SLikun Gao  * Returns 0 for success, -EINVAL if the ucode is not available.
668157e72e8SLikun Gao  */
sdma_v5_2_load_microcode(struct amdgpu_device * adev)669157e72e8SLikun Gao static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
670157e72e8SLikun Gao {
671157e72e8SLikun Gao 	const struct sdma_firmware_header_v1_0 *hdr;
672157e72e8SLikun Gao 	const __le32 *fw_data;
673157e72e8SLikun Gao 	u32 fw_size;
674157e72e8SLikun Gao 	int i, j;
675157e72e8SLikun Gao 
676157e72e8SLikun Gao 	/* halt the MEs */
67741782d70SGuchun Chen 	sdma_v5_2_enable(adev, false);
678157e72e8SLikun Gao 
679157e72e8SLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
680157e72e8SLikun Gao 		if (!adev->sdma.instance[i].fw)
681157e72e8SLikun Gao 			return -EINVAL;
682157e72e8SLikun Gao 
683157e72e8SLikun Gao 		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
684157e72e8SLikun Gao 		amdgpu_ucode_print_sdma_hdr(&hdr->header);
685157e72e8SLikun Gao 		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
686157e72e8SLikun Gao 
687157e72e8SLikun Gao 		fw_data = (const __le32 *)
688157e72e8SLikun Gao 			(adev->sdma.instance[i].fw->data +
689157e72e8SLikun Gao 				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
690157e72e8SLikun Gao 
691157e72e8SLikun Gao 		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
692157e72e8SLikun Gao 
693157e72e8SLikun Gao 		for (j = 0; j < fw_size; j++) {
694157e72e8SLikun Gao 			if (amdgpu_emu_mode == 1 && j % 500 == 0)
695157e72e8SLikun Gao 				msleep(1);
696157e72e8SLikun Gao 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
697157e72e8SLikun Gao 		}
698157e72e8SLikun Gao 
699157e72e8SLikun Gao 		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
700157e72e8SLikun Gao 	}
701157e72e8SLikun Gao 
702157e72e8SLikun Gao 	return 0;
703157e72e8SLikun Gao }
704157e72e8SLikun Gao 
sdma_v5_2_soft_reset(void * handle)705a9c210c1SXiaomeng Hou static int sdma_v5_2_soft_reset(void *handle)
706a9c210c1SXiaomeng Hou {
707a9c210c1SXiaomeng Hou 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
708a9c210c1SXiaomeng Hou 	u32 grbm_soft_reset;
709a9c210c1SXiaomeng Hou 	u32 tmp;
710a9c210c1SXiaomeng Hou 	int i;
711a9c210c1SXiaomeng Hou 
712a9c210c1SXiaomeng Hou 	for (i = 0; i < adev->sdma.num_instances; i++) {
713a9c210c1SXiaomeng Hou 		grbm_soft_reset = REG_SET_FIELD(0,
714a9c210c1SXiaomeng Hou 						GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
715a9c210c1SXiaomeng Hou 						1);
716a9c210c1SXiaomeng Hou 		grbm_soft_reset <<= i;
717a9c210c1SXiaomeng Hou 
718a9c210c1SXiaomeng Hou 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
719a9c210c1SXiaomeng Hou 		tmp |= grbm_soft_reset;
720a9c210c1SXiaomeng Hou 		DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
721a9c210c1SXiaomeng Hou 		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
722a9c210c1SXiaomeng Hou 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
723a9c210c1SXiaomeng Hou 
724a9c210c1SXiaomeng Hou 		udelay(50);
725a9c210c1SXiaomeng Hou 
726a9c210c1SXiaomeng Hou 		tmp &= ~grbm_soft_reset;
727a9c210c1SXiaomeng Hou 		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
728a9c210c1SXiaomeng Hou 		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
729a9c210c1SXiaomeng Hou 
730a9c210c1SXiaomeng Hou 		udelay(50);
731a9c210c1SXiaomeng Hou 	}
732a9c210c1SXiaomeng Hou 
733a9c210c1SXiaomeng Hou 	return 0;
734a9c210c1SXiaomeng Hou }
735a9c210c1SXiaomeng Hou 
736157e72e8SLikun Gao /**
737157e72e8SLikun Gao  * sdma_v5_2_start - setup and start the async dma engines
738157e72e8SLikun Gao  *
739157e72e8SLikun Gao  * @adev: amdgpu_device pointer
740157e72e8SLikun Gao  *
741157e72e8SLikun Gao  * Set up the DMA engines and enable them.
742157e72e8SLikun Gao  * Returns 0 for success, error for failure.
743157e72e8SLikun Gao  */
sdma_v5_2_start(struct amdgpu_device * adev)744157e72e8SLikun Gao static int sdma_v5_2_start(struct amdgpu_device *adev)
745157e72e8SLikun Gao {
746157e72e8SLikun Gao 	int r = 0;
747157e72e8SLikun Gao 
748157e72e8SLikun Gao 	if (amdgpu_sriov_vf(adev)) {
74941782d70SGuchun Chen 		sdma_v5_2_ctx_switch_enable(adev, false);
75041782d70SGuchun Chen 		sdma_v5_2_enable(adev, false);
751157e72e8SLikun Gao 
752157e72e8SLikun Gao 		/* set RB registers */
753157e72e8SLikun Gao 		r = sdma_v5_2_gfx_resume(adev);
754157e72e8SLikun Gao 		return r;
755157e72e8SLikun Gao 	}
756157e72e8SLikun Gao 
757157e72e8SLikun Gao 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
758157e72e8SLikun Gao 		r = sdma_v5_2_load_microcode(adev);
759157e72e8SLikun Gao 		if (r)
760157e72e8SLikun Gao 			return r;
761157e72e8SLikun Gao 
762157e72e8SLikun Gao 		/* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */
763157e72e8SLikun Gao 		if (amdgpu_emu_mode == 1)
764157e72e8SLikun Gao 			msleep(1000);
765157e72e8SLikun Gao 	}
766157e72e8SLikun Gao 
767a9c210c1SXiaomeng Hou 	sdma_v5_2_soft_reset(adev);
76841782d70SGuchun Chen 	/* unhalt the MEs */
76941782d70SGuchun Chen 	sdma_v5_2_enable(adev, true);
77041782d70SGuchun Chen 	/* enable sdma ring preemption */
77141782d70SGuchun Chen 	sdma_v5_2_ctx_switch_enable(adev, true);
772157e72e8SLikun Gao 
77341782d70SGuchun Chen 	/* start the gfx rings and rlc compute queues */
774157e72e8SLikun Gao 	r = sdma_v5_2_gfx_resume(adev);
775157e72e8SLikun Gao 	if (r)
776157e72e8SLikun Gao 		return r;
777157e72e8SLikun Gao 	r = sdma_v5_2_rlc_resume(adev);
778157e72e8SLikun Gao 
779157e72e8SLikun Gao 	return r;
780157e72e8SLikun Gao }
781157e72e8SLikun Gao 
sdma_v5_2_mqd_init(struct amdgpu_device * adev,void * mqd,struct amdgpu_mqd_prop * prop)782e0f5b4c9SJack Xiao static int sdma_v5_2_mqd_init(struct amdgpu_device *adev, void *mqd,
783e0f5b4c9SJack Xiao 			      struct amdgpu_mqd_prop *prop)
784e0f5b4c9SJack Xiao {
785e0f5b4c9SJack Xiao 	struct v10_sdma_mqd *m = mqd;
786e0f5b4c9SJack Xiao 	uint64_t wb_gpu_addr;
787e0f5b4c9SJack Xiao 
788e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_cntl =
789e0f5b4c9SJack Xiao 		order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
790e0f5b4c9SJack Xiao 		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
791e0f5b4c9SJack Xiao 		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
792e0f5b4c9SJack Xiao 		1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
793e0f5b4c9SJack Xiao 
794e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
795e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
796e0f5b4c9SJack Xiao 
797e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
798e0f5b4c9SJack Xiao 						  mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
799e0f5b4c9SJack Xiao 
800e0f5b4c9SJack Xiao 	wb_gpu_addr = prop->wptr_gpu_addr;
801e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
802e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
803e0f5b4c9SJack Xiao 
804e0f5b4c9SJack Xiao 	wb_gpu_addr = prop->rptr_gpu_addr;
805e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
806e0f5b4c9SJack Xiao 	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
807e0f5b4c9SJack Xiao 
808e0f5b4c9SJack Xiao 	m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
809e0f5b4c9SJack Xiao 							mmSDMA0_GFX_IB_CNTL));
810e0f5b4c9SJack Xiao 
811e0f5b4c9SJack Xiao 	m->sdmax_rlcx_doorbell_offset =
812e0f5b4c9SJack Xiao 		prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
813e0f5b4c9SJack Xiao 
814e0f5b4c9SJack Xiao 	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
815e0f5b4c9SJack Xiao 
816e0f5b4c9SJack Xiao 	return 0;
817e0f5b4c9SJack Xiao }
818e0f5b4c9SJack Xiao 
/* Register the MQD size and init callback for the DMA IP block.
 * NOTE(review): presumably consumed when queues are created through the
 * generic MQD management path — confirm against the mqds[] users.
 */
static void sdma_v5_2_set_mqd_funcs(struct amdgpu_device *adev)
{
	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_2_mqd_init;
}
824e0f5b4c9SJack Xiao 
825157e72e8SLikun Gao /**
826157e72e8SLikun Gao  * sdma_v5_2_ring_test_ring - simple async dma engine test
827157e72e8SLikun Gao  *
828157e72e8SLikun Gao  * @ring: amdgpu_ring structure holding ring information
829157e72e8SLikun Gao  *
830157e72e8SLikun Gao  * Test the DMA engine by writing using it to write an
831157e72e8SLikun Gao  * value to memory.
832157e72e8SLikun Gao  * Returns 0 for success, error for failure.
833157e72e8SLikun Gao  */
sdma_v5_2_ring_test_ring(struct amdgpu_ring * ring)834157e72e8SLikun Gao static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
835157e72e8SLikun Gao {
836157e72e8SLikun Gao 	struct amdgpu_device *adev = ring->adev;
837157e72e8SLikun Gao 	unsigned i;
838157e72e8SLikun Gao 	unsigned index;
839157e72e8SLikun Gao 	int r;
840157e72e8SLikun Gao 	u32 tmp;
841157e72e8SLikun Gao 	u64 gpu_addr;
8427e5e7971SJack Xiao 	volatile uint32_t *cpu_ptr = NULL;
843157e72e8SLikun Gao 
8447e5e7971SJack Xiao 	tmp = 0xCAFEDEAD;
8457e5e7971SJack Xiao 
8467e5e7971SJack Xiao 	if (ring->is_mes_queue) {
8477e5e7971SJack Xiao 		uint32_t offset = 0;
8487e5e7971SJack Xiao 		offset = amdgpu_mes_ctx_get_offs(ring,
8497e5e7971SJack Xiao 					 AMDGPU_MES_CTX_PADDING_OFFS);
8507e5e7971SJack Xiao 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
8517e5e7971SJack Xiao 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
8527e5e7971SJack Xiao 		*cpu_ptr = tmp;
8537e5e7971SJack Xiao 	} else {
854157e72e8SLikun Gao 		r = amdgpu_device_wb_get(adev, &index);
855157e72e8SLikun Gao 		if (r) {
856157e72e8SLikun Gao 			dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
857157e72e8SLikun Gao 			return r;
858157e72e8SLikun Gao 		}
859157e72e8SLikun Gao 
860157e72e8SLikun Gao 		gpu_addr = adev->wb.gpu_addr + (index * 4);
861157e72e8SLikun Gao 		adev->wb.wb[index] = cpu_to_le32(tmp);
8627e5e7971SJack Xiao 	}
863157e72e8SLikun Gao 
8647e5e7971SJack Xiao 	r = amdgpu_ring_alloc(ring, 20);
865157e72e8SLikun Gao 	if (r) {
866157e72e8SLikun Gao 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
867157e72e8SLikun Gao 		amdgpu_device_wb_free(adev, index);
868157e72e8SLikun Gao 		return r;
869157e72e8SLikun Gao 	}
870157e72e8SLikun Gao 
871157e72e8SLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
872157e72e8SLikun Gao 			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
873157e72e8SLikun Gao 	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
874157e72e8SLikun Gao 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
875157e72e8SLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
876157e72e8SLikun Gao 	amdgpu_ring_write(ring, 0xDEADBEEF);
877157e72e8SLikun Gao 	amdgpu_ring_commit(ring);
878157e72e8SLikun Gao 
879157e72e8SLikun Gao 	for (i = 0; i < adev->usec_timeout; i++) {
8807e5e7971SJack Xiao 		if (ring->is_mes_queue)
8817e5e7971SJack Xiao 			tmp = le32_to_cpu(*cpu_ptr);
8827e5e7971SJack Xiao 		else
883157e72e8SLikun Gao 			tmp = le32_to_cpu(adev->wb.wb[index]);
884157e72e8SLikun Gao 		if (tmp == 0xDEADBEEF)
885157e72e8SLikun Gao 			break;
886157e72e8SLikun Gao 		if (amdgpu_emu_mode == 1)
887157e72e8SLikun Gao 			msleep(1);
888157e72e8SLikun Gao 		else
889157e72e8SLikun Gao 			udelay(1);
890157e72e8SLikun Gao 	}
891157e72e8SLikun Gao 
892157e72e8SLikun Gao 	if (i >= adev->usec_timeout)
893157e72e8SLikun Gao 		r = -ETIMEDOUT;
894157e72e8SLikun Gao 
8957e5e7971SJack Xiao 	if (!ring->is_mes_queue)
896157e72e8SLikun Gao 		amdgpu_device_wb_free(adev, index);
897157e72e8SLikun Gao 
898157e72e8SLikun Gao 	return r;
899157e72e8SLikun Gao }
900157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring: the IB writes 0xDEADBEEF to a
 * scratch location, and the result is checked after the fence signals.
 * Returns 0 on success, error on failure.
 */
static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;
	volatile uint32_t *cpu_ptr = NULL;

	tmp = 0xCAFEDEAD;
	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		/* MES queues take the IB and scratch slot from the MES
		 * context, so no wb slot or IB pool allocation is needed. */
		uint32_t offset = 0;
		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		offset = amdgpu_mes_ctx_get_offs(ring,
					 AMDGPU_MES_CTX_PADDING_OFFS);
		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
		*cpu_ptr = tmp;
	} else {
		/* seed a writeback slot with the poison value */
		r = amdgpu_device_wb_get(adev, &index);
		if (r) {
			dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
			return r;
		}

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(tmp);

		r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err0;
		}
	}

	/* one-dword linear write of 0xDEADBEEF, padded with NOPs */
	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}

	if (ring->is_mes_queue)
		tmp = le32_to_cpu(*cpu_ptr);
	else
		tmp = le32_to_cpu(adev->wb.wb[index]);

	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	/* MES queues never allocated a wb slot */
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}
995157e72e8SLikun Gao 
996157e72e8SLikun Gao 
997157e72e8SLikun Gao /**
998157e72e8SLikun Gao  * sdma_v5_2_vm_copy_pte - update PTEs by copying them from the GART
999157e72e8SLikun Gao  *
1000157e72e8SLikun Gao  * @ib: indirect buffer to fill with commands
1001157e72e8SLikun Gao  * @pe: addr of the page entry
1002157e72e8SLikun Gao  * @src: src addr to copy from
1003157e72e8SLikun Gao  * @count: number of page entries to update
1004157e72e8SLikun Gao  *
1005157e72e8SLikun Gao  * Update PTEs by copying them from the GART using sDMA.
1006157e72e8SLikun Gao  */
sdma_v5_2_vm_copy_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t src,unsigned count)1007157e72e8SLikun Gao static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib,
1008157e72e8SLikun Gao 				  uint64_t pe, uint64_t src,
1009157e72e8SLikun Gao 				  unsigned count)
1010157e72e8SLikun Gao {
1011157e72e8SLikun Gao 	unsigned bytes = count * 8;
1012157e72e8SLikun Gao 
1013157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1014157e72e8SLikun Gao 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1015157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = bytes - 1;
1016157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1017157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(src);
1018157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(src);
1019157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1020157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1021157e72e8SLikun Gao 
1022157e72e8SLikun Gao }
1023157e72e8SLikun Gao 
1024157e72e8SLikun Gao /**
1025157e72e8SLikun Gao  * sdma_v5_2_vm_write_pte - update PTEs by writing them manually
1026157e72e8SLikun Gao  *
1027157e72e8SLikun Gao  * @ib: indirect buffer to fill with commands
1028157e72e8SLikun Gao  * @pe: addr of the page entry
1029fd1c541dSLee Jones  * @value: dst addr to write into pe
1030157e72e8SLikun Gao  * @count: number of page entries to update
1031157e72e8SLikun Gao  * @incr: increase next addr by incr bytes
1032157e72e8SLikun Gao  *
1033157e72e8SLikun Gao  * Update PTEs by writing them manually using sDMA.
1034157e72e8SLikun Gao  */
sdma_v5_2_vm_write_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t value,unsigned count,uint32_t incr)1035157e72e8SLikun Gao static void sdma_v5_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1036157e72e8SLikun Gao 				   uint64_t value, unsigned count,
1037157e72e8SLikun Gao 				   uint32_t incr)
1038157e72e8SLikun Gao {
1039157e72e8SLikun Gao 	unsigned ndw = count * 2;
1040157e72e8SLikun Gao 
1041157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1042157e72e8SLikun Gao 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1043157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1044157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1045157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = ndw - 1;
1046157e72e8SLikun Gao 	for (; ndw > 0; ndw -= 2) {
1047157e72e8SLikun Gao 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
1048157e72e8SLikun Gao 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
1049157e72e8SLikun Gao 		value += incr;
1050157e72e8SLikun Gao 	}
1051157e72e8SLikun Gao }
1052157e72e8SLikun Gao 
1053157e72e8SLikun Gao /**
1054157e72e8SLikun Gao  * sdma_v5_2_vm_set_pte_pde - update the page tables using sDMA
1055157e72e8SLikun Gao  *
1056157e72e8SLikun Gao  * @ib: indirect buffer to fill with commands
1057157e72e8SLikun Gao  * @pe: addr of the page entry
1058157e72e8SLikun Gao  * @addr: dst addr to write into pe
1059157e72e8SLikun Gao  * @count: number of page entries to update
1060157e72e8SLikun Gao  * @incr: increase next addr by incr bytes
1061157e72e8SLikun Gao  * @flags: access flags
1062157e72e8SLikun Gao  *
1063157e72e8SLikun Gao  * Update the page tables using sDMA.
1064157e72e8SLikun Gao  */
sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib * ib,uint64_t pe,uint64_t addr,unsigned count,uint32_t incr,uint64_t flags)1065157e72e8SLikun Gao static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib,
1066157e72e8SLikun Gao 				     uint64_t pe,
1067157e72e8SLikun Gao 				     uint64_t addr, unsigned count,
1068157e72e8SLikun Gao 				     uint32_t incr, uint64_t flags)
1069157e72e8SLikun Gao {
1070157e72e8SLikun Gao 	/* for physically contiguous pages (vram) */
1071157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
1072157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1073157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1074157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1075157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1076157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1077157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1078157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = incr; /* increment size */
1079157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = 0;
1080157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1081157e72e8SLikun Gao }
1082157e72e8SLikun Gao 
1083157e72e8SLikun Gao /**
1084157e72e8SLikun Gao  * sdma_v5_2_ring_pad_ib - pad the IB
1085157e72e8SLikun Gao  *
1086157e72e8SLikun Gao  * @ib: indirect buffer to fill with padding
1087fd1c541dSLee Jones  * @ring: amdgpu_ring structure holding ring information
1088157e72e8SLikun Gao  *
1089157e72e8SLikun Gao  * Pad the IB with NOPs to a boundary multiple of 8.
1090157e72e8SLikun Gao  */
sdma_v5_2_ring_pad_ib(struct amdgpu_ring * ring,struct amdgpu_ib * ib)1091157e72e8SLikun Gao static void sdma_v5_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1092157e72e8SLikun Gao {
1093157e72e8SLikun Gao 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1094157e72e8SLikun Gao 	u32 pad_count;
1095157e72e8SLikun Gao 	int i;
1096157e72e8SLikun Gao 
1097157e72e8SLikun Gao 	pad_count = (-ib->length_dw) & 0x7;
1098157e72e8SLikun Gao 	for (i = 0; i < pad_count; i++)
1099157e72e8SLikun Gao 		if (sdma && sdma->burst_nop && (i == 0))
1100157e72e8SLikun Gao 			ib->ptr[ib->length_dw++] =
1101157e72e8SLikun Gao 				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
1102157e72e8SLikun Gao 				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1103157e72e8SLikun Gao 		else
1104157e72e8SLikun Gao 			ib->ptr[ib->length_dw++] =
1105157e72e8SLikun Gao 				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
1106157e72e8SLikun Gao }
1107157e72e8SLikun Gao 
1108157e72e8SLikun Gao 
1109157e72e8SLikun Gao /**
1110157e72e8SLikun Gao  * sdma_v5_2_ring_emit_pipeline_sync - sync the pipeline
1111157e72e8SLikun Gao  *
1112157e72e8SLikun Gao  * @ring: amdgpu_ring pointer
1113157e72e8SLikun Gao  *
1114157e72e8SLikun Gao  * Make sure all previous operations are completed (CIK).
1115157e72e8SLikun Gao  */
sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring * ring)1116157e72e8SLikun Gao static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1117157e72e8SLikun Gao {
1118157e72e8SLikun Gao 	uint32_t seq = ring->fence_drv.sync_seq;
1119157e72e8SLikun Gao 	uint64_t addr = ring->fence_drv.gpu_addr;
1120157e72e8SLikun Gao 
1121157e72e8SLikun Gao 	/* wait for idle */
1122157e72e8SLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1123157e72e8SLikun Gao 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1124157e72e8SLikun Gao 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
1125157e72e8SLikun Gao 			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
1126157e72e8SLikun Gao 	amdgpu_ring_write(ring, addr & 0xfffffffc);
1127157e72e8SLikun Gao 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1128157e72e8SLikun Gao 	amdgpu_ring_write(ring, seq); /* reference */
1129157e72e8SLikun Gao 	amdgpu_ring_write(ring, 0xffffffff); /* mask */
1130157e72e8SLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1131157e72e8SLikun Gao 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1132157e72e8SLikun Gao }
1133157e72e8SLikun Gao 
1134157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using sDMA; the common GMC helper emits the actual packets.
 */
static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}
1150157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_emit_wreg - emit a register write packet
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset to write
 * @val: value to write
 *
 * Emit an SRBM_WRITE packet (3 dwords) that writes @val to @reg.
 */
static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, val);
}
1159157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_emit_reg_wait - emit a wait-on-register packet
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset to poll
 * @val: reference value
 * @mask: bits of the register to compare
 *
 * Emit a POLL_REGMEM packet that polls @reg (byte offset, hence << 2)
 * until (value & @mask) == @val.
 */
static void sdma_v5_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
1173157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_emit_reg_write_reg_wait - write one register, wait on another
 *
 * @ring: amdgpu_ring pointer
 * @reg0: register to write @ref to
 * @reg1: register to poll afterwards
 * @ref: value to write / reference for the wait on @reg1
 * @mask: mask for the wait on @reg1
 *
 * Used by the VM flush path: write the invalidation request, then
 * wait until the ack register reports completion.
 */
static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	/* wait for a cycle to reset vm_inv_eng*_ack */
	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
1183157e72e8SLikun Gao 
/**
 * sdma_v5_2_early_init - early IP-block init
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Install the SDMA 5.2 ring/buffer/VM-PTE/IRQ/MQD function tables
 * on the device.  Always succeeds.
 */
static int sdma_v5_2_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v5_2_set_ring_funcs(adev);
	sdma_v5_2_set_buffer_funcs(adev);
	sdma_v5_2_set_vm_pte_funcs(adev);
	sdma_v5_2_set_irq_funcs(adev);
	sdma_v5_2_set_mqd_funcs(adev);

	return 0;
}
1196157e72e8SLikun Gao 
sdma_v5_2_seq_to_irq_id(int seq_num)119765655471SHuang Rui static unsigned sdma_v5_2_seq_to_irq_id(int seq_num)
119865655471SHuang Rui {
119965655471SHuang Rui 	switch (seq_num) {
120065655471SHuang Rui 	case 0:
120165655471SHuang Rui 		return SOC15_IH_CLIENTID_SDMA0;
120265655471SHuang Rui 	case 1:
120365655471SHuang Rui 		return SOC15_IH_CLIENTID_SDMA1;
120465655471SHuang Rui 	case 2:
120565655471SHuang Rui 		return SOC15_IH_CLIENTID_SDMA2;
120665655471SHuang Rui 	case 3:
120765655471SHuang Rui 		return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid;
120865655471SHuang Rui 	default:
120965655471SHuang Rui 		break;
121065655471SHuang Rui 	}
121165655471SHuang Rui 	return -EINVAL;
121265655471SHuang Rui }
121365655471SHuang Rui 
sdma_v5_2_seq_to_trap_id(int seq_num)121465655471SHuang Rui static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
121565655471SHuang Rui {
121665655471SHuang Rui 	switch (seq_num) {
121765655471SHuang Rui 	case 0:
121865655471SHuang Rui 		return SDMA0_5_0__SRCID__SDMA_TRAP;
121965655471SHuang Rui 	case 1:
122065655471SHuang Rui 		return SDMA1_5_0__SRCID__SDMA_TRAP;
122165655471SHuang Rui 	case 2:
122265655471SHuang Rui 		return SDMA2_5_0__SRCID__SDMA_TRAP;
122365655471SHuang Rui 	case 3:
122465655471SHuang Rui 		return SDMA3_5_0__SRCID__SDMA_TRAP;
122565655471SHuang Rui 	default:
122665655471SHuang Rui 		break;
122765655471SHuang Rui 	}
122865655471SHuang Rui 	return -EINVAL;
122965655471SHuang Rui }
123065655471SHuang Rui 
/**
 * sdma_v5_2_sw_init - software init
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Register one trap interrupt per SDMA instance, load the SDMA
 * microcode, and initialize one doorbell-backed ring per instance.
 * Returns 0 on success or a negative error code.
 */
static int sdma_v5_2_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* SDMA trap event */
	for (i = 0; i < adev->sdma.num_instances; i++) {
		r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i),
				      sdma_v5_2_seq_to_trap_id(i),
				      &adev->sdma.trap_irq);
		if (r)
			return r;
	}

	r = amdgpu_sdma_init_microcode(adev, 0, true);
	if (r) {
		DRM_ERROR("Failed to load sdma firmware!\n");
		return r;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->me = i;

		DRM_INFO("use_doorbell being set to: [%s]\n",
				ring->use_doorbell?"true":"false");

		ring->doorbell_index =
			(adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset

		ring->vm_hub = AMDGPU_GFXHUB(0);
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	/* r is 0 here: every failing path above already returned */
	return r;
}
1275157e72e8SLikun Gao 
/**
 * sdma_v5_2_sw_fini - software teardown
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Tear down every SDMA ring and release the instance/firmware
 * context allocated in sw_init.  Always returns 0.
 */
static int sdma_v5_2_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	amdgpu_sdma_destroy_inst_ctx(adev, true);

	return 0;
}
1288157e72e8SLikun Gao 
/**
 * sdma_v5_2_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Bring up the SDMA engines; returns the result of sdma_v5_2_start().
 */
static int sdma_v5_2_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = sdma_v5_2_start(adev);

	return r;
}
1295157e72e8SLikun Gao 
/**
 * sdma_v5_2_hw_fini - hardware teardown
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Under SR-IOV the host owns the engine, so only detach the buffer
 * funcs; otherwise also stop context switching and disable the
 * engines.  Always returns 0.
 */
static int sdma_v5_2_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* disable the scheduler for SDMA */
		amdgpu_sdma_unset_buffer_funcs_helper(adev);
		return 0;
	}

	sdma_v5_2_ctx_switch_enable(adev, false);
	sdma_v5_2_enable(adev, false);

	return 0;
}
1311157e72e8SLikun Gao 
/**
 * sdma_v5_2_suspend - suspend callback
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Suspend is identical to hw_fini for this IP block.
 * (Passing @adev rather than @handle is fine: they are the same pointer.)
 */
static int sdma_v5_2_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v5_2_hw_fini(adev);
}
1318157e72e8SLikun Gao 
/**
 * sdma_v5_2_resume - resume callback
 *
 * @handle: amdgpu_device pointer (as void *)
 *
 * Resume is identical to hw_init for this IP block.
 */
static int sdma_v5_2_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v5_2_hw_init(adev);
}
1325157e72e8SLikun Gao 
sdma_v5_2_is_idle(void * handle)1326157e72e8SLikun Gao static bool sdma_v5_2_is_idle(void *handle)
1327157e72e8SLikun Gao {
1328157e72e8SLikun Gao 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1329157e72e8SLikun Gao 	u32 i;
1330157e72e8SLikun Gao 
1331157e72e8SLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
1332157e72e8SLikun Gao 		u32 tmp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
1333157e72e8SLikun Gao 
1334157e72e8SLikun Gao 		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1335157e72e8SLikun Gao 			return false;
1336157e72e8SLikun Gao 	}
1337157e72e8SLikun Gao 
1338157e72e8SLikun Gao 	return true;
1339157e72e8SLikun Gao }
1340157e72e8SLikun Gao 
sdma_v5_2_wait_for_idle(void * handle)1341157e72e8SLikun Gao static int sdma_v5_2_wait_for_idle(void *handle)
1342157e72e8SLikun Gao {
1343157e72e8SLikun Gao 	unsigned i;
1344157e72e8SLikun Gao 	u32 sdma0, sdma1, sdma2, sdma3;
1345157e72e8SLikun Gao 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1346157e72e8SLikun Gao 
1347157e72e8SLikun Gao 	for (i = 0; i < adev->usec_timeout; i++) {
1348157e72e8SLikun Gao 		sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
1349157e72e8SLikun Gao 		sdma1 = RREG32(sdma_v5_2_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
1350157e72e8SLikun Gao 		sdma2 = RREG32(sdma_v5_2_get_reg_offset(adev, 2, mmSDMA0_STATUS_REG));
1351157e72e8SLikun Gao 		sdma3 = RREG32(sdma_v5_2_get_reg_offset(adev, 3, mmSDMA0_STATUS_REG));
1352157e72e8SLikun Gao 
1353157e72e8SLikun Gao 		if (sdma0 & sdma1 & sdma2 & sdma3 & SDMA0_STATUS_REG__IDLE_MASK)
1354157e72e8SLikun Gao 			return 0;
1355157e72e8SLikun Gao 		udelay(1);
1356157e72e8SLikun Gao 	}
1357157e72e8SLikun Gao 	return -ETIMEDOUT;
1358157e72e8SLikun Gao }
1359157e72e8SLikun Gao 
/**
 * sdma_v5_2_ring_preempt_ib - preempt the currently-executing IB
 *
 * @ring: amdgpu_ring pointer
 *
 * Sequence: disable the cond_exec path, emit a trailing fence,
 * assert the GFX_PREEMPT register, then busy-poll the trailing fence
 * until the engine reaches it (i.e. preemption completed).  The
 * preempt register is deasserted and cond_exec re-enabled on all
 * paths.  Returns 0 on success, -EINVAL on timeout.
 */
static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	u32 index = 0;
	u64 sdma_gfx_preempt;

	amdgpu_sdma_get_index_from_ring(ring, &index);
	sdma_gfx_preempt =
		sdma_v5_2_get_reg_offset(adev, index, mmSDMA0_GFX_PREEMPT);

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	amdgpu_ring_alloc(ring, 10);
	sdma_v5_2_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				  ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(sdma_gfx_preempt, 1);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(sdma_gfx_preempt, 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
1404157e72e8SLikun Gao 
/**
 * sdma_v5_2_set_trap_irq_state - enable/disable the SDMA trap interrupt
 *
 * @adev: amdgpu device pointer
 * @source: irq source (unused here)
 * @type: SDMA instance index the irq type maps to
 * @state: AMDGPU_IRQ_STATE_ENABLE or _DISABLE
 *
 * Toggle TRAP_ENABLE in the instance's SDMA0_CNTL register.  Under
 * SR-IOV the register is owned by the host, so the write is skipped.
 * Always returns 0.
 */
static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;
	u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL);

	if (!amdgpu_sriov_vf(adev)) {
		sdma_cntl = RREG32(reg_offset);
		sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		WREG32(reg_offset, sdma_cntl);
	}

	return 0;
}
1422157e72e8SLikun Gao 
sdma_v5_2_process_trap_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1423157e72e8SLikun Gao static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
1424157e72e8SLikun Gao 				      struct amdgpu_irq_src *source,
1425157e72e8SLikun Gao 				      struct amdgpu_iv_entry *entry)
1426157e72e8SLikun Gao {
1427254492b6SJack Xiao 	uint32_t mes_queue_id = entry->src_data[0];
1428254492b6SJack Xiao 
1429157e72e8SLikun Gao 	DRM_DEBUG("IH: SDMA trap\n");
1430254492b6SJack Xiao 
1431254492b6SJack Xiao 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
1432254492b6SJack Xiao 		struct amdgpu_mes_queue *queue;
1433254492b6SJack Xiao 
1434254492b6SJack Xiao 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
1435254492b6SJack Xiao 
1436254492b6SJack Xiao 		spin_lock(&adev->mes.queue_id_lock);
1437254492b6SJack Xiao 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
1438254492b6SJack Xiao 		if (queue) {
1439254492b6SJack Xiao 			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
1440254492b6SJack Xiao 			amdgpu_fence_process(queue->ring);
1441254492b6SJack Xiao 		}
1442254492b6SJack Xiao 		spin_unlock(&adev->mes.queue_id_lock);
1443254492b6SJack Xiao 		return 0;
1444254492b6SJack Xiao 	}
1445254492b6SJack Xiao 
1446157e72e8SLikun Gao 	switch (entry->client_id) {
1447157e72e8SLikun Gao 	case SOC15_IH_CLIENTID_SDMA0:
1448157e72e8SLikun Gao 		switch (entry->ring_id) {
1449157e72e8SLikun Gao 		case 0:
1450157e72e8SLikun Gao 			amdgpu_fence_process(&adev->sdma.instance[0].ring);
1451157e72e8SLikun Gao 			break;
1452157e72e8SLikun Gao 		case 1:
1453157e72e8SLikun Gao 			/* XXX compute */
1454157e72e8SLikun Gao 			break;
1455157e72e8SLikun Gao 		case 2:
1456157e72e8SLikun Gao 			/* XXX compute */
1457157e72e8SLikun Gao 			break;
1458157e72e8SLikun Gao 		case 3:
1459157e72e8SLikun Gao 			/* XXX page queue*/
1460157e72e8SLikun Gao 			break;
1461157e72e8SLikun Gao 		}
1462157e72e8SLikun Gao 		break;
1463157e72e8SLikun Gao 	case SOC15_IH_CLIENTID_SDMA1:
1464157e72e8SLikun Gao 		switch (entry->ring_id) {
1465157e72e8SLikun Gao 		case 0:
1466157e72e8SLikun Gao 			amdgpu_fence_process(&adev->sdma.instance[1].ring);
1467157e72e8SLikun Gao 			break;
1468157e72e8SLikun Gao 		case 1:
1469157e72e8SLikun Gao 			/* XXX compute */
1470157e72e8SLikun Gao 			break;
1471157e72e8SLikun Gao 		case 2:
1472157e72e8SLikun Gao 			/* XXX compute */
1473157e72e8SLikun Gao 			break;
1474157e72e8SLikun Gao 		case 3:
1475157e72e8SLikun Gao 			/* XXX page queue*/
1476157e72e8SLikun Gao 			break;
1477157e72e8SLikun Gao 		}
1478157e72e8SLikun Gao 		break;
1479157e72e8SLikun Gao 	case SOC15_IH_CLIENTID_SDMA2:
1480157e72e8SLikun Gao 		switch (entry->ring_id) {
1481157e72e8SLikun Gao 		case 0:
1482157e72e8SLikun Gao 			amdgpu_fence_process(&adev->sdma.instance[2].ring);
1483157e72e8SLikun Gao 			break;
1484157e72e8SLikun Gao 		case 1:
1485157e72e8SLikun Gao 			/* XXX compute */
1486157e72e8SLikun Gao 			break;
1487157e72e8SLikun Gao 		case 2:
1488157e72e8SLikun Gao 			/* XXX compute */
1489157e72e8SLikun Gao 			break;
1490157e72e8SLikun Gao 		case 3:
1491157e72e8SLikun Gao 			/* XXX page queue*/
1492157e72e8SLikun Gao 			break;
1493157e72e8SLikun Gao 		}
1494157e72e8SLikun Gao 		break;
1495d682a353SLikun Gao 	case SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid:
1496157e72e8SLikun Gao 		switch (entry->ring_id) {
1497157e72e8SLikun Gao 		case 0:
1498157e72e8SLikun Gao 			amdgpu_fence_process(&adev->sdma.instance[3].ring);
1499157e72e8SLikun Gao 			break;
1500157e72e8SLikun Gao 		case 1:
1501157e72e8SLikun Gao 			/* XXX compute */
1502157e72e8SLikun Gao 			break;
1503157e72e8SLikun Gao 		case 2:
1504157e72e8SLikun Gao 			/* XXX compute */
1505157e72e8SLikun Gao 			break;
1506157e72e8SLikun Gao 		case 3:
1507157e72e8SLikun Gao 			/* XXX page queue*/
1508157e72e8SLikun Gao 			break;
1509157e72e8SLikun Gao 		}
1510157e72e8SLikun Gao 		break;
1511157e72e8SLikun Gao 	}
1512157e72e8SLikun Gao 	return 0;
1513157e72e8SLikun Gao }
1514157e72e8SLikun Gao 
/**
 * sdma_v5_2_process_illegal_inst_irq - illegal-instruction irq handler
 *
 * @adev: amdgpu device pointer
 * @source: irq source (unused)
 * @entry: decoded IH ring entry (unused)
 *
 * Stub: acknowledges the interrupt without taking any action.
 */
static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	return 0;
}
1521157e72e8SLikun Gao 
sdma_v5_2_firmware_mgcg_support(struct amdgpu_device * adev,int i)1522f05f4fe6SPrike Liang static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev,
1523f05f4fe6SPrike Liang 						     int i)
1524f05f4fe6SPrike Liang {
1525f05f4fe6SPrike Liang 	switch (adev->ip_versions[SDMA0_HWIP][0]) {
1526f05f4fe6SPrike Liang 	case IP_VERSION(5, 2, 1):
1527f05f4fe6SPrike Liang 		if (adev->sdma.instance[i].fw_version < 70)
1528f05f4fe6SPrike Liang 			return false;
1529f05f4fe6SPrike Liang 		break;
1530f05f4fe6SPrike Liang 	case IP_VERSION(5, 2, 3):
1531f05f4fe6SPrike Liang 		if (adev->sdma.instance[i].fw_version < 47)
1532f05f4fe6SPrike Liang 			return false;
1533f05f4fe6SPrike Liang 		break;
1534f05f4fe6SPrike Liang 	case IP_VERSION(5, 2, 7):
1535f05f4fe6SPrike Liang 		if (adev->sdma.instance[i].fw_version < 9)
1536f05f4fe6SPrike Liang 			return false;
1537f05f4fe6SPrike Liang 		break;
1538f05f4fe6SPrike Liang 	default:
1539f05f4fe6SPrike Liang 		return true;
1540f05f4fe6SPrike Liang 	}
1541f05f4fe6SPrike Liang 
1542f05f4fe6SPrike Liang 	return true;
1543f05f4fe6SPrike Liang 
1544f05f4fe6SPrike Liang }
1545f05f4fe6SPrike Liang 
/**
 * sdma_v5_2_update_medium_grain_clock_gating - toggle SDMA MGCG
 *
 * @adev: amdgpu device pointer
 * @enable: enable or disable medium-grain clock gating
 *
 * For each instance: if its firmware cannot do MGCG, strip the
 * feature from cg_flags; then clear (enable gating) or set (disable
 * gating) the SOFT_OVERRIDE bits in SDMA0_CLK_CTRL.  Writes are
 * skipped when the register already holds the target value.
 */
static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t data, def;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {

		if (!sdma_v5_2_firmware_mgcg_support(adev, i))
			adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;

		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
			/* Enable sdma clock gating */
			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
			data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
				  SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
			if (def != data)
				WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
		} else {
			/* Disable sdma clock gating */
			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
			data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
				 SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
			if (def != data)
				WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
		}
	}
}
1582157e72e8SLikun Gao 
/**
 * sdma_v5_2_update_medium_grain_light_sleep - toggle SDMA memory light sleep
 *
 * @adev: amdgpu device pointer
 * @enable: enable or disable memory light sleep
 *
 * For each instance, set or clear MEM_POWER_OVERRIDE in
 * SDMA0_POWER_CNTL.  On 5.2.1 with firmware older than 70 the
 * feature is stripped from cg_flags first (cf. the MGCG helper,
 * which covers more IP versions).  Writes are skipped when the
 * register already holds the target value.
 */
static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t data, def;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {

		if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1))
			adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;

		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
			/* Enable sdma mem light sleep */
			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);

		} else {
			/* Disable sdma mem light sleep */
			def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);

		}
	}
}
1611157e72e8SLikun Gao 
/**
 * sdma_v5_2_set_clockgating_state - set SDMA clockgating state
 *
 * @handle: amdgpu_device pointer (as void *)
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * No-op under SR-IOV (host controls gating).  For known SDMA 5.2.x
 * IP versions, apply both MGCG and memory light sleep according to
 * @state; unknown versions are left untouched.  Always returns 0.
 */
static int sdma_v5_2_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->ip_versions[SDMA0_HWIP][0]) {
	case IP_VERSION(5, 2, 0):
	case IP_VERSION(5, 2, 2):
	case IP_VERSION(5, 2, 1):
	case IP_VERSION(5, 2, 4):
	case IP_VERSION(5, 2, 5):
	case IP_VERSION(5, 2, 6):
	case IP_VERSION(5, 2, 3):
	case IP_VERSION(5, 2, 7):
		sdma_v5_2_update_medium_grain_clock_gating(adev,
				state == AMD_CG_STATE_GATE);
		sdma_v5_2_update_medium_grain_light_sleep(adev,
				state == AMD_CG_STATE_GATE);
		break;
	default:
		break;
	}

	return 0;
}
1640157e72e8SLikun Gao 
/**
 * sdma_v5_2_set_powergating_state - set SDMA powergating state
 *
 * @handle: amdgpu_device pointer (unused)
 * @state: requested powergating state (unused)
 *
 * No powergating control is implemented here; always succeeds.
 */
static int sdma_v5_2_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
1646157e72e8SLikun Gao 
sdma_v5_2_get_clockgating_state(void * handle,u64 * flags)164725faeddcSEvan Quan static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
1648157e72e8SLikun Gao {
1649157e72e8SLikun Gao 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1650157e72e8SLikun Gao 	int data;
1651157e72e8SLikun Gao 
1652157e72e8SLikun Gao 	if (amdgpu_sriov_vf(adev))
1653157e72e8SLikun Gao 		*flags = 0;
1654157e72e8SLikun Gao 
165593afe158SYifan Zhang 	/* AMD_CG_SUPPORT_SDMA_MGCG */
165693afe158SYifan Zhang 	data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
165793afe158SYifan Zhang 	if (!(data & SDMA0_CLK_CTRL__CGCG_EN_OVERRIDE_MASK))
165893afe158SYifan Zhang 		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;
165993afe158SYifan Zhang 
1660157e72e8SLikun Gao 	/* AMD_CG_SUPPORT_SDMA_LS */
16612373dd48SLikun Gao 	data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
1662157e72e8SLikun Gao 	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
1663157e72e8SLikun Gao 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
1664157e72e8SLikun Gao }
1665157e72e8SLikun Gao 
sdma_v5_2_ring_begin_use(struct amdgpu_ring * ring)16663aae4ef4SAlex Deucher static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
16673aae4ef4SAlex Deucher {
16683aae4ef4SAlex Deucher 	struct amdgpu_device *adev = ring->adev;
16693aae4ef4SAlex Deucher 
16703aae4ef4SAlex Deucher 	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
16713aae4ef4SAlex Deucher 	 * disallow GFXOFF in some cases leading to
16723aae4ef4SAlex Deucher 	 * hangs in SDMA.  Disallow GFXOFF while SDMA is active.
16733aae4ef4SAlex Deucher 	 * We can probably just limit this to 5.2.3,
16743aae4ef4SAlex Deucher 	 * but it shouldn't hurt for other parts since
16753aae4ef4SAlex Deucher 	 * this GFXOFF will be disallowed anyway when SDMA is
16763aae4ef4SAlex Deucher 	 * active, this just makes it explicit.
1677*9d74e500SAlex Deucher 	 * sdma_v5_2_ring_set_wptr() takes advantage of this
1678*9d74e500SAlex Deucher 	 * to update the wptr because sometimes SDMA seems to miss
1679*9d74e500SAlex Deucher 	 * doorbells when entering PG.  If you remove this, update
1680*9d74e500SAlex Deucher 	 * sdma_v5_2_ring_set_wptr() as well!
16813aae4ef4SAlex Deucher 	 */
16823aae4ef4SAlex Deucher 	amdgpu_gfx_off_ctrl(adev, false);
16833aae4ef4SAlex Deucher }
16843aae4ef4SAlex Deucher 
sdma_v5_2_ring_end_use(struct amdgpu_ring * ring)16853aae4ef4SAlex Deucher static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
16863aae4ef4SAlex Deucher {
16873aae4ef4SAlex Deucher 	struct amdgpu_device *adev = ring->adev;
16883aae4ef4SAlex Deucher 
16893aae4ef4SAlex Deucher 	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
16903aae4ef4SAlex Deucher 	 * disallow GFXOFF in some cases leading to
16913aae4ef4SAlex Deucher 	 * hangs in SDMA.  Allow GFXOFF when SDMA is complete.
16923aae4ef4SAlex Deucher 	 */
16933aae4ef4SAlex Deucher 	amdgpu_gfx_off_ctrl(adev, true);
16943aae4ef4SAlex Deucher }
16953aae4ef4SAlex Deucher 
/* IP-block lifecycle callbacks for SDMA 5.2 (init/fini, suspend/resume,
 * idle checks, and clock/power gating control). */
const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
	.name = "sdma_v5_2",
	.early_init = sdma_v5_2_early_init,
	.late_init = NULL,
	.sw_init = sdma_v5_2_sw_init,
	.sw_fini = sdma_v5_2_sw_fini,
	.hw_init = sdma_v5_2_hw_init,
	.hw_fini = sdma_v5_2_hw_fini,
	.suspend = sdma_v5_2_suspend,
	.resume = sdma_v5_2_resume,
	.is_idle = sdma_v5_2_is_idle,
	.wait_for_idle = sdma_v5_2_wait_for_idle,
	.soft_reset = sdma_v5_2_soft_reset,
	.set_clockgating_state = sdma_v5_2_set_clockgating_state,
	.set_powergating_state = sdma_v5_2_set_powergating_state,
	.get_clockgating_state = sdma_v5_2_get_clockgating_state,
};
1713157e72e8SLikun Gao 
/* Ring callbacks for the SDMA 5.2 gfx queues; installed on every
 * instance by sdma_v5_2_set_ring_funcs(). */
static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = sdma_v5_2_ring_get_rptr,
	.get_wptr = sdma_v5_2_ring_get_wptr,
	.set_wptr = sdma_v5_2_ring_set_wptr,
	.emit_frame_size =
		5 + /* sdma_v5_2_ring_init_cond_exec */
		6 + /* sdma_v5_2_ring_emit_hdp_flush */
		3 + /* hdp_invalidate */
		6 + /* sdma_v5_2_ring_emit_pipeline_sync */
		/* sdma_v5_2_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */
	.emit_ib = sdma_v5_2_ring_emit_ib,
	.emit_mem_sync = sdma_v5_2_ring_emit_mem_sync,
	.emit_fence = sdma_v5_2_ring_emit_fence,
	.emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v5_2_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush,
	.test_ring = sdma_v5_2_ring_test_ring,
	.test_ib = sdma_v5_2_ring_test_ib,
	.insert_nop = sdma_v5_2_ring_insert_nop,
	.pad_ib = sdma_v5_2_ring_pad_ib,
	.begin_use = sdma_v5_2_ring_begin_use,
	.end_use = sdma_v5_2_ring_end_use,
	.emit_wreg = sdma_v5_2_ring_emit_wreg,
	.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
	.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
	.patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
	.preempt_ib = sdma_v5_2_ring_preempt_ib,
};
1752157e72e8SLikun Gao 
/**
 * sdma_v5_2_set_ring_funcs - install ring callbacks on every instance
 *
 * @adev: amdgpu device pointer
 *
 * Assign the shared sdma_v5_2_ring_funcs table and instance index
 * ("me") to each SDMA ring.  (ring.me is also set in sw_init; the
 * assignment here is what makes it valid before ring init.)
 */
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.instance[i].ring.funcs = &sdma_v5_2_ring_funcs;
		adev->sdma.instance[i].ring.me = i;
	}
}
1762157e72e8SLikun Gao 
/* IRQ source callbacks for the SDMA trap interrupt: enable/disable state
 * control plus the handler that processes a delivered trap.
 */
static const struct amdgpu_irq_src_funcs sdma_v5_2_trap_irq_funcs = {
	.set = sdma_v5_2_set_trap_irq_state,
	.process = sdma_v5_2_process_trap_irq,
};
1767157e72e8SLikun Gao 
/* IRQ source callbacks for the SDMA illegal-instruction interrupt.
 * Only a process handler is provided; no .set state control.
 */
static const struct amdgpu_irq_src_funcs sdma_v5_2_illegal_inst_irq_funcs = {
	.process = sdma_v5_2_process_illegal_inst_irq,
};
1771157e72e8SLikun Gao 
sdma_v5_2_set_irq_funcs(struct amdgpu_device * adev)1772157e72e8SLikun Gao static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
1773157e72e8SLikun Gao {
1774157e72e8SLikun Gao 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
1775157e72e8SLikun Gao 					adev->sdma.num_instances;
1776157e72e8SLikun Gao 	adev->sdma.trap_irq.funcs = &sdma_v5_2_trap_irq_funcs;
1777157e72e8SLikun Gao 	adev->sdma.illegal_inst_irq.funcs = &sdma_v5_2_illegal_inst_irq_funcs;
1778157e72e8SLikun Gao }
1779157e72e8SLikun Gao 
1780157e72e8SLikun Gao /**
1781157e72e8SLikun Gao  * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine
1782157e72e8SLikun Gao  *
1783fd1c541dSLee Jones  * @ib: indirect buffer to copy to
1784157e72e8SLikun Gao  * @src_offset: src GPU address
1785157e72e8SLikun Gao  * @dst_offset: dst GPU address
1786157e72e8SLikun Gao  * @byte_count: number of bytes to xfer
1787fd1c541dSLee Jones  * @tmz: if a secure copy should be used
1788157e72e8SLikun Gao  *
1789157e72e8SLikun Gao  * Copy GPU buffers using the DMA engine.
1790157e72e8SLikun Gao  * Used by the amdgpu ttm implementation to move pages if
1791157e72e8SLikun Gao  * registered as the asic copy callback.
1792157e72e8SLikun Gao  */
sdma_v5_2_emit_copy_buffer(struct amdgpu_ib * ib,uint64_t src_offset,uint64_t dst_offset,uint32_t byte_count,bool tmz)1793157e72e8SLikun Gao static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
1794157e72e8SLikun Gao 				       uint64_t src_offset,
1795157e72e8SLikun Gao 				       uint64_t dst_offset,
1796157e72e8SLikun Gao 				       uint32_t byte_count,
1797157e72e8SLikun Gao 				       bool tmz)
1798157e72e8SLikun Gao {
1799157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1800157e72e8SLikun Gao 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1801157e72e8SLikun Gao 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
1802157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = byte_count - 1;
1803157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1804157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1805157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1806157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1807157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1808157e72e8SLikun Gao }
1809157e72e8SLikun Gao 
1810157e72e8SLikun Gao /**
1811157e72e8SLikun Gao  * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine
1812157e72e8SLikun Gao  *
1813fd1c541dSLee Jones  * @ib: indirect buffer to fill
1814157e72e8SLikun Gao  * @src_data: value to write to buffer
1815157e72e8SLikun Gao  * @dst_offset: dst GPU address
1816157e72e8SLikun Gao  * @byte_count: number of bytes to xfer
1817157e72e8SLikun Gao  *
1818157e72e8SLikun Gao  * Fill GPU buffers using the DMA engine.
1819157e72e8SLikun Gao  */
sdma_v5_2_emit_fill_buffer(struct amdgpu_ib * ib,uint32_t src_data,uint64_t dst_offset,uint32_t byte_count)1820157e72e8SLikun Gao static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib,
1821157e72e8SLikun Gao 				       uint32_t src_data,
1822157e72e8SLikun Gao 				       uint64_t dst_offset,
1823157e72e8SLikun Gao 				       uint32_t byte_count)
1824157e72e8SLikun Gao {
1825157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
1826157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1827157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1828157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = src_data;
1829157e72e8SLikun Gao 	ib->ptr[ib->length_dw++] = byte_count - 1;
1830157e72e8SLikun Gao }
1831157e72e8SLikun Gao 
/* TTM buffer-move callbacks backed by SDMA v5.2.
 * The num_dw values match the dwords written by the emit helpers above
 * (7 for copy, 5 for fill); max_bytes is the per-packet transfer limit.
 */
static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = {
	.copy_max_bytes = 0x400000,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v5_2_emit_copy_buffer,

	.fill_max_bytes = 0x400000,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v5_2_emit_fill_buffer,
};
1841157e72e8SLikun Gao 
sdma_v5_2_set_buffer_funcs(struct amdgpu_device * adev)1842157e72e8SLikun Gao static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev)
1843157e72e8SLikun Gao {
1844157e72e8SLikun Gao 	if (adev->mman.buffer_funcs == NULL) {
1845157e72e8SLikun Gao 		adev->mman.buffer_funcs = &sdma_v5_2_buffer_funcs;
1846157e72e8SLikun Gao 		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1847157e72e8SLikun Gao 	}
1848157e72e8SLikun Gao }
1849157e72e8SLikun Gao 
/* VM page-table update callbacks backed by SDMA v5.2.
 * copy_pte_num_dw matches the dwords emitted by sdma_v5_2_vm_copy_pte.
 */
static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v5_2_vm_copy_pte,
	.write_pte = sdma_v5_2_vm_write_pte,
	.set_pte_pde = sdma_v5_2_vm_set_pte_pde,
};
1856157e72e8SLikun Gao 
sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device * adev)1857157e72e8SLikun Gao static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev)
1858157e72e8SLikun Gao {
1859157e72e8SLikun Gao 	unsigned i;
1860157e72e8SLikun Gao 
1861157e72e8SLikun Gao 	if (adev->vm_manager.vm_pte_funcs == NULL) {
1862157e72e8SLikun Gao 		adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs;
1863157e72e8SLikun Gao 		for (i = 0; i < adev->sdma.num_instances; i++) {
1864157e72e8SLikun Gao 			adev->vm_manager.vm_pte_scheds[i] =
1865157e72e8SLikun Gao 				&adev->sdma.instance[i].ring.sched;
1866157e72e8SLikun Gao 		}
1867157e72e8SLikun Gao 		adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1868157e72e8SLikun Gao 	}
1869157e72e8SLikun Gao }
1870157e72e8SLikun Gao 
/* IP block descriptor exposing SDMA 5.2.0 to the amdgpu IP framework;
 * the lifecycle hooks live in sdma_v5_2_ip_funcs (defined earlier in
 * this file).
 */
const struct amdgpu_ip_block_version sdma_v5_2_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 5,
	.minor = 2,
	.rev = 0,
	.funcs = &sdma_v5_2_ip_funcs,
};
1878