1157e72e8SLikun Gao /*
2157e72e8SLikun Gao * Copyright 2019 Advanced Micro Devices, Inc.
3157e72e8SLikun Gao *
4157e72e8SLikun Gao * Permission is hereby granted, free of charge, to any person obtaining a
5157e72e8SLikun Gao * copy of this software and associated documentation files (the "Software"),
6157e72e8SLikun Gao * to deal in the Software without restriction, including without limitation
7157e72e8SLikun Gao * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8157e72e8SLikun Gao * and/or sell copies of the Software, and to permit persons to whom the
9157e72e8SLikun Gao * Software is furnished to do so, subject to the following conditions:
10157e72e8SLikun Gao *
11157e72e8SLikun Gao * The above copyright notice and this permission notice shall be included in
12157e72e8SLikun Gao * all copies or substantial portions of the Software.
13157e72e8SLikun Gao *
14157e72e8SLikun Gao * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15157e72e8SLikun Gao * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16157e72e8SLikun Gao * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17157e72e8SLikun Gao * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18157e72e8SLikun Gao * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19157e72e8SLikun Gao * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20157e72e8SLikun Gao * OTHER DEALINGS IN THE SOFTWARE.
21157e72e8SLikun Gao *
22157e72e8SLikun Gao */
23157e72e8SLikun Gao
24157e72e8SLikun Gao #include <linux/delay.h>
25157e72e8SLikun Gao #include <linux/firmware.h>
26157e72e8SLikun Gao #include <linux/module.h>
27157e72e8SLikun Gao #include <linux/pci.h>
28157e72e8SLikun Gao
29157e72e8SLikun Gao #include "amdgpu.h"
30157e72e8SLikun Gao #include "amdgpu_ucode.h"
31157e72e8SLikun Gao #include "amdgpu_trace.h"
32157e72e8SLikun Gao
33157e72e8SLikun Gao #include "gc/gc_10_3_0_offset.h"
34157e72e8SLikun Gao #include "gc/gc_10_3_0_sh_mask.h"
35157e72e8SLikun Gao #include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
36157e72e8SLikun Gao #include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
37157e72e8SLikun Gao #include "ivsrcid/sdma2/irqsrcs_sdma2_5_0.h"
38157e72e8SLikun Gao #include "ivsrcid/sdma3/irqsrcs_sdma3_5_0.h"
39157e72e8SLikun Gao
40157e72e8SLikun Gao #include "soc15_common.h"
41157e72e8SLikun Gao #include "soc15.h"
42157e72e8SLikun Gao #include "navi10_sdma_pkt_open.h"
43157e72e8SLikun Gao #include "nbio_v2_3.h"
44157e72e8SLikun Gao #include "sdma_common.h"
45157e72e8SLikun Gao #include "sdma_v5_2.h"
46157e72e8SLikun Gao
47157e72e8SLikun Gao MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
48df2d15dfSJiansong Chen MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin");
4901069226STao Zhou MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin");
508760403eSChengming Gui MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
51157e72e8SLikun Gao
5254c98eacSHuang Rui MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
53e88d68e1SAaron Liu MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
5493afe158SYifan Zhang MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
55967af863SPrike Liang MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
5654c98eacSHuang Rui
57157e72e8SLikun Gao #define SDMA1_REG_OFFSET 0x600
58157e72e8SLikun Gao #define SDMA3_REG_OFFSET 0x400
59157e72e8SLikun Gao #define SDMA0_HYP_DEC_REG_START 0x5880
60157e72e8SLikun Gao #define SDMA0_HYP_DEC_REG_END 0x5893
61157e72e8SLikun Gao #define SDMA1_HYP_DEC_REG_OFFSET 0x20
62157e72e8SLikun Gao
63157e72e8SLikun Gao static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
64157e72e8SLikun Gao static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
65157e72e8SLikun Gao static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
66157e72e8SLikun Gao static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
67157e72e8SLikun Gao
/**
 * sdma_v5_2_get_reg_offset - compute the MMIO offset of an SDMA register
 *
 * @adev: amdgpu_device pointer
 * @instance: SDMA engine instance (0-3)
 * @internal_offset: register offset relative to the SDMA0 aperture
 *
 * Registers inside the HYP_DEC window use their own base segment with a
 * fixed per-instance stride; all other registers are split across two GC
 * reg_offset segments (instances 0/1 vs instances 2/3) with per-instance
 * offsets.
 */
static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
	u32 reg = internal_offset;
	u32 base;

	if (reg >= SDMA0_HYP_DEC_REG_START && reg <= SDMA0_HYP_DEC_REG_END) {
		/* HYP_DEC range: one stride per instance (zero for instance 0) */
		base = adev->reg_offset[GC_HWIP][0][1];
		reg += SDMA1_HYP_DEC_REG_OFFSET * instance;
	} else if (instance < 2) {
		base = adev->reg_offset[GC_HWIP][0][0];
		if (instance == 1)
			reg += SDMA1_REG_OFFSET;
	} else {
		base = adev->reg_offset[GC_HWIP][0][2];
		if (instance == 3)
			reg += SDMA3_REG_OFFSET;
	}

	return base + reg;
}
91157e72e8SLikun Gao
/**
 * sdma_v5_2_ring_init_cond_exec - emit a conditional-execute packet
 *
 * @ring: amdgpu ring pointer
 *
 * Emits a COND_EXE packet that gates execution of the following packets
 * on the value at ring->cond_exe_gpu_addr. The packet's dword-count field
 * is not known yet, so a dummy value is written and its ring offset is
 * returned for sdma_v5_2_ring_patch_cond_exec() to patch later.
 */
static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 1);
	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */

	return ret;
}
105157e72e8SLikun Gao
sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring * ring,unsigned offset)106157e72e8SLikun Gao static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
107157e72e8SLikun Gao unsigned offset)
108157e72e8SLikun Gao {
109157e72e8SLikun Gao unsigned cur;
110157e72e8SLikun Gao
111157e72e8SLikun Gao BUG_ON(offset > ring->buf_mask);
112157e72e8SLikun Gao BUG_ON(ring->ring[offset] != 0x55aa55aa);
113157e72e8SLikun Gao
114157e72e8SLikun Gao cur = (ring->wptr - 1) & ring->buf_mask;
115157e72e8SLikun Gao if (cur > offset)
116157e72e8SLikun Gao ring->ring[offset] = cur - offset;
117157e72e8SLikun Gao else
118157e72e8SLikun Gao ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
119157e72e8SLikun Gao }
120157e72e8SLikun Gao
121157e72e8SLikun Gao /**
122157e72e8SLikun Gao * sdma_v5_2_ring_get_rptr - get the current read pointer
123157e72e8SLikun Gao *
124157e72e8SLikun Gao * @ring: amdgpu ring pointer
125157e72e8SLikun Gao *
126157e72e8SLikun Gao * Get the current rptr from the hardware (NAVI10+).
127157e72e8SLikun Gao */
sdma_v5_2_ring_get_rptr(struct amdgpu_ring * ring)128157e72e8SLikun Gao static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
129157e72e8SLikun Gao {
130157e72e8SLikun Gao u64 *rptr;
131157e72e8SLikun Gao
132157e72e8SLikun Gao /* XXX check if swapping is necessary on BE */
1333748424bSJack Xiao rptr = (u64 *)ring->rptr_cpu_addr;
134157e72e8SLikun Gao
135157e72e8SLikun Gao DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
136157e72e8SLikun Gao return ((*rptr) >> 2);
137157e72e8SLikun Gao }
138157e72e8SLikun Gao
139157e72e8SLikun Gao /**
140157e72e8SLikun Gao * sdma_v5_2_ring_get_wptr - get the current write pointer
141157e72e8SLikun Gao *
142157e72e8SLikun Gao * @ring: amdgpu ring pointer
143157e72e8SLikun Gao *
144157e72e8SLikun Gao * Get the current wptr from the hardware (NAVI10+).
145157e72e8SLikun Gao */
sdma_v5_2_ring_get_wptr(struct amdgpu_ring * ring)146157e72e8SLikun Gao static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
147157e72e8SLikun Gao {
148157e72e8SLikun Gao struct amdgpu_device *adev = ring->adev;
14987d6883bSXiaojie Yuan u64 wptr;
150157e72e8SLikun Gao
151157e72e8SLikun Gao if (ring->use_doorbell) {
152157e72e8SLikun Gao /* XXX check if swapping is necessary on BE */
1533748424bSJack Xiao wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
15487d6883bSXiaojie Yuan DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
155157e72e8SLikun Gao } else {
15687d6883bSXiaojie Yuan wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
15787d6883bSXiaojie Yuan wptr = wptr << 32;
15887d6883bSXiaojie Yuan wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR));
15987d6883bSXiaojie Yuan DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr);
160157e72e8SLikun Gao }
161157e72e8SLikun Gao
16287d6883bSXiaojie Yuan return wptr >> 2;
163157e72e8SLikun Gao }
164157e72e8SLikun Gao
/**
 * sdma_v5_2_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (NAVI10+). In doorbell mode the
 * wptr is mirrored into the CPU-visible slot and rung via the doorbell;
 * otherwise it is written straight to the RB_WPTR registers. In both
 * cases the value is stored shifted left by 2.
 */
static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");
	if (ring->use_doorbell) {
		DRM_DEBUG("Using doorbell -- "
				"wptr_offs == 0x%08x "
				"lower_32_bits(ring->wptr << 2) == 0x%08x "
				"upper_32_bits(ring->wptr << 2) == 0x%08x\n",
				ring->wptr_offs,
				lower_32_bits(ring->wptr << 2),
				upper_32_bits(ring->wptr << 2));
		/* update the wb slot first, then ring the doorbell;
		 * XXX check if swapping is necessary on BE
		 */
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr << 2);
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
				ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
		/* SDMA seems to miss doorbells sometimes when powergating kicks in.
		 * Updating the wptr directly will wake it. This is only safe because
		 * we disallow gfxoff in begin_use() and then allow it again in end_use().
		 */
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
		       lower_32_bits(ring->wptr << 2));
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
		       upper_32_bits(ring->wptr << 2));
	} else {
		DRM_DEBUG("Not using doorbell -- "
				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
				ring->me,
				lower_32_bits(ring->wptr << 2),
				ring->me,
				upper_32_bits(ring->wptr << 2));
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
		       lower_32_bits(ring->wptr << 2));
		WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
		       upper_32_bits(ring->wptr << 2));
	}
}
213157e72e8SLikun Gao
/**
 * sdma_v5_2_ring_insert_nop - pad the ring with NOP packets
 *
 * @ring: amdgpu ring pointer
 * @count: number of NOP dwords to insert
 *
 * When the instance supports burst NOPs, the first NOP carries the count
 * of the padding dwords that follow it; otherwise plain NOPs are emitted.
 */
static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	uint32_t i = 0;

	if (!count)
		return;

	if (sdma && sdma->burst_nop) {
		/* burst header consumes the first slot and announces the rest */
		amdgpu_ring_write(ring, ring->funcs->nop |
				  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		i = 1;
	}

	for (; i < count; i++)
		amdgpu_ring_write(ring, ring->funcs->nop);
}
226157e72e8SLikun Gao
/**
 * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring.
 */
static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on a 8 DW boundary--the next dword
	 * must be on a 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in
	 * modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is,
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below, is a solution of x.
	 */
	sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	/* INDIRECT packet: header, IB base (lo/hi), size, CSA (lo/hi) */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	/* NOTE(review): csa_mc_addr presumably points at the per-vmid
	 * preemption save area (see amdgpu_sdma_get_csa_mc_addr) — confirm
	 */
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}
264157e72e8SLikun Gao
265157e72e8SLikun Gao /**
266b45fdeabSJinzhou Su * sdma_v5_2_ring_emit_mem_sync - flush the IB by graphics cache rinse
267b45fdeabSJinzhou Su *
268b45fdeabSJinzhou Su * @ring: amdgpu ring pointer
269b45fdeabSJinzhou Su *
270b45fdeabSJinzhou Su * flush the IB by graphics cache rinse.
271b45fdeabSJinzhou Su */
sdma_v5_2_ring_emit_mem_sync(struct amdgpu_ring * ring)272b45fdeabSJinzhou Su static void sdma_v5_2_ring_emit_mem_sync(struct amdgpu_ring *ring)
273b45fdeabSJinzhou Su {
274e8ba4922SColin Ian King uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB |
275e8ba4922SColin Ian King SDMA_GCR_GLM_INV | SDMA_GCR_GL1_INV |
276e8ba4922SColin Ian King SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
277b45fdeabSJinzhou Su SDMA_GCR_GLI_INV(1);
278b45fdeabSJinzhou Su
279b45fdeabSJinzhou Su /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
280b45fdeabSJinzhou Su amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
281b45fdeabSJinzhou Su amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
282b45fdeabSJinzhou Su amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
283b45fdeabSJinzhou Su SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
284b45fdeabSJinzhou Su amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
285b45fdeabSJinzhou Su SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
286b45fdeabSJinzhou Su amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
287b45fdeabSJinzhou Su SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
288b45fdeabSJinzhou Su }
289b45fdeabSJinzhou Su
/**
 * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask = 0;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;

	if (ring->me > 1) {
		/* NOTE(review): instances 2/3 take the ASIC-level flush path;
		 * presumably the NBIO done/req bit shift below only covers
		 * instances 0/1 — confirm against nbio_hdp_flush_reg layout
		 */
		amdgpu_asic_flush_hdp(adev, ring);
	} else {
		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;

		/* POLL_REGMEM: wait until the NBIO "flush done" register,
		 * masked by this instance's bit, equals the reference value
		 */
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
				  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
				  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
		amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
		amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
		amdgpu_ring_write(ring, ref_and_mask); /* reference */
		amdgpu_ring_write(ring, ref_and_mask); /* mask */
		amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
				  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
	}
}
319157e72e8SLikun Gao
/**
 * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.
 */
static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well: a second FENCE packet stores
	 * the upper half of seq at addr + 4
	 */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if ((flags & AMDGPU_FENCE_FLAG_INT)) {
		/* MES queues tag the trap context with their hw queue id */
		uint32_t ctx = ring->is_mes_queue ?
			(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
	}
}
365157e72e8SLikun Gao
36641782d70SGuchun Chen
367157e72e8SLikun Gao /**
368157e72e8SLikun Gao * sdma_v5_2_gfx_stop - stop the gfx async dma engines
369157e72e8SLikun Gao *
370157e72e8SLikun Gao * @adev: amdgpu_device pointer
371157e72e8SLikun Gao *
372157e72e8SLikun Gao * Stop the gfx async dma ring buffers.
373157e72e8SLikun Gao */
sdma_v5_2_gfx_stop(struct amdgpu_device * adev)374157e72e8SLikun Gao static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
375157e72e8SLikun Gao {
376157e72e8SLikun Gao u32 rb_cntl, ib_cntl;
377157e72e8SLikun Gao int i;
378157e72e8SLikun Gao
379571c0536SAlex Deucher amdgpu_sdma_unset_buffer_funcs_helper(adev);
380157e72e8SLikun Gao
381157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
382cf2a22e4SRohit Khaire rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
383157e72e8SLikun Gao rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
384cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
385cf2a22e4SRohit Khaire ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
386157e72e8SLikun Gao ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
387cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
388157e72e8SLikun Gao }
389157e72e8SLikun Gao }
390157e72e8SLikun Gao
/**
 * sdma_v5_2_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues.
 */
static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo — intentionally empty; nothing to tear down here yet.
	 * NOTE(review): presumably RLC queues are not set up by this file;
	 * fill in when they are.
	 */
}
402157e72e8SLikun Gao
/**
 * sdma_v5_2_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 *
 * Halt or unhalt the async dma engines context switch.
 */
static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
	u32 f32_cntl, phase_quantum = 0;
	int i;

	if (amdgpu_sdma_phase_quantum) {
		unsigned value = amdgpu_sdma_phase_quantum;
		unsigned unit = 0;

		/* the PHASE*_QUANTUM register splits the quantum into a
		 * VALUE and a power-of-two UNIT; halve (rounding up) until
		 * value fits the VALUE field, counting doublings in unit
		 */
		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
			value = (value + 1) >> 1;
			unit++;
		}
		/* if even the largest unit cannot represent the request,
		 * clamp both fields to their maxima and warn once
		 */
		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
			WARN_ONCE(1,
			"clamping sdma_phase_quantum to %uK clock cycles\n",
				  value << unit);
		}
		phase_quantum =
			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
			unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		/* program the same quantum into all three phases */
		if (enable && amdgpu_sdma_phase_quantum) {
			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
			       phase_quantum);
			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
			       phase_quantum);
			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
			       phase_quantum);
		}

		/* SDMA0_CNTL is not touched under SRIOV */
		if (!amdgpu_sriov_vf(adev)) {
			f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
					AUTO_CTXSW_ENABLE, enable ? 1 : 0);
			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
		}
	}

}
45941782d70SGuchun Chen
460b992a190SHaohui Mai /**
46141782d70SGuchun Chen * sdma_v5_2_enable - stop the async dma engines
462b992a190SHaohui Mai *
463b992a190SHaohui Mai * @adev: amdgpu_device pointer
46441782d70SGuchun Chen * @enable: enable/disable the DMA MEs.
465b992a190SHaohui Mai *
46641782d70SGuchun Chen * Halt or unhalt the async dma engines.
467b992a190SHaohui Mai */
sdma_v5_2_enable(struct amdgpu_device * adev,bool enable)46841782d70SGuchun Chen static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
469b992a190SHaohui Mai {
470b992a190SHaohui Mai u32 f32_cntl;
47141782d70SGuchun Chen int i;
472b992a190SHaohui Mai
47341782d70SGuchun Chen if (!enable) {
474157e72e8SLikun Gao sdma_v5_2_gfx_stop(adev);
475157e72e8SLikun Gao sdma_v5_2_rlc_stop(adev);
47641782d70SGuchun Chen }
477157e72e8SLikun Gao
478b18ff692SBokun Zhang if (!amdgpu_sriov_vf(adev)) {
479157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
480157e72e8SLikun Gao f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
48141782d70SGuchun Chen f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
482157e72e8SLikun Gao WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
483157e72e8SLikun Gao }
484157e72e8SLikun Gao }
485b18ff692SBokun Zhang }
486157e72e8SLikun Gao
487157e72e8SLikun Gao /**
488157e72e8SLikun Gao * sdma_v5_2_gfx_resume - setup and start the async dma engines
489157e72e8SLikun Gao *
490157e72e8SLikun Gao * @adev: amdgpu_device pointer
491157e72e8SLikun Gao *
492157e72e8SLikun Gao * Set up the gfx DMA ring buffers and enable them.
493157e72e8SLikun Gao * Returns 0 for success, error for failure.
494157e72e8SLikun Gao */
sdma_v5_2_gfx_resume(struct amdgpu_device * adev)495157e72e8SLikun Gao static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
496157e72e8SLikun Gao {
497157e72e8SLikun Gao struct amdgpu_ring *ring;
498157e72e8SLikun Gao u32 rb_cntl, ib_cntl;
499157e72e8SLikun Gao u32 rb_bufsz;
500157e72e8SLikun Gao u32 doorbell;
501157e72e8SLikun Gao u32 doorbell_offset;
502157e72e8SLikun Gao u32 temp;
503157e72e8SLikun Gao u32 wptr_poll_cntl;
504157e72e8SLikun Gao u64 wptr_gpu_addr;
505157e72e8SLikun Gao int i, r;
506157e72e8SLikun Gao
507157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
508157e72e8SLikun Gao ring = &adev->sdma.instance[i].ring;
509157e72e8SLikun Gao
510b18ff692SBokun Zhang if (!amdgpu_sriov_vf(adev))
511cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
512157e72e8SLikun Gao
513157e72e8SLikun Gao /* Set ring buffer size in dwords */
514157e72e8SLikun Gao rb_bufsz = order_base_2(ring->ring_size / 4);
515cf2a22e4SRohit Khaire rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
516157e72e8SLikun Gao rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
517157e72e8SLikun Gao #ifdef __BIG_ENDIAN
518157e72e8SLikun Gao rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
519157e72e8SLikun Gao rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
520157e72e8SLikun Gao RPTR_WRITEBACK_SWAP_ENABLE, 1);
521157e72e8SLikun Gao #endif
522cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
523157e72e8SLikun Gao
524157e72e8SLikun Gao /* Initialize the ring buffer's read and write pointers */
525cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
526cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
527cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
528cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
529157e72e8SLikun Gao
530157e72e8SLikun Gao /* setup the wptr shadow polling */
5313748424bSJack Xiao wptr_gpu_addr = ring->wptr_gpu_addr;
532cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
533157e72e8SLikun Gao lower_32_bits(wptr_gpu_addr));
534cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
535157e72e8SLikun Gao upper_32_bits(wptr_gpu_addr));
536cf2a22e4SRohit Khaire wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
537157e72e8SLikun Gao mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
538157e72e8SLikun Gao wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
539157e72e8SLikun Gao SDMA0_GFX_RB_WPTR_POLL_CNTL,
540157e72e8SLikun Gao F32_POLL_ENABLE, 1);
541cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
542157e72e8SLikun Gao wptr_poll_cntl);
543157e72e8SLikun Gao
544157e72e8SLikun Gao /* set the wb address whether it's enabled or not */
545cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
5463748424bSJack Xiao upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
547cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
5483748424bSJack Xiao lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
549157e72e8SLikun Gao
550157e72e8SLikun Gao rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
551157e72e8SLikun Gao
552cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
553cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
554157e72e8SLikun Gao
555157e72e8SLikun Gao ring->wptr = 0;
556157e72e8SLikun Gao
557157e72e8SLikun Gao /* before programing wptr to a less value, need set minor_ptr_update first */
558cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
559157e72e8SLikun Gao
560157e72e8SLikun Gao if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
5617dba6e83SHaohui Mai WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
5627dba6e83SHaohui Mai WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
563157e72e8SLikun Gao }
564157e72e8SLikun Gao
565cf2a22e4SRohit Khaire doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
566cf2a22e4SRohit Khaire doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
567157e72e8SLikun Gao
568157e72e8SLikun Gao if (ring->use_doorbell) {
569157e72e8SLikun Gao doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
570157e72e8SLikun Gao doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
571157e72e8SLikun Gao OFFSET, ring->doorbell_index);
572157e72e8SLikun Gao } else {
573157e72e8SLikun Gao doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
574157e72e8SLikun Gao }
575cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
576cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
577157e72e8SLikun Gao
578157e72e8SLikun Gao adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
5799822ac8fSYong Zhao ring->doorbell_index,
5809822ac8fSYong Zhao adev->doorbell_index.sdma_doorbell_range);
581157e72e8SLikun Gao
582157e72e8SLikun Gao if (amdgpu_sriov_vf(adev))
583157e72e8SLikun Gao sdma_v5_2_ring_set_wptr(ring);
584157e72e8SLikun Gao
585157e72e8SLikun Gao /* set minor_ptr_update to 0 after wptr programmed */
586b18ff692SBokun Zhang
587cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
588157e72e8SLikun Gao
589b18ff692SBokun Zhang /* SRIOV VF has no control of any of registers below */
590b18ff692SBokun Zhang if (!amdgpu_sriov_vf(adev)) {
591157e72e8SLikun Gao /* set utc l1 enable flag always to 1 */
592157e72e8SLikun Gao temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
593157e72e8SLikun Gao temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
594157e72e8SLikun Gao
595157e72e8SLikun Gao /* enable MCBP */
596157e72e8SLikun Gao temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
597157e72e8SLikun Gao WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
598157e72e8SLikun Gao
599157e72e8SLikun Gao /* Set up RESP_MODE to non-copy addresses */
600cf2a22e4SRohit Khaire temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
601157e72e8SLikun Gao temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
602157e72e8SLikun Gao temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
603cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
604157e72e8SLikun Gao
605157e72e8SLikun Gao /* program default cache read and write policy */
606cf2a22e4SRohit Khaire temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
607157e72e8SLikun Gao /* clean read policy and write policy bits */
608157e72e8SLikun Gao temp &= 0xFF0FFF;
609157e72e8SLikun Gao temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
610157e72e8SLikun Gao (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
6114005809bSLikun Gao SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
612cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
613157e72e8SLikun Gao
614157e72e8SLikun Gao /* unhalt engine */
615157e72e8SLikun Gao temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
616157e72e8SLikun Gao temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
617157e72e8SLikun Gao WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
618157e72e8SLikun Gao }
619157e72e8SLikun Gao
620157e72e8SLikun Gao /* enable DMA RB */
621157e72e8SLikun Gao rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
622cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
623157e72e8SLikun Gao
624cf2a22e4SRohit Khaire ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
625157e72e8SLikun Gao ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
626157e72e8SLikun Gao #ifdef __BIG_ENDIAN
627157e72e8SLikun Gao ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
628157e72e8SLikun Gao #endif
629157e72e8SLikun Gao /* enable DMA IBs */
630cf2a22e4SRohit Khaire WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
631157e72e8SLikun Gao
63241782d70SGuchun Chen if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
63341782d70SGuchun Chen sdma_v5_2_ctx_switch_enable(adev, true);
63441782d70SGuchun Chen sdma_v5_2_enable(adev, true);
63541782d70SGuchun Chen }
636157e72e8SLikun Gao
63793ab59acSGuchun Chen r = amdgpu_ring_test_helper(ring);
63893ab59acSGuchun Chen if (r)
639157e72e8SLikun Gao return r;
64061c31b8bSGuchun Chen
641157e72e8SLikun Gao if (adev->mman.buffer_funcs_ring == ring)
642157e72e8SLikun Gao amdgpu_ttm_set_buffer_funcs_status(adev, true);
643157e72e8SLikun Gao }
644157e72e8SLikun Gao
645157e72e8SLikun Gao return 0;
646157e72e8SLikun Gao }
647157e72e8SLikun Gao
/**
 * sdma_v5_2_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v5_2_rlc_resume(struct amdgpu_device *adev)
{
	/* Intentionally a no-op on SDMA v5.2; kept so sdma_v5_2_start() can
	 * call it unconditionally after sdma_v5_2_gfx_resume().
	 */
	return 0;
}
660157e72e8SLikun Gao
661157e72e8SLikun Gao /**
662157e72e8SLikun Gao * sdma_v5_2_load_microcode - load the sDMA ME ucode
663157e72e8SLikun Gao *
664157e72e8SLikun Gao * @adev: amdgpu_device pointer
665157e72e8SLikun Gao *
666157e72e8SLikun Gao * Loads the sDMA0/1/2/3 ucode.
667157e72e8SLikun Gao * Returns 0 for success, -EINVAL if the ucode is not available.
668157e72e8SLikun Gao */
sdma_v5_2_load_microcode(struct amdgpu_device * adev)669157e72e8SLikun Gao static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
670157e72e8SLikun Gao {
671157e72e8SLikun Gao const struct sdma_firmware_header_v1_0 *hdr;
672157e72e8SLikun Gao const __le32 *fw_data;
673157e72e8SLikun Gao u32 fw_size;
674157e72e8SLikun Gao int i, j;
675157e72e8SLikun Gao
676157e72e8SLikun Gao /* halt the MEs */
67741782d70SGuchun Chen sdma_v5_2_enable(adev, false);
678157e72e8SLikun Gao
679157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
680157e72e8SLikun Gao if (!adev->sdma.instance[i].fw)
681157e72e8SLikun Gao return -EINVAL;
682157e72e8SLikun Gao
683157e72e8SLikun Gao hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
684157e72e8SLikun Gao amdgpu_ucode_print_sdma_hdr(&hdr->header);
685157e72e8SLikun Gao fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
686157e72e8SLikun Gao
687157e72e8SLikun Gao fw_data = (const __le32 *)
688157e72e8SLikun Gao (adev->sdma.instance[i].fw->data +
689157e72e8SLikun Gao le32_to_cpu(hdr->header.ucode_array_offset_bytes));
690157e72e8SLikun Gao
691157e72e8SLikun Gao WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
692157e72e8SLikun Gao
693157e72e8SLikun Gao for (j = 0; j < fw_size; j++) {
694157e72e8SLikun Gao if (amdgpu_emu_mode == 1 && j % 500 == 0)
695157e72e8SLikun Gao msleep(1);
696157e72e8SLikun Gao WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
697157e72e8SLikun Gao }
698157e72e8SLikun Gao
699157e72e8SLikun Gao WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
700157e72e8SLikun Gao }
701157e72e8SLikun Gao
702157e72e8SLikun Gao return 0;
703157e72e8SLikun Gao }
704157e72e8SLikun Gao
sdma_v5_2_soft_reset(void * handle)705a9c210c1SXiaomeng Hou static int sdma_v5_2_soft_reset(void *handle)
706a9c210c1SXiaomeng Hou {
707a9c210c1SXiaomeng Hou struct amdgpu_device *adev = (struct amdgpu_device *)handle;
708a9c210c1SXiaomeng Hou u32 grbm_soft_reset;
709a9c210c1SXiaomeng Hou u32 tmp;
710a9c210c1SXiaomeng Hou int i;
711a9c210c1SXiaomeng Hou
712a9c210c1SXiaomeng Hou for (i = 0; i < adev->sdma.num_instances; i++) {
713a9c210c1SXiaomeng Hou grbm_soft_reset = REG_SET_FIELD(0,
714a9c210c1SXiaomeng Hou GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
715a9c210c1SXiaomeng Hou 1);
716a9c210c1SXiaomeng Hou grbm_soft_reset <<= i;
717a9c210c1SXiaomeng Hou
718a9c210c1SXiaomeng Hou tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
719a9c210c1SXiaomeng Hou tmp |= grbm_soft_reset;
720a9c210c1SXiaomeng Hou DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
721a9c210c1SXiaomeng Hou WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
722a9c210c1SXiaomeng Hou tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
723a9c210c1SXiaomeng Hou
724a9c210c1SXiaomeng Hou udelay(50);
725a9c210c1SXiaomeng Hou
726a9c210c1SXiaomeng Hou tmp &= ~grbm_soft_reset;
727a9c210c1SXiaomeng Hou WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
728a9c210c1SXiaomeng Hou tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
729a9c210c1SXiaomeng Hou
730a9c210c1SXiaomeng Hou udelay(50);
731a9c210c1SXiaomeng Hou }
732a9c210c1SXiaomeng Hou
733a9c210c1SXiaomeng Hou return 0;
734a9c210c1SXiaomeng Hou }
735a9c210c1SXiaomeng Hou
736157e72e8SLikun Gao /**
737157e72e8SLikun Gao * sdma_v5_2_start - setup and start the async dma engines
738157e72e8SLikun Gao *
739157e72e8SLikun Gao * @adev: amdgpu_device pointer
740157e72e8SLikun Gao *
741157e72e8SLikun Gao * Set up the DMA engines and enable them.
742157e72e8SLikun Gao * Returns 0 for success, error for failure.
743157e72e8SLikun Gao */
sdma_v5_2_start(struct amdgpu_device * adev)744157e72e8SLikun Gao static int sdma_v5_2_start(struct amdgpu_device *adev)
745157e72e8SLikun Gao {
746157e72e8SLikun Gao int r = 0;
747157e72e8SLikun Gao
748157e72e8SLikun Gao if (amdgpu_sriov_vf(adev)) {
74941782d70SGuchun Chen sdma_v5_2_ctx_switch_enable(adev, false);
75041782d70SGuchun Chen sdma_v5_2_enable(adev, false);
751157e72e8SLikun Gao
752157e72e8SLikun Gao /* set RB registers */
753157e72e8SLikun Gao r = sdma_v5_2_gfx_resume(adev);
754157e72e8SLikun Gao return r;
755157e72e8SLikun Gao }
756157e72e8SLikun Gao
757157e72e8SLikun Gao if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
758157e72e8SLikun Gao r = sdma_v5_2_load_microcode(adev);
759157e72e8SLikun Gao if (r)
760157e72e8SLikun Gao return r;
761157e72e8SLikun Gao
762157e72e8SLikun Gao /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */
763157e72e8SLikun Gao if (amdgpu_emu_mode == 1)
764157e72e8SLikun Gao msleep(1000);
765157e72e8SLikun Gao }
766157e72e8SLikun Gao
767a9c210c1SXiaomeng Hou sdma_v5_2_soft_reset(adev);
76841782d70SGuchun Chen /* unhalt the MEs */
76941782d70SGuchun Chen sdma_v5_2_enable(adev, true);
77041782d70SGuchun Chen /* enable sdma ring preemption */
77141782d70SGuchun Chen sdma_v5_2_ctx_switch_enable(adev, true);
772157e72e8SLikun Gao
77341782d70SGuchun Chen /* start the gfx rings and rlc compute queues */
774157e72e8SLikun Gao r = sdma_v5_2_gfx_resume(adev);
775157e72e8SLikun Gao if (r)
776157e72e8SLikun Gao return r;
777157e72e8SLikun Gao r = sdma_v5_2_rlc_resume(adev);
778157e72e8SLikun Gao
779157e72e8SLikun Gao return r;
780157e72e8SLikun Gao }
781157e72e8SLikun Gao
sdma_v5_2_mqd_init(struct amdgpu_device * adev,void * mqd,struct amdgpu_mqd_prop * prop)782e0f5b4c9SJack Xiao static int sdma_v5_2_mqd_init(struct amdgpu_device *adev, void *mqd,
783e0f5b4c9SJack Xiao struct amdgpu_mqd_prop *prop)
784e0f5b4c9SJack Xiao {
785e0f5b4c9SJack Xiao struct v10_sdma_mqd *m = mqd;
786e0f5b4c9SJack Xiao uint64_t wb_gpu_addr;
787e0f5b4c9SJack Xiao
788e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_cntl =
789e0f5b4c9SJack Xiao order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
790e0f5b4c9SJack Xiao 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
791e0f5b4c9SJack Xiao 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
792e0f5b4c9SJack Xiao 1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
793e0f5b4c9SJack Xiao
794e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
795e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
796e0f5b4c9SJack Xiao
797e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
798e0f5b4c9SJack Xiao mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
799e0f5b4c9SJack Xiao
800e0f5b4c9SJack Xiao wb_gpu_addr = prop->wptr_gpu_addr;
801e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
802e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
803e0f5b4c9SJack Xiao
804e0f5b4c9SJack Xiao wb_gpu_addr = prop->rptr_gpu_addr;
805e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
806e0f5b4c9SJack Xiao m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
807e0f5b4c9SJack Xiao
808e0f5b4c9SJack Xiao m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
809e0f5b4c9SJack Xiao mmSDMA0_GFX_IB_CNTL));
810e0f5b4c9SJack Xiao
811e0f5b4c9SJack Xiao m->sdmax_rlcx_doorbell_offset =
812e0f5b4c9SJack Xiao prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
813e0f5b4c9SJack Xiao
814e0f5b4c9SJack Xiao m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
815e0f5b4c9SJack Xiao
816e0f5b4c9SJack Xiao return 0;
817e0f5b4c9SJack Xiao }
818e0f5b4c9SJack Xiao
sdma_v5_2_set_mqd_funcs(struct amdgpu_device * adev)819e0f5b4c9SJack Xiao static void sdma_v5_2_set_mqd_funcs(struct amdgpu_device *adev)
820e0f5b4c9SJack Xiao {
821e0f5b4c9SJack Xiao adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
822e0f5b4c9SJack Xiao adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_2_mqd_init;
823e0f5b4c9SJack Xiao }
824e0f5b4c9SJack Xiao
825157e72e8SLikun Gao /**
826157e72e8SLikun Gao * sdma_v5_2_ring_test_ring - simple async dma engine test
827157e72e8SLikun Gao *
828157e72e8SLikun Gao * @ring: amdgpu_ring structure holding ring information
829157e72e8SLikun Gao *
830157e72e8SLikun Gao * Test the DMA engine by writing using it to write an
831157e72e8SLikun Gao * value to memory.
832157e72e8SLikun Gao * Returns 0 for success, error for failure.
833157e72e8SLikun Gao */
sdma_v5_2_ring_test_ring(struct amdgpu_ring * ring)834157e72e8SLikun Gao static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
835157e72e8SLikun Gao {
836157e72e8SLikun Gao struct amdgpu_device *adev = ring->adev;
837157e72e8SLikun Gao unsigned i;
838157e72e8SLikun Gao unsigned index;
839157e72e8SLikun Gao int r;
840157e72e8SLikun Gao u32 tmp;
841157e72e8SLikun Gao u64 gpu_addr;
8427e5e7971SJack Xiao volatile uint32_t *cpu_ptr = NULL;
843157e72e8SLikun Gao
8447e5e7971SJack Xiao tmp = 0xCAFEDEAD;
8457e5e7971SJack Xiao
8467e5e7971SJack Xiao if (ring->is_mes_queue) {
8477e5e7971SJack Xiao uint32_t offset = 0;
8487e5e7971SJack Xiao offset = amdgpu_mes_ctx_get_offs(ring,
8497e5e7971SJack Xiao AMDGPU_MES_CTX_PADDING_OFFS);
8507e5e7971SJack Xiao gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
8517e5e7971SJack Xiao cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
8527e5e7971SJack Xiao *cpu_ptr = tmp;
8537e5e7971SJack Xiao } else {
854157e72e8SLikun Gao r = amdgpu_device_wb_get(adev, &index);
855157e72e8SLikun Gao if (r) {
856157e72e8SLikun Gao dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
857157e72e8SLikun Gao return r;
858157e72e8SLikun Gao }
859157e72e8SLikun Gao
860157e72e8SLikun Gao gpu_addr = adev->wb.gpu_addr + (index * 4);
861157e72e8SLikun Gao adev->wb.wb[index] = cpu_to_le32(tmp);
8627e5e7971SJack Xiao }
863157e72e8SLikun Gao
8647e5e7971SJack Xiao r = amdgpu_ring_alloc(ring, 20);
865157e72e8SLikun Gao if (r) {
866157e72e8SLikun Gao DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
867157e72e8SLikun Gao amdgpu_device_wb_free(adev, index);
868157e72e8SLikun Gao return r;
869157e72e8SLikun Gao }
870157e72e8SLikun Gao
871157e72e8SLikun Gao amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
872157e72e8SLikun Gao SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
873157e72e8SLikun Gao amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
874157e72e8SLikun Gao amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
875157e72e8SLikun Gao amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
876157e72e8SLikun Gao amdgpu_ring_write(ring, 0xDEADBEEF);
877157e72e8SLikun Gao amdgpu_ring_commit(ring);
878157e72e8SLikun Gao
879157e72e8SLikun Gao for (i = 0; i < adev->usec_timeout; i++) {
8807e5e7971SJack Xiao if (ring->is_mes_queue)
8817e5e7971SJack Xiao tmp = le32_to_cpu(*cpu_ptr);
8827e5e7971SJack Xiao else
883157e72e8SLikun Gao tmp = le32_to_cpu(adev->wb.wb[index]);
884157e72e8SLikun Gao if (tmp == 0xDEADBEEF)
885157e72e8SLikun Gao break;
886157e72e8SLikun Gao if (amdgpu_emu_mode == 1)
887157e72e8SLikun Gao msleep(1);
888157e72e8SLikun Gao else
889157e72e8SLikun Gao udelay(1);
890157e72e8SLikun Gao }
891157e72e8SLikun Gao
892157e72e8SLikun Gao if (i >= adev->usec_timeout)
893157e72e8SLikun Gao r = -ETIMEDOUT;
894157e72e8SLikun Gao
8957e5e7971SJack Xiao if (!ring->is_mes_queue)
896157e72e8SLikun Gao amdgpu_device_wb_free(adev, index);
897157e72e8SLikun Gao
898157e72e8SLikun Gao return r;
899157e72e8SLikun Gao }
900157e72e8SLikun Gao
/**
 * sdma_v5_2_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;
	volatile uint32_t *cpu_ptr = NULL;

	/* seed the target dword with a sentinel the IB must overwrite */
	tmp = 0xCAFEDEAD;
	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		/* MES queues carry the IB and scratch space in the MES
		 * context buffer instead of the IB pool / a wb slot */
		uint32_t offset = 0;
		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		offset = amdgpu_mes_ctx_get_offs(ring,
						 AMDGPU_MES_CTX_PADDING_OFFS);
		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
		*cpu_ptr = tmp;
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r) {
			dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
			return r;
		}

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(tmp);

		r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err0;
		}
	}

	/* WRITE_LINEAR packet storing 0xDEADBEEF at gpu_addr, padded with NOPs */
	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	/* wait for the IB's fence to signal */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}

	/* the IB ran; verify the sentinel was actually overwritten */
	if (ring->is_mes_queue)
		tmp = le32_to_cpu(*cpu_ptr);
	else
		tmp = le32_to_cpu(adev->wb.wb[index]);

	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	/* MES queues never allocated a wb slot */
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}
995157e72e8SLikun Gao
996157e72e8SLikun Gao
997157e72e8SLikun Gao /**
998157e72e8SLikun Gao * sdma_v5_2_vm_copy_pte - update PTEs by copying them from the GART
999157e72e8SLikun Gao *
1000157e72e8SLikun Gao * @ib: indirect buffer to fill with commands
1001157e72e8SLikun Gao * @pe: addr of the page entry
1002157e72e8SLikun Gao * @src: src addr to copy from
1003157e72e8SLikun Gao * @count: number of page entries to update
1004157e72e8SLikun Gao *
1005157e72e8SLikun Gao * Update PTEs by copying them from the GART using sDMA.
1006157e72e8SLikun Gao */
sdma_v5_2_vm_copy_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t src,unsigned count)1007157e72e8SLikun Gao static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib,
1008157e72e8SLikun Gao uint64_t pe, uint64_t src,
1009157e72e8SLikun Gao unsigned count)
1010157e72e8SLikun Gao {
1011157e72e8SLikun Gao unsigned bytes = count * 8;
1012157e72e8SLikun Gao
1013157e72e8SLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1014157e72e8SLikun Gao SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1015157e72e8SLikun Gao ib->ptr[ib->length_dw++] = bytes - 1;
1016157e72e8SLikun Gao ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1017157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(src);
1018157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(src);
1019157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1020157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1021157e72e8SLikun Gao
1022157e72e8SLikun Gao }
1023157e72e8SLikun Gao
1024157e72e8SLikun Gao /**
1025157e72e8SLikun Gao * sdma_v5_2_vm_write_pte - update PTEs by writing them manually
1026157e72e8SLikun Gao *
1027157e72e8SLikun Gao * @ib: indirect buffer to fill with commands
1028157e72e8SLikun Gao * @pe: addr of the page entry
1029fd1c541dSLee Jones * @value: dst addr to write into pe
1030157e72e8SLikun Gao * @count: number of page entries to update
1031157e72e8SLikun Gao * @incr: increase next addr by incr bytes
1032157e72e8SLikun Gao *
1033157e72e8SLikun Gao * Update PTEs by writing them manually using sDMA.
1034157e72e8SLikun Gao */
sdma_v5_2_vm_write_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t value,unsigned count,uint32_t incr)1035157e72e8SLikun Gao static void sdma_v5_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1036157e72e8SLikun Gao uint64_t value, unsigned count,
1037157e72e8SLikun Gao uint32_t incr)
1038157e72e8SLikun Gao {
1039157e72e8SLikun Gao unsigned ndw = count * 2;
1040157e72e8SLikun Gao
1041157e72e8SLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1042157e72e8SLikun Gao SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1043157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1044157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1045157e72e8SLikun Gao ib->ptr[ib->length_dw++] = ndw - 1;
1046157e72e8SLikun Gao for (; ndw > 0; ndw -= 2) {
1047157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(value);
1048157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(value);
1049157e72e8SLikun Gao value += incr;
1050157e72e8SLikun Gao }
1051157e72e8SLikun Gao }
1052157e72e8SLikun Gao
1053157e72e8SLikun Gao /**
1054157e72e8SLikun Gao * sdma_v5_2_vm_set_pte_pde - update the page tables using sDMA
1055157e72e8SLikun Gao *
1056157e72e8SLikun Gao * @ib: indirect buffer to fill with commands
1057157e72e8SLikun Gao * @pe: addr of the page entry
1058157e72e8SLikun Gao * @addr: dst addr to write into pe
1059157e72e8SLikun Gao * @count: number of page entries to update
1060157e72e8SLikun Gao * @incr: increase next addr by incr bytes
1061157e72e8SLikun Gao * @flags: access flags
1062157e72e8SLikun Gao *
1063157e72e8SLikun Gao * Update the page tables using sDMA.
1064157e72e8SLikun Gao */
sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib * ib,uint64_t pe,uint64_t addr,unsigned count,uint32_t incr,uint64_t flags)1065157e72e8SLikun Gao static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib,
1066157e72e8SLikun Gao uint64_t pe,
1067157e72e8SLikun Gao uint64_t addr, unsigned count,
1068157e72e8SLikun Gao uint32_t incr, uint64_t flags)
1069157e72e8SLikun Gao {
1070157e72e8SLikun Gao /* for physically contiguous pages (vram) */
1071157e72e8SLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
1072157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1073157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1074157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1075157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1076157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1077157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1078157e72e8SLikun Gao ib->ptr[ib->length_dw++] = incr; /* increment size */
1079157e72e8SLikun Gao ib->ptr[ib->length_dw++] = 0;
1080157e72e8SLikun Gao ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1081157e72e8SLikun Gao }
1082157e72e8SLikun Gao
1083157e72e8SLikun Gao /**
1084157e72e8SLikun Gao * sdma_v5_2_ring_pad_ib - pad the IB
1085157e72e8SLikun Gao *
1086157e72e8SLikun Gao * @ib: indirect buffer to fill with padding
1087fd1c541dSLee Jones * @ring: amdgpu_ring structure holding ring information
1088157e72e8SLikun Gao *
1089157e72e8SLikun Gao * Pad the IB with NOPs to a boundary multiple of 8.
1090157e72e8SLikun Gao */
sdma_v5_2_ring_pad_ib(struct amdgpu_ring * ring,struct amdgpu_ib * ib)1091157e72e8SLikun Gao static void sdma_v5_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1092157e72e8SLikun Gao {
1093157e72e8SLikun Gao struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1094157e72e8SLikun Gao u32 pad_count;
1095157e72e8SLikun Gao int i;
1096157e72e8SLikun Gao
1097157e72e8SLikun Gao pad_count = (-ib->length_dw) & 0x7;
1098157e72e8SLikun Gao for (i = 0; i < pad_count; i++)
1099157e72e8SLikun Gao if (sdma && sdma->burst_nop && (i == 0))
1100157e72e8SLikun Gao ib->ptr[ib->length_dw++] =
1101157e72e8SLikun Gao SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
1102157e72e8SLikun Gao SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1103157e72e8SLikun Gao else
1104157e72e8SLikun Gao ib->ptr[ib->length_dw++] =
1105157e72e8SLikun Gao SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
1106157e72e8SLikun Gao }
1107157e72e8SLikun Gao
1108157e72e8SLikun Gao
1109157e72e8SLikun Gao /**
1110157e72e8SLikun Gao * sdma_v5_2_ring_emit_pipeline_sync - sync the pipeline
1111157e72e8SLikun Gao *
1112157e72e8SLikun Gao * @ring: amdgpu_ring pointer
1113157e72e8SLikun Gao *
1114157e72e8SLikun Gao * Make sure all previous operations are completed (CIK).
1115157e72e8SLikun Gao */
sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring * ring)1116157e72e8SLikun Gao static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1117157e72e8SLikun Gao {
1118157e72e8SLikun Gao uint32_t seq = ring->fence_drv.sync_seq;
1119157e72e8SLikun Gao uint64_t addr = ring->fence_drv.gpu_addr;
1120157e72e8SLikun Gao
1121157e72e8SLikun Gao /* wait for idle */
1122157e72e8SLikun Gao amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1123157e72e8SLikun Gao SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1124157e72e8SLikun Gao SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
1125157e72e8SLikun Gao SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
1126157e72e8SLikun Gao amdgpu_ring_write(ring, addr & 0xfffffffc);
1127157e72e8SLikun Gao amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1128157e72e8SLikun Gao amdgpu_ring_write(ring, seq); /* reference */
1129157e72e8SLikun Gao amdgpu_ring_write(ring, 0xffffffff); /* mask */
1130157e72e8SLikun Gao amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1131157e72e8SLikun Gao SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1132157e72e8SLikun Gao }
1133157e72e8SLikun Gao
1134157e72e8SLikun Gao
/**
 * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA.
 */
static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	/* delegate to the common GMC helper, which emits the flush
	 * commands onto this ring */
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}
1150157e72e8SLikun Gao
/* Emit an SRBM_WRITE packet that writes @val to register offset @reg. */
static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	uint32_t header = SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, val);
}
1159157e72e8SLikun Gao
/* Emit a POLL_REGMEM packet that polls register @reg (dword offset)
 * until (value & @mask) == @val. */
static void sdma_v5_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	uint32_t header = SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3); /* equal */

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring, reg << 2); /* byte offset */
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
1173157e72e8SLikun Gao
/* Write @ref to @reg0, then wait until (@reg1 & @mask) == @mask. */
static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	/* wait for a cycle to reset vm_inv_eng*_ack */
	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
1183157e72e8SLikun Gao
/*
 * Early IP-block init: hook up the SDMA v5.2 ring, buffer, VM PTE,
 * IRQ and MQD callback tables before any hardware is touched.
 */
static int sdma_v5_2_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v5_2_set_ring_funcs(adev);
	sdma_v5_2_set_buffer_funcs(adev);
	sdma_v5_2_set_vm_pte_funcs(adev);
	sdma_v5_2_set_irq_funcs(adev);
	sdma_v5_2_set_mqd_funcs(adev);

	return 0;
}
1196157e72e8SLikun Gao
sdma_v5_2_seq_to_irq_id(int seq_num)119765655471SHuang Rui static unsigned sdma_v5_2_seq_to_irq_id(int seq_num)
119865655471SHuang Rui {
119965655471SHuang Rui switch (seq_num) {
120065655471SHuang Rui case 0:
120165655471SHuang Rui return SOC15_IH_CLIENTID_SDMA0;
120265655471SHuang Rui case 1:
120365655471SHuang Rui return SOC15_IH_CLIENTID_SDMA1;
120465655471SHuang Rui case 2:
120565655471SHuang Rui return SOC15_IH_CLIENTID_SDMA2;
120665655471SHuang Rui case 3:
120765655471SHuang Rui return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid;
120865655471SHuang Rui default:
120965655471SHuang Rui break;
121065655471SHuang Rui }
121165655471SHuang Rui return -EINVAL;
121265655471SHuang Rui }
121365655471SHuang Rui
sdma_v5_2_seq_to_trap_id(int seq_num)121465655471SHuang Rui static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
121565655471SHuang Rui {
121665655471SHuang Rui switch (seq_num) {
121765655471SHuang Rui case 0:
121865655471SHuang Rui return SDMA0_5_0__SRCID__SDMA_TRAP;
121965655471SHuang Rui case 1:
122065655471SHuang Rui return SDMA1_5_0__SRCID__SDMA_TRAP;
122165655471SHuang Rui case 2:
122265655471SHuang Rui return SDMA2_5_0__SRCID__SDMA_TRAP;
122365655471SHuang Rui case 3:
122465655471SHuang Rui return SDMA3_5_0__SRCID__SDMA_TRAP;
122565655471SHuang Rui default:
122665655471SHuang Rui break;
122765655471SHuang Rui }
122865655471SHuang Rui return -EINVAL;
122965655471SHuang Rui }
123065655471SHuang Rui
sdma_v5_2_sw_init(void * handle)1231157e72e8SLikun Gao static int sdma_v5_2_sw_init(void *handle)
1232157e72e8SLikun Gao {
1233157e72e8SLikun Gao struct amdgpu_ring *ring;
1234157e72e8SLikun Gao int r, i;
1235157e72e8SLikun Gao struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1236157e72e8SLikun Gao
1237157e72e8SLikun Gao /* SDMA trap event */
123865655471SHuang Rui for (i = 0; i < adev->sdma.num_instances; i++) {
123965655471SHuang Rui r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i),
124065655471SHuang Rui sdma_v5_2_seq_to_trap_id(i),
1241157e72e8SLikun Gao &adev->sdma.trap_irq);
1242157e72e8SLikun Gao if (r)
1243157e72e8SLikun Gao return r;
124465655471SHuang Rui }
1245157e72e8SLikun Gao
12461336b4e7SMario Limonciello r = amdgpu_sdma_init_microcode(adev, 0, true);
1247157e72e8SLikun Gao if (r) {
1248157e72e8SLikun Gao DRM_ERROR("Failed to load sdma firmware!\n");
1249157e72e8SLikun Gao return r;
1250157e72e8SLikun Gao }
1251157e72e8SLikun Gao
1252157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
1253157e72e8SLikun Gao ring = &adev->sdma.instance[i].ring;
1254157e72e8SLikun Gao ring->ring_obj = NULL;
1255157e72e8SLikun Gao ring->use_doorbell = true;
12561f5d9cadSLikun Gao ring->me = i;
1257157e72e8SLikun Gao
1258157e72e8SLikun Gao DRM_INFO("use_doorbell being set to: [%s]\n",
1259157e72e8SLikun Gao ring->use_doorbell?"true":"false");
1260157e72e8SLikun Gao
1261157e72e8SLikun Gao ring->doorbell_index =
1262157e72e8SLikun Gao (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset
1263157e72e8SLikun Gao
1264f4caf584SHawking Zhang ring->vm_hub = AMDGPU_GFXHUB(0);
1265157e72e8SLikun Gao sprintf(ring->name, "sdma%d", i);
1266c107171bSChristian König r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
1267157e72e8SLikun Gao AMDGPU_SDMA_IRQ_INSTANCE0 + i,
1268c107171bSChristian König AMDGPU_RING_PRIO_DEFAULT, NULL);
1269157e72e8SLikun Gao if (r)
1270157e72e8SLikun Gao return r;
1271157e72e8SLikun Gao }
1272157e72e8SLikun Gao
1273157e72e8SLikun Gao return r;
1274157e72e8SLikun Gao }
1275157e72e8SLikun Gao
/*
 * Software teardown: finalize every instance's ring, then release the
 * SDMA firmware/instance context allocated during sw_init.
 */
static int sdma_v5_2_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	amdgpu_sdma_destroy_inst_ctx(adev, true);

	return 0;
}
1288157e72e8SLikun Gao
/* Hardware init just (re)starts the SDMA engines. */
static int sdma_v5_2_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v5_2_start(adev);
}
1295157e72e8SLikun Gao
/*
 * Hardware teardown: halt context switching and the engines.
 * SR-IOV VFs cannot perform the register-level shutdown, so for them
 * only the scheduler's buffer funcs are detached.
 */
static int sdma_v5_2_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* disable the scheduler for SDMA */
		amdgpu_sdma_unset_buffer_funcs_helper(adev);
		return 0;
	}

	sdma_v5_2_ctx_switch_enable(adev, false);
	sdma_v5_2_enable(adev, false);

	return 0;
}
1311157e72e8SLikun Gao
/* Suspend reuses hw_fini to stop the engines. */
static int sdma_v5_2_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v5_2_hw_fini(adev);
}
1318157e72e8SLikun Gao
/* Resume reuses hw_init to restart the engines. */
static int sdma_v5_2_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v5_2_hw_init(adev);
}
1325157e72e8SLikun Gao
sdma_v5_2_is_idle(void * handle)1326157e72e8SLikun Gao static bool sdma_v5_2_is_idle(void *handle)
1327157e72e8SLikun Gao {
1328157e72e8SLikun Gao struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1329157e72e8SLikun Gao u32 i;
1330157e72e8SLikun Gao
1331157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
1332157e72e8SLikun Gao u32 tmp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
1333157e72e8SLikun Gao
1334157e72e8SLikun Gao if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1335157e72e8SLikun Gao return false;
1336157e72e8SLikun Gao }
1337157e72e8SLikun Gao
1338157e72e8SLikun Gao return true;
1339157e72e8SLikun Gao }
1340157e72e8SLikun Gao
sdma_v5_2_wait_for_idle(void * handle)1341157e72e8SLikun Gao static int sdma_v5_2_wait_for_idle(void *handle)
1342157e72e8SLikun Gao {
1343157e72e8SLikun Gao unsigned i;
1344157e72e8SLikun Gao u32 sdma0, sdma1, sdma2, sdma3;
1345157e72e8SLikun Gao struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1346157e72e8SLikun Gao
1347157e72e8SLikun Gao for (i = 0; i < adev->usec_timeout; i++) {
1348157e72e8SLikun Gao sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
1349157e72e8SLikun Gao sdma1 = RREG32(sdma_v5_2_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
1350157e72e8SLikun Gao sdma2 = RREG32(sdma_v5_2_get_reg_offset(adev, 2, mmSDMA0_STATUS_REG));
1351157e72e8SLikun Gao sdma3 = RREG32(sdma_v5_2_get_reg_offset(adev, 3, mmSDMA0_STATUS_REG));
1352157e72e8SLikun Gao
1353157e72e8SLikun Gao if (sdma0 & sdma1 & sdma2 & sdma3 & SDMA0_STATUS_REG__IDLE_MASK)
1354157e72e8SLikun Gao return 0;
1355157e72e8SLikun Gao udelay(1);
1356157e72e8SLikun Gao }
1357157e72e8SLikun Gao return -ETIMEDOUT;
1358157e72e8SLikun Gao }
1359157e72e8SLikun Gao
/*
 * sdma_v5_2_ring_preempt_ib - preempt the IB running on @ring
 *
 * Emits a trailing fence, asserts the SDMA GFX_PREEMPT register and
 * polls until the trailing fence signals (the engine has reached a
 * preemption point), then deasserts preemption again.
 * Returns 0 on success, -EINVAL if the engine failed to preempt in time.
 */
static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	u32 index = 0;
	u64 sdma_gfx_preempt;

	amdgpu_sdma_get_index_from_ring(ring, &index);
	sdma_gfx_preempt =
		sdma_v5_2_get_reg_offset(adev, index, mmSDMA0_GFX_PREEMPT);

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	amdgpu_ring_alloc(ring, 10); /* NOTE(review): return value ignored — confirm this cannot fail here */
	sdma_v5_2_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				  ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(sdma_gfx_preempt, 1);

	/* poll the trailing fence until it signals or we time out */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(sdma_gfx_preempt, 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
1404157e72e8SLikun Gao
sdma_v5_2_set_trap_irq_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)1405157e72e8SLikun Gao static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev,
1406157e72e8SLikun Gao struct amdgpu_irq_src *source,
1407157e72e8SLikun Gao unsigned type,
1408157e72e8SLikun Gao enum amdgpu_interrupt_state state)
1409157e72e8SLikun Gao {
1410157e72e8SLikun Gao u32 sdma_cntl;
1411157e72e8SLikun Gao u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL);
1412157e72e8SLikun Gao
1413b18ff692SBokun Zhang if (!amdgpu_sriov_vf(adev)) {
1414157e72e8SLikun Gao sdma_cntl = RREG32(reg_offset);
1415157e72e8SLikun Gao sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
1416157e72e8SLikun Gao state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1417157e72e8SLikun Gao WREG32(reg_offset, sdma_cntl);
1418b18ff692SBokun Zhang }
1419157e72e8SLikun Gao
1420157e72e8SLikun Gao return 0;
1421157e72e8SLikun Gao }
1422157e72e8SLikun Gao
sdma_v5_2_process_trap_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1423157e72e8SLikun Gao static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
1424157e72e8SLikun Gao struct amdgpu_irq_src *source,
1425157e72e8SLikun Gao struct amdgpu_iv_entry *entry)
1426157e72e8SLikun Gao {
1427254492b6SJack Xiao uint32_t mes_queue_id = entry->src_data[0];
1428254492b6SJack Xiao
1429157e72e8SLikun Gao DRM_DEBUG("IH: SDMA trap\n");
1430254492b6SJack Xiao
1431254492b6SJack Xiao if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
1432254492b6SJack Xiao struct amdgpu_mes_queue *queue;
1433254492b6SJack Xiao
1434254492b6SJack Xiao mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
1435254492b6SJack Xiao
1436254492b6SJack Xiao spin_lock(&adev->mes.queue_id_lock);
1437254492b6SJack Xiao queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
1438254492b6SJack Xiao if (queue) {
1439254492b6SJack Xiao DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
1440254492b6SJack Xiao amdgpu_fence_process(queue->ring);
1441254492b6SJack Xiao }
1442254492b6SJack Xiao spin_unlock(&adev->mes.queue_id_lock);
1443254492b6SJack Xiao return 0;
1444254492b6SJack Xiao }
1445254492b6SJack Xiao
1446157e72e8SLikun Gao switch (entry->client_id) {
1447157e72e8SLikun Gao case SOC15_IH_CLIENTID_SDMA0:
1448157e72e8SLikun Gao switch (entry->ring_id) {
1449157e72e8SLikun Gao case 0:
1450157e72e8SLikun Gao amdgpu_fence_process(&adev->sdma.instance[0].ring);
1451157e72e8SLikun Gao break;
1452157e72e8SLikun Gao case 1:
1453157e72e8SLikun Gao /* XXX compute */
1454157e72e8SLikun Gao break;
1455157e72e8SLikun Gao case 2:
1456157e72e8SLikun Gao /* XXX compute */
1457157e72e8SLikun Gao break;
1458157e72e8SLikun Gao case 3:
1459157e72e8SLikun Gao /* XXX page queue*/
1460157e72e8SLikun Gao break;
1461157e72e8SLikun Gao }
1462157e72e8SLikun Gao break;
1463157e72e8SLikun Gao case SOC15_IH_CLIENTID_SDMA1:
1464157e72e8SLikun Gao switch (entry->ring_id) {
1465157e72e8SLikun Gao case 0:
1466157e72e8SLikun Gao amdgpu_fence_process(&adev->sdma.instance[1].ring);
1467157e72e8SLikun Gao break;
1468157e72e8SLikun Gao case 1:
1469157e72e8SLikun Gao /* XXX compute */
1470157e72e8SLikun Gao break;
1471157e72e8SLikun Gao case 2:
1472157e72e8SLikun Gao /* XXX compute */
1473157e72e8SLikun Gao break;
1474157e72e8SLikun Gao case 3:
1475157e72e8SLikun Gao /* XXX page queue*/
1476157e72e8SLikun Gao break;
1477157e72e8SLikun Gao }
1478157e72e8SLikun Gao break;
1479157e72e8SLikun Gao case SOC15_IH_CLIENTID_SDMA2:
1480157e72e8SLikun Gao switch (entry->ring_id) {
1481157e72e8SLikun Gao case 0:
1482157e72e8SLikun Gao amdgpu_fence_process(&adev->sdma.instance[2].ring);
1483157e72e8SLikun Gao break;
1484157e72e8SLikun Gao case 1:
1485157e72e8SLikun Gao /* XXX compute */
1486157e72e8SLikun Gao break;
1487157e72e8SLikun Gao case 2:
1488157e72e8SLikun Gao /* XXX compute */
1489157e72e8SLikun Gao break;
1490157e72e8SLikun Gao case 3:
1491157e72e8SLikun Gao /* XXX page queue*/
1492157e72e8SLikun Gao break;
1493157e72e8SLikun Gao }
1494157e72e8SLikun Gao break;
1495d682a353SLikun Gao case SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid:
1496157e72e8SLikun Gao switch (entry->ring_id) {
1497157e72e8SLikun Gao case 0:
1498157e72e8SLikun Gao amdgpu_fence_process(&adev->sdma.instance[3].ring);
1499157e72e8SLikun Gao break;
1500157e72e8SLikun Gao case 1:
1501157e72e8SLikun Gao /* XXX compute */
1502157e72e8SLikun Gao break;
1503157e72e8SLikun Gao case 2:
1504157e72e8SLikun Gao /* XXX compute */
1505157e72e8SLikun Gao break;
1506157e72e8SLikun Gao case 3:
1507157e72e8SLikun Gao /* XXX page queue*/
1508157e72e8SLikun Gao break;
1509157e72e8SLikun Gao }
1510157e72e8SLikun Gao break;
1511157e72e8SLikun Gao }
1512157e72e8SLikun Gao return 0;
1513157e72e8SLikun Gao }
1514157e72e8SLikun Gao
/* Illegal-instruction interrupt handler — currently a no-op stub. */
static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	return 0;
}
1521157e72e8SLikun Gao
sdma_v5_2_firmware_mgcg_support(struct amdgpu_device * adev,int i)1522f05f4fe6SPrike Liang static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev,
1523f05f4fe6SPrike Liang int i)
1524f05f4fe6SPrike Liang {
1525f05f4fe6SPrike Liang switch (adev->ip_versions[SDMA0_HWIP][0]) {
1526f05f4fe6SPrike Liang case IP_VERSION(5, 2, 1):
1527f05f4fe6SPrike Liang if (adev->sdma.instance[i].fw_version < 70)
1528f05f4fe6SPrike Liang return false;
1529f05f4fe6SPrike Liang break;
1530f05f4fe6SPrike Liang case IP_VERSION(5, 2, 3):
1531f05f4fe6SPrike Liang if (adev->sdma.instance[i].fw_version < 47)
1532f05f4fe6SPrike Liang return false;
1533f05f4fe6SPrike Liang break;
1534f05f4fe6SPrike Liang case IP_VERSION(5, 2, 7):
1535f05f4fe6SPrike Liang if (adev->sdma.instance[i].fw_version < 9)
1536f05f4fe6SPrike Liang return false;
1537f05f4fe6SPrike Liang break;
1538f05f4fe6SPrike Liang default:
1539f05f4fe6SPrike Liang return true;
1540f05f4fe6SPrike Liang }
1541f05f4fe6SPrike Liang
1542f05f4fe6SPrike Liang return true;
1543f05f4fe6SPrike Liang
1544f05f4fe6SPrike Liang }
1545f05f4fe6SPrike Liang
/*
 * Enable/disable medium-grain clock gating by clearing/setting the
 * soft-override bits in SDMA0_CLK_CTRL for each instance. Instances
 * whose firmware lacks MGCG support strip the feature flag first.
 */
static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	const uint32_t override_mask =
		SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
		SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
		SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
		SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
		SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
		SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK;
	uint32_t reg, data, def;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (!sdma_v5_2_firmware_mgcg_support(adev, i))
			adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;

		reg = sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL);
		def = data = RREG32(reg);

		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG))
			data &= ~override_mask;	/* enable sdma clock gating */
		else
			data |= override_mask;	/* disable sdma clock gating */

		if (def != data)
			WREG32(reg, data);
	}
}
1582157e72e8SLikun Gao
/*
 * Enable/disable memory light sleep via the MEM_POWER_OVERRIDE bit of
 * SDMA0_POWER_CNTL for each instance. SDMA 5.2.1 firmware older than
 * version 70 strips the LS feature flag first.
 */
static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t reg, data, def;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1))
			adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;

		reg = sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL);
		def = data = RREG32(reg);

		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS))
			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;	/* enable sdma mem light sleep */
		else
			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;	/* disable sdma mem light sleep */

		if (def != data)
			WREG32(reg, data);
	}
}
1611157e72e8SLikun Gao
sdma_v5_2_set_clockgating_state(void * handle,enum amd_clockgating_state state)1612157e72e8SLikun Gao static int sdma_v5_2_set_clockgating_state(void *handle,
1613157e72e8SLikun Gao enum amd_clockgating_state state)
1614157e72e8SLikun Gao {
1615157e72e8SLikun Gao struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1616157e72e8SLikun Gao
1617157e72e8SLikun Gao if (amdgpu_sriov_vf(adev))
1618157e72e8SLikun Gao return 0;
1619157e72e8SLikun Gao
16201d789535SAlex Deucher switch (adev->ip_versions[SDMA0_HWIP][0]) {
16218f4bb1e7SAlex Deucher case IP_VERSION(5, 2, 0):
16228f4bb1e7SAlex Deucher case IP_VERSION(5, 2, 2):
16238f4bb1e7SAlex Deucher case IP_VERSION(5, 2, 1):
16248f4bb1e7SAlex Deucher case IP_VERSION(5, 2, 4):
16258f4bb1e7SAlex Deucher case IP_VERSION(5, 2, 5):
162693afe158SYifan Zhang case IP_VERSION(5, 2, 6):
16278f4bb1e7SAlex Deucher case IP_VERSION(5, 2, 3):
1628f05f4fe6SPrike Liang case IP_VERSION(5, 2, 7):
1629157e72e8SLikun Gao sdma_v5_2_update_medium_grain_clock_gating(adev,
1630cd48758cSJiapeng Chong state == AMD_CG_STATE_GATE);
1631157e72e8SLikun Gao sdma_v5_2_update_medium_grain_light_sleep(adev,
1632cd48758cSJiapeng Chong state == AMD_CG_STATE_GATE);
1633157e72e8SLikun Gao break;
1634157e72e8SLikun Gao default:
1635157e72e8SLikun Gao break;
1636157e72e8SLikun Gao }
1637157e72e8SLikun Gao
1638157e72e8SLikun Gao return 0;
1639157e72e8SLikun Gao }
1640157e72e8SLikun Gao
/* Powergating entry point — no-op, always succeeds. */
static int sdma_v5_2_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}
1646157e72e8SLikun Gao
/* Report the currently-active SDMA clockgating features in @flags. */
static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;
	/* NOTE(review): no early return above, so for SR-IOV VFs the register
	 * reads below still run and may OR bits back in — confirm intended. */

	/* AMD_CG_SUPPORT_SDMA_MGCG */
	data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
	if (!(data & SDMA0_CLK_CTRL__CGCG_EN_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;

	/* AMD_CG_SUPPORT_SDMA_LS */
	data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
		*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
1665157e72e8SLikun Gao
/* Disallow GFXOFF for the lifetime of a submission; balanced by
 * sdma_v5_2_ring_end_use().
 */
static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
	 * disallow GFXOFF in some cases leading to
	 * hangs in SDMA. Disallow GFXOFF while SDMA is active.
	 * We can probably just limit this to 5.2.3,
	 * but it shouldn't hurt for other parts since
	 * this GFXOFF will be disallowed anyway when SDMA is
	 * active, this just makes it explicit.
	 * sdma_v5_2_ring_set_wptr() takes advantage of this
	 * to update the wptr because sometimes SDMA seems to miss
	 * doorbells when entering PG. If you remove this, update
	 * sdma_v5_2_ring_set_wptr() as well!
	 */
	amdgpu_gfx_off_ctrl(adev, false);
}
16843aae4ef4SAlex Deucher
/* Re-allow GFXOFF once the submission is done; pairs with
 * sdma_v5_2_ring_begin_use().
 */
static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* SDMA 5.2.3 (RMB) FW doesn't seem to properly
	 * disallow GFXOFF in some cases leading to
	 * hangs in SDMA. Allow GFXOFF when SDMA is complete.
	 */
	amdgpu_gfx_off_ctrl(adev, true);
}
16953aae4ef4SAlex Deucher
/* IP-block level lifecycle callbacks for the SDMA v5.2 engine */
const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
	.name = "sdma_v5_2",
	.early_init = sdma_v5_2_early_init,
	.late_init = NULL,
	.sw_init = sdma_v5_2_sw_init,
	.sw_fini = sdma_v5_2_sw_fini,
	.hw_init = sdma_v5_2_hw_init,
	.hw_fini = sdma_v5_2_hw_fini,
	.suspend = sdma_v5_2_suspend,
	.resume = sdma_v5_2_resume,
	.is_idle = sdma_v5_2_is_idle,
	.wait_for_idle = sdma_v5_2_wait_for_idle,
	.soft_reset = sdma_v5_2_soft_reset,
	.set_clockgating_state = sdma_v5_2_set_clockgating_state,
	.set_powergating_state = sdma_v5_2_set_powergating_state,
	.get_clockgating_state = sdma_v5_2_get_clockgating_state,
};
1713157e72e8SLikun Gao
/* Ring-level callbacks shared by every SDMA v5.2 instance's GFX ring */
static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = sdma_v5_2_ring_get_rptr,
	.get_wptr = sdma_v5_2_ring_get_wptr,
	.set_wptr = sdma_v5_2_ring_set_wptr,
	/* worst-case dword budget for a full frame of packets */
	.emit_frame_size =
		5 + /* sdma_v5_2_ring_init_cond_exec */
		6 + /* sdma_v5_2_ring_emit_hdp_flush */
		3 + /* hdp_invalidate */
		6 + /* sdma_v5_2_ring_emit_pipeline_sync */
		/* sdma_v5_2_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */
	.emit_ib = sdma_v5_2_ring_emit_ib,
	.emit_mem_sync = sdma_v5_2_ring_emit_mem_sync,
	.emit_fence = sdma_v5_2_ring_emit_fence,
	.emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v5_2_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush,
	.test_ring = sdma_v5_2_ring_test_ring,
	.test_ib = sdma_v5_2_ring_test_ib,
	.insert_nop = sdma_v5_2_ring_insert_nop,
	.pad_ib = sdma_v5_2_ring_pad_ib,
	/* begin/end_use gate GFXOFF around submissions */
	.begin_use = sdma_v5_2_ring_begin_use,
	.end_use = sdma_v5_2_ring_end_use,
	.emit_wreg = sdma_v5_2_ring_emit_wreg,
	.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
	.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
	.patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
	.preempt_ib = sdma_v5_2_ring_preempt_ib,
};
1752157e72e8SLikun Gao
sdma_v5_2_set_ring_funcs(struct amdgpu_device * adev)1753157e72e8SLikun Gao static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
1754157e72e8SLikun Gao {
1755157e72e8SLikun Gao int i;
1756157e72e8SLikun Gao
1757157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
1758157e72e8SLikun Gao adev->sdma.instance[i].ring.funcs = &sdma_v5_2_ring_funcs;
1759157e72e8SLikun Gao adev->sdma.instance[i].ring.me = i;
1760157e72e8SLikun Gao }
1761157e72e8SLikun Gao }
1762157e72e8SLikun Gao
/* Trap interrupt source: enable/disable control plus the handler */
static const struct amdgpu_irq_src_funcs sdma_v5_2_trap_irq_funcs = {
	.set = sdma_v5_2_set_trap_irq_state,
	.process = sdma_v5_2_process_trap_irq,
};
1767157e72e8SLikun Gao
/* Illegal-instruction interrupt source: handler only (no state control) */
static const struct amdgpu_irq_src_funcs sdma_v5_2_illegal_inst_irq_funcs = {
	.process = sdma_v5_2_process_illegal_inst_irq,
};
1771157e72e8SLikun Gao
/* Wire up IRQ sources; one trap IRQ type per SDMA instance. */
static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
					adev->sdma.num_instances;
	adev->sdma.trap_irq.funcs = &sdma_v5_2_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v5_2_illegal_inst_irq_funcs;
}
1779157e72e8SLikun Gao
/**
 * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @tmz: if a secure (TMZ) copy should be used
 *
 * Copy GPU buffers using the DMA engine.
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count,
				       bool tmz)
{
	/* COPY_LINEAR packet: header, count, parameter, src lo/hi, dst lo/hi */
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
	ib->ptr[ib->length_dw++] = byte_count - 1; /* count field is byte_count - 1 */
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
1809157e72e8SLikun Gao
1810157e72e8SLikun Gao /**
1811157e72e8SLikun Gao * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine
1812157e72e8SLikun Gao *
1813fd1c541dSLee Jones * @ib: indirect buffer to fill
1814157e72e8SLikun Gao * @src_data: value to write to buffer
1815157e72e8SLikun Gao * @dst_offset: dst GPU address
1816157e72e8SLikun Gao * @byte_count: number of bytes to xfer
1817157e72e8SLikun Gao *
1818157e72e8SLikun Gao * Fill GPU buffers using the DMA engine.
1819157e72e8SLikun Gao */
sdma_v5_2_emit_fill_buffer(struct amdgpu_ib * ib,uint32_t src_data,uint64_t dst_offset,uint32_t byte_count)1820157e72e8SLikun Gao static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib,
1821157e72e8SLikun Gao uint32_t src_data,
1822157e72e8SLikun Gao uint64_t dst_offset,
1823157e72e8SLikun Gao uint32_t byte_count)
1824157e72e8SLikun Gao {
1825157e72e8SLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
1826157e72e8SLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1827157e72e8SLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1828157e72e8SLikun Gao ib->ptr[ib->length_dw++] = src_data;
1829157e72e8SLikun Gao ib->ptr[ib->length_dw++] = byte_count - 1;
1830157e72e8SLikun Gao }
1831157e72e8SLikun Gao
/* TTM buffer-move callbacks backed by the SDMA v5.2 engine. */
static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = {
	/* 0x400000 = 4 MB; the packet count field stores size - 1 */
	.copy_max_bytes = 0x400000,
	/* 7 dwords, matching what sdma_v5_2_emit_copy_buffer writes */
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v5_2_emit_copy_buffer,

	.fill_max_bytes = 0x400000,
	/* 5 dwords, matching what sdma_v5_2_emit_fill_buffer writes */
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v5_2_emit_fill_buffer,
};
1841157e72e8SLikun Gao
sdma_v5_2_set_buffer_funcs(struct amdgpu_device * adev)1842157e72e8SLikun Gao static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev)
1843157e72e8SLikun Gao {
1844157e72e8SLikun Gao if (adev->mman.buffer_funcs == NULL) {
1845157e72e8SLikun Gao adev->mman.buffer_funcs = &sdma_v5_2_buffer_funcs;
1846157e72e8SLikun Gao adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1847157e72e8SLikun Gao }
1848157e72e8SLikun Gao }
1849157e72e8SLikun Gao
/* GPU VM page-table update callbacks implemented on the SDMA engine. */
static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
	/* dwords emitted per copy_pte call */
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v5_2_vm_copy_pte,
	.write_pte = sdma_v5_2_vm_write_pte,
	.set_pte_pde = sdma_v5_2_vm_set_pte_pde,
};
1856157e72e8SLikun Gao
sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device * adev)1857157e72e8SLikun Gao static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev)
1858157e72e8SLikun Gao {
1859157e72e8SLikun Gao unsigned i;
1860157e72e8SLikun Gao
1861157e72e8SLikun Gao if (adev->vm_manager.vm_pte_funcs == NULL) {
1862157e72e8SLikun Gao adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs;
1863157e72e8SLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) {
1864157e72e8SLikun Gao adev->vm_manager.vm_pte_scheds[i] =
1865157e72e8SLikun Gao &adev->sdma.instance[i].ring.sched;
1866157e72e8SLikun Gao }
1867157e72e8SLikun Gao adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1868157e72e8SLikun Gao }
1869157e72e8SLikun Gao }
1870157e72e8SLikun Gao
/* IP block descriptor used to register SDMA v5.2 with the amdgpu core. */
const struct amdgpu_ip_block_version sdma_v5_2_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 5,
	.minor = 2,
	.rev = 0,
	.funcs = &sdma_v5_2_ip_funcs,
};
1878