xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
161a039d1SStanley Yang /*
261a039d1SStanley Yang  * Copyright 2020 Advanced Micro Devices, Inc.
361a039d1SStanley Yang  *
461a039d1SStanley Yang  * Permission is hereby granted, free of charge, to any person obtaining a
561a039d1SStanley Yang  * copy of this software and associated documentation files (the "Software"),
661a039d1SStanley Yang  * to deal in the Software without restriction, including without limitation
761a039d1SStanley Yang  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
861a039d1SStanley Yang  * and/or sell copies of the Software, and to permit persons to whom the
961a039d1SStanley Yang  * Software is furnished to do so, subject to the following conditions:
1061a039d1SStanley Yang  *
1161a039d1SStanley Yang  * The above copyright notice and this permission notice shall be included in
1261a039d1SStanley Yang  * all copies or substantial portions of the Software.
1361a039d1SStanley Yang  *
1461a039d1SStanley Yang  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1561a039d1SStanley Yang  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1661a039d1SStanley Yang  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1761a039d1SStanley Yang  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
1861a039d1SStanley Yang  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1961a039d1SStanley Yang  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2061a039d1SStanley Yang  * OTHER DEALINGS IN THE SOFTWARE.
2161a039d1SStanley Yang  *
2261a039d1SStanley Yang  */
2361a039d1SStanley Yang 
2461a039d1SStanley Yang #include <linux/delay.h>
2561a039d1SStanley Yang #include <linux/firmware.h>
2661a039d1SStanley Yang #include <linux/module.h>
2761a039d1SStanley Yang #include <linux/pci.h>
2861a039d1SStanley Yang 
2961a039d1SStanley Yang #include "amdgpu.h"
3061a039d1SStanley Yang #include "amdgpu_ucode.h"
3161a039d1SStanley Yang #include "amdgpu_trace.h"
3261a039d1SStanley Yang 
3361a039d1SStanley Yang #include "gc/gc_11_0_0_offset.h"
3461a039d1SStanley Yang #include "gc/gc_11_0_0_sh_mask.h"
3561a039d1SStanley Yang #include "gc/gc_11_0_0_default.h"
3661a039d1SStanley Yang #include "hdp/hdp_6_0_0_offset.h"
3761a039d1SStanley Yang #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
3861a039d1SStanley Yang 
3961a039d1SStanley Yang #include "soc15_common.h"
4061a039d1SStanley Yang #include "soc15.h"
4161a039d1SStanley Yang #include "sdma_v6_0_0_pkt_open.h"
4261a039d1SStanley Yang #include "nbio_v4_3.h"
4361a039d1SStanley Yang #include "sdma_common.h"
4461a039d1SStanley Yang #include "sdma_v6_0.h"
4561a039d1SStanley Yang #include "v11_structs.h"
4661a039d1SStanley Yang 
4761a039d1SStanley Yang MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
4804dd809bSHuang Rui MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
49c233f3faSFlora Cui MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
50f66f4847SHawking Zhang MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
51*7a22c147SPrike Liang MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
5261a039d1SStanley Yang 
5361a039d1SStanley Yang #define SDMA1_REG_OFFSET 0x600
5461a039d1SStanley Yang #define SDMA0_HYP_DEC_REG_START 0x5880
5561a039d1SStanley Yang #define SDMA0_HYP_DEC_REG_END 0x589a
5661a039d1SStanley Yang #define SDMA1_HYP_DEC_REG_OFFSET 0x20
5761a039d1SStanley Yang 
5861a039d1SStanley Yang static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
5961a039d1SStanley Yang static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
6061a039d1SStanley Yang static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
6161a039d1SStanley Yang static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
6258e969b6SLikun Gao static int sdma_v6_0_start(struct amdgpu_device *adev);
6361a039d1SStanley Yang 
/*
 * Translate an SDMA0-relative register offset into the absolute register
 * offset for the given SDMA instance.
 *
 * Offsets inside [SDMA0_HYP_DEC_REG_START, SDMA0_HYP_DEC_REG_END] are
 * resolved against adev->reg_offset[GC_HWIP][0][1] and advance by
 * SDMA1_HYP_DEC_REG_OFFSET for every instance. All other offsets are
 * resolved against adev->reg_offset[GC_HWIP][0][0], and only instance 1 is
 * displaced, by SDMA1_REG_OFFSET.
 */
static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
	const bool in_hyp_dec_window =
		internal_offset >= SDMA0_HYP_DEC_REG_START &&
		internal_offset <= SDMA0_HYP_DEC_REG_END;

	/* For instance 0 the per-instance stride contributes nothing. */
	if (in_hyp_dec_window)
		return adev->reg_offset[GC_HWIP][0][1] + internal_offset +
		       SDMA1_HYP_DEC_REG_OFFSET * instance;

	if (instance == 1)
		internal_offset += SDMA1_REG_OFFSET;

	return adev->reg_offset[GC_HWIP][0][0] + internal_offset;
}
8161a039d1SStanley Yang 
sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring * ring)8261a039d1SStanley Yang static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
8361a039d1SStanley Yang {
8461a039d1SStanley Yang 	unsigned ret;
8561a039d1SStanley Yang 
8661a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
8761a039d1SStanley Yang 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
8861a039d1SStanley Yang 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
8961a039d1SStanley Yang 	amdgpu_ring_write(ring, 1);
9061a039d1SStanley Yang 	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
9161a039d1SStanley Yang 	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
9261a039d1SStanley Yang 
9361a039d1SStanley Yang 	return ret;
9461a039d1SStanley Yang }
9561a039d1SStanley Yang 
sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring * ring,unsigned offset)9661a039d1SStanley Yang static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
9761a039d1SStanley Yang 					   unsigned offset)
9861a039d1SStanley Yang {
9961a039d1SStanley Yang 	unsigned cur;
10061a039d1SStanley Yang 
10161a039d1SStanley Yang 	BUG_ON(offset > ring->buf_mask);
10261a039d1SStanley Yang 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
10361a039d1SStanley Yang 
10461a039d1SStanley Yang 	cur = (ring->wptr - 1) & ring->buf_mask;
10561a039d1SStanley Yang 	if (cur > offset)
10661a039d1SStanley Yang 		ring->ring[offset] = cur - offset;
10761a039d1SStanley Yang 	else
10861a039d1SStanley Yang 		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
10961a039d1SStanley Yang }
11061a039d1SStanley Yang 
11161a039d1SStanley Yang /**
11261a039d1SStanley Yang  * sdma_v6_0_ring_get_rptr - get the current read pointer
11361a039d1SStanley Yang  *
11461a039d1SStanley Yang  * @ring: amdgpu ring pointer
11561a039d1SStanley Yang  *
11661a039d1SStanley Yang  * Get the current rptr from the hardware.
11761a039d1SStanley Yang  */
sdma_v6_0_ring_get_rptr(struct amdgpu_ring * ring)11861a039d1SStanley Yang static uint64_t sdma_v6_0_ring_get_rptr(struct amdgpu_ring *ring)
11961a039d1SStanley Yang {
12061a039d1SStanley Yang 	u64 *rptr;
12161a039d1SStanley Yang 
12261a039d1SStanley Yang 	/* XXX check if swapping is necessary on BE */
12361a039d1SStanley Yang 	rptr = (u64 *)ring->rptr_cpu_addr;
12461a039d1SStanley Yang 
12561a039d1SStanley Yang 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
12661a039d1SStanley Yang 	return ((*rptr) >> 2);
12761a039d1SStanley Yang }
12861a039d1SStanley Yang 
12961a039d1SStanley Yang /**
13061a039d1SStanley Yang  * sdma_v6_0_ring_get_wptr - get the current write pointer
13161a039d1SStanley Yang  *
13261a039d1SStanley Yang  * @ring: amdgpu ring pointer
13361a039d1SStanley Yang  *
13461a039d1SStanley Yang  * Get the current wptr from the hardware.
13561a039d1SStanley Yang  */
sdma_v6_0_ring_get_wptr(struct amdgpu_ring * ring)13661a039d1SStanley Yang static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
13761a039d1SStanley Yang {
13861a039d1SStanley Yang 	u64 wptr = 0;
13961a039d1SStanley Yang 
14061a039d1SStanley Yang 	if (ring->use_doorbell) {
14161a039d1SStanley Yang 		/* XXX check if swapping is necessary on BE */
14261a039d1SStanley Yang 		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
14361a039d1SStanley Yang 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
14461a039d1SStanley Yang 	}
14561a039d1SStanley Yang 
14661a039d1SStanley Yang 	return wptr >> 2;
14761a039d1SStanley Yang }
14861a039d1SStanley Yang 
14961a039d1SStanley Yang /**
15061a039d1SStanley Yang  * sdma_v6_0_ring_set_wptr - commit the write pointer
15161a039d1SStanley Yang  *
15261a039d1SStanley Yang  * @ring: amdgpu ring pointer
15361a039d1SStanley Yang  *
15461a039d1SStanley Yang  * Write the wptr back to the hardware.
15561a039d1SStanley Yang  */
sdma_v6_0_ring_set_wptr(struct amdgpu_ring * ring)15661a039d1SStanley Yang static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
15761a039d1SStanley Yang {
15861a039d1SStanley Yang 	struct amdgpu_device *adev = ring->adev;
15986ef6eaeSJack Xiao 	uint32_t *wptr_saved;
16086ef6eaeSJack Xiao 	uint32_t *is_queue_unmap;
16186ef6eaeSJack Xiao 	uint64_t aggregated_db_index;
16286ef6eaeSJack Xiao 	uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
16361a039d1SStanley Yang 
16461a039d1SStanley Yang 	DRM_DEBUG("Setting write pointer\n");
16586ef6eaeSJack Xiao 
16686ef6eaeSJack Xiao 	if (ring->is_mes_queue) {
16786ef6eaeSJack Xiao 		wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
16886ef6eaeSJack Xiao 		is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
16986ef6eaeSJack Xiao 					      sizeof(uint32_t));
17086ef6eaeSJack Xiao 		aggregated_db_index =
17186ef6eaeSJack Xiao 			amdgpu_mes_get_aggregated_doorbell_index(adev,
17286ef6eaeSJack Xiao 							 ring->hw_prio);
17386ef6eaeSJack Xiao 
17486ef6eaeSJack Xiao 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
17586ef6eaeSJack Xiao 			     ring->wptr << 2);
17686ef6eaeSJack Xiao 		*wptr_saved = ring->wptr << 2;
17786ef6eaeSJack Xiao 		if (*is_queue_unmap) {
17886ef6eaeSJack Xiao 			WDOORBELL64(aggregated_db_index, ring->wptr << 2);
17986ef6eaeSJack Xiao 			DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
18086ef6eaeSJack Xiao 					ring->doorbell_index, ring->wptr << 2);
18186ef6eaeSJack Xiao 			WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
18286ef6eaeSJack Xiao 		} else {
18386ef6eaeSJack Xiao 			DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
18486ef6eaeSJack Xiao 					ring->doorbell_index, ring->wptr << 2);
18586ef6eaeSJack Xiao 			WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
18686ef6eaeSJack Xiao 
18786ef6eaeSJack Xiao 			if (*is_queue_unmap)
18886ef6eaeSJack Xiao 				WDOORBELL64(aggregated_db_index,
18986ef6eaeSJack Xiao 					    ring->wptr << 2);
19086ef6eaeSJack Xiao 		}
19186ef6eaeSJack Xiao 	} else {
19261a039d1SStanley Yang 		if (ring->use_doorbell) {
19361a039d1SStanley Yang 			DRM_DEBUG("Using doorbell -- "
19461a039d1SStanley Yang 				  "wptr_offs == 0x%08x "
19561a039d1SStanley Yang 				  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
19661a039d1SStanley Yang 				  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
19761a039d1SStanley Yang 				  ring->wptr_offs,
19861a039d1SStanley Yang 				  lower_32_bits(ring->wptr << 2),
19961a039d1SStanley Yang 				  upper_32_bits(ring->wptr << 2));
20061a039d1SStanley Yang 			/* XXX check if swapping is necessary on BE */
20161a039d1SStanley Yang 			atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
20261a039d1SStanley Yang 				     ring->wptr << 2);
20361a039d1SStanley Yang 			DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
20461a039d1SStanley Yang 				  ring->doorbell_index, ring->wptr << 2);
20561a039d1SStanley Yang 			WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
20661a039d1SStanley Yang 		} else {
20761a039d1SStanley Yang 			DRM_DEBUG("Not using doorbell -- "
20861a039d1SStanley Yang 				  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
20961a039d1SStanley Yang 				  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
21061a039d1SStanley Yang 				  ring->me,
21161a039d1SStanley Yang 				  lower_32_bits(ring->wptr << 2),
21261a039d1SStanley Yang 				  ring->me,
21361a039d1SStanley Yang 				  upper_32_bits(ring->wptr << 2));
21486ef6eaeSJack Xiao 			WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
21586ef6eaeSJack Xiao 				        ring->me, regSDMA0_QUEUE0_RB_WPTR),
21661a039d1SStanley Yang 					lower_32_bits(ring->wptr << 2));
21786ef6eaeSJack Xiao 			WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
21886ef6eaeSJack Xiao 				        ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
21961a039d1SStanley Yang 					upper_32_bits(ring->wptr << 2));
22061a039d1SStanley Yang 		}
22161a039d1SStanley Yang 	}
22286ef6eaeSJack Xiao }
22361a039d1SStanley Yang 
/*
 * Write @count NOP dwords to @ring.
 *
 * When the owning SDMA instance has burst_nop set, the first dword
 * additionally carries (@count - 1) in the NOP header COUNT field; every
 * following dword is the plain ring NOP.
 */
static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	const bool burst = sdma && sdma->burst_nop;
	uint32_t i;	/* same type as @count: no signed/unsigned comparison */

	for (i = 0; i < count; i++) {
		uint32_t pkt = ring->funcs->nop;

		/* Only the leading NOP encodes the number of NOPs after it. */
		if (burst && i == 0)
			pkt |= SDMA_PKT_NOP_HEADER_COUNT(count - 1);

		amdgpu_ring_write(ring, pkt);
	}
}
23661a039d1SStanley Yang 
23761a039d1SStanley Yang /**
23861a039d1SStanley Yang  * sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
23961a039d1SStanley Yang  *
24061a039d1SStanley Yang  * @ring: amdgpu ring pointer
24161a039d1SStanley Yang  * @ib: IB object to schedule
2429eba1b8bSSrinivasan Shanmugam  * @flags: unused
2439eba1b8bSSrinivasan Shanmugam  * @job: job to retrieve vmid from
24461a039d1SStanley Yang  *
24561a039d1SStanley Yang  * Schedule an IB in the DMA ring.
24661a039d1SStanley Yang  */
sdma_v6_0_ring_emit_ib(struct amdgpu_ring * ring,struct amdgpu_job * job,struct amdgpu_ib * ib,uint32_t flags)24761a039d1SStanley Yang static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
24861a039d1SStanley Yang 				   struct amdgpu_job *job,
24961a039d1SStanley Yang 				   struct amdgpu_ib *ib,
25061a039d1SStanley Yang 				   uint32_t flags)
25161a039d1SStanley Yang {
25261a039d1SStanley Yang 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
25361a039d1SStanley Yang 	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
25461a039d1SStanley Yang 
25561a039d1SStanley Yang 	/* An IB packet must end on a 8 DW boundary--the next dword
25661a039d1SStanley Yang 	 * must be on a 8-dword boundary. Our IB packet below is 6
25761a039d1SStanley Yang 	 * dwords long, thus add x number of NOPs, such that, in
25861a039d1SStanley Yang 	 * modular arithmetic,
25961a039d1SStanley Yang 	 * wptr + 6 + x = 8k, k >= 0, which in C is,
26061a039d1SStanley Yang 	 * (wptr + 6 + x) % 8 = 0.
26161a039d1SStanley Yang 	 * The expression below, is a solution of x.
26261a039d1SStanley Yang 	 */
26361a039d1SStanley Yang 	sdma_v6_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
26461a039d1SStanley Yang 
26561a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
26661a039d1SStanley Yang 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
26761a039d1SStanley Yang 	/* base must be 32 byte aligned */
26861a039d1SStanley Yang 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
26961a039d1SStanley Yang 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
27061a039d1SStanley Yang 	amdgpu_ring_write(ring, ib->length_dw);
27161a039d1SStanley Yang 	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
27261a039d1SStanley Yang 	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
27361a039d1SStanley Yang }
27461a039d1SStanley Yang 
27561a039d1SStanley Yang /**
27661a039d1SStanley Yang  * sdma_v6_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
27761a039d1SStanley Yang  *
27861a039d1SStanley Yang  * @ring: amdgpu ring pointer
27961a039d1SStanley Yang  *
28061a039d1SStanley Yang  * flush the IB by graphics cache rinse.
28161a039d1SStanley Yang  */
sdma_v6_0_ring_emit_mem_sync(struct amdgpu_ring * ring)28261a039d1SStanley Yang static void sdma_v6_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
28361a039d1SStanley Yang {
28461a039d1SStanley Yang         uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
28561a039d1SStanley Yang                             SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
28661a039d1SStanley Yang                             SDMA_GCR_GLI_INV(1);
28761a039d1SStanley Yang 
28861a039d1SStanley Yang         /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
28961a039d1SStanley Yang         amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
29061a039d1SStanley Yang         amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
29161a039d1SStanley Yang         amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
29261a039d1SStanley Yang                           SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
29361a039d1SStanley Yang         amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
29461a039d1SStanley Yang                           SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
29561a039d1SStanley Yang         amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
29661a039d1SStanley Yang                           SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
29761a039d1SStanley Yang }
29861a039d1SStanley Yang 
29961a039d1SStanley Yang 
30061a039d1SStanley Yang /**
30161a039d1SStanley Yang  * sdma_v6_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
30261a039d1SStanley Yang  *
30361a039d1SStanley Yang  * @ring: amdgpu ring pointer
30461a039d1SStanley Yang  *
30561a039d1SStanley Yang  * Emit an hdp flush packet on the requested DMA ring.
30661a039d1SStanley Yang  */
sdma_v6_0_ring_emit_hdp_flush(struct amdgpu_ring * ring)30761a039d1SStanley Yang static void sdma_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
30861a039d1SStanley Yang {
30961a039d1SStanley Yang 	struct amdgpu_device *adev = ring->adev;
31061a039d1SStanley Yang 	u32 ref_and_mask = 0;
31161a039d1SStanley Yang 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
31261a039d1SStanley Yang 
31361a039d1SStanley Yang 	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
31461a039d1SStanley Yang 
31561a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
31661a039d1SStanley Yang 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
31761a039d1SStanley Yang 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
31861a039d1SStanley Yang 	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
31961a039d1SStanley Yang 	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
32061a039d1SStanley Yang 	amdgpu_ring_write(ring, ref_and_mask); /* reference */
32161a039d1SStanley Yang 	amdgpu_ring_write(ring, ref_and_mask); /* mask */
32261a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
32361a039d1SStanley Yang 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
32461a039d1SStanley Yang }
32561a039d1SStanley Yang 
32661a039d1SStanley Yang /**
32761a039d1SStanley Yang  * sdma_v6_0_ring_emit_fence - emit a fence on the DMA ring
32861a039d1SStanley Yang  *
32961a039d1SStanley Yang  * @ring: amdgpu ring pointer
33001543dcfSArthur Grillo  * @addr: address
33101543dcfSArthur Grillo  * @seq: fence seq number
33201543dcfSArthur Grillo  * @flags: fence flags
33361a039d1SStanley Yang  *
33461a039d1SStanley Yang  * Add a DMA fence packet to the ring to write
33561a039d1SStanley Yang  * the fence seq number and DMA trap packet to generate
33661a039d1SStanley Yang  * an interrupt if needed.
33761a039d1SStanley Yang  */
sdma_v6_0_ring_emit_fence(struct amdgpu_ring * ring,u64 addr,u64 seq,unsigned flags)33861a039d1SStanley Yang static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
33961a039d1SStanley Yang 				      unsigned flags)
34061a039d1SStanley Yang {
34161a039d1SStanley Yang 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
34261a039d1SStanley Yang 	/* write the fence */
34361a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
34461a039d1SStanley Yang 			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
34561a039d1SStanley Yang 	/* zero in first two bits */
34661a039d1SStanley Yang 	BUG_ON(addr & 0x3);
34761a039d1SStanley Yang 	amdgpu_ring_write(ring, lower_32_bits(addr));
34861a039d1SStanley Yang 	amdgpu_ring_write(ring, upper_32_bits(addr));
34961a039d1SStanley Yang 	amdgpu_ring_write(ring, lower_32_bits(seq));
35061a039d1SStanley Yang 
35161a039d1SStanley Yang 	/* optionally write high bits as well */
35261a039d1SStanley Yang 	if (write64bit) {
35361a039d1SStanley Yang 		addr += 4;
35461a039d1SStanley Yang 		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
35561a039d1SStanley Yang 				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
35661a039d1SStanley Yang 		/* zero in first two bits */
35761a039d1SStanley Yang 		BUG_ON(addr & 0x3);
35861a039d1SStanley Yang 		amdgpu_ring_write(ring, lower_32_bits(addr));
35961a039d1SStanley Yang 		amdgpu_ring_write(ring, upper_32_bits(addr));
36061a039d1SStanley Yang 		amdgpu_ring_write(ring, upper_32_bits(seq));
36161a039d1SStanley Yang 	}
36261a039d1SStanley Yang 
36361a039d1SStanley Yang 	if (flags & AMDGPU_FENCE_FLAG_INT) {
36461a039d1SStanley Yang 		uint32_t ctx = ring->is_mes_queue ?
36561a039d1SStanley Yang 			(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
36661a039d1SStanley Yang 		/* generate an interrupt */
36761a039d1SStanley Yang 		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
36861a039d1SStanley Yang 		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
36961a039d1SStanley Yang 	}
37061a039d1SStanley Yang }
37161a039d1SStanley Yang 
37261a039d1SStanley Yang /**
37361a039d1SStanley Yang  * sdma_v6_0_gfx_stop - stop the gfx async dma engines
37461a039d1SStanley Yang  *
37561a039d1SStanley Yang  * @adev: amdgpu_device pointer
37661a039d1SStanley Yang  *
37761a039d1SStanley Yang  * Stop the gfx async dma ring buffers.
37861a039d1SStanley Yang  */
sdma_v6_0_gfx_stop(struct amdgpu_device * adev)37961a039d1SStanley Yang static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
38061a039d1SStanley Yang {
38161a039d1SStanley Yang 	u32 rb_cntl, ib_cntl;
38261a039d1SStanley Yang 	int i;
38361a039d1SStanley Yang 
384571c0536SAlex Deucher 	amdgpu_sdma_unset_buffer_funcs_helper(adev);
38561a039d1SStanley Yang 
38661a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
38761a039d1SStanley Yang 		rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
38861a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
38961a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
39061a039d1SStanley Yang 		ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
39161a039d1SStanley Yang 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0);
39261a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
39361a039d1SStanley Yang 	}
39461a039d1SStanley Yang }
39561a039d1SStanley Yang 
/**
 * sdma_v6_0_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Placeholder for stopping the compute async dma queues; the body is
 * currently empty and the call has no effect.
 */
static void sdma_v6_0_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo: not implemented yet */
}
40761a039d1SStanley Yang 
40861a039d1SStanley Yang /**
40927488686SGraham Sider  * sdma_v6_0_ctxempty_int_enable - enable or disable context empty interrupts
41061a039d1SStanley Yang  *
41161a039d1SStanley Yang  * @adev: amdgpu_device pointer
41227488686SGraham Sider  * @enable: enable/disable context switching due to queue empty conditions
41361a039d1SStanley Yang  *
41427488686SGraham Sider  * Enable or disable the async dma engines queue empty context switch.
41561a039d1SStanley Yang  */
sdma_v6_0_ctxempty_int_enable(struct amdgpu_device * adev,bool enable)41627488686SGraham Sider static void sdma_v6_0_ctxempty_int_enable(struct amdgpu_device *adev, bool enable)
41761a039d1SStanley Yang {
41827488686SGraham Sider 	u32 f32_cntl;
41927488686SGraham Sider 	int i;
42027488686SGraham Sider 
42127488686SGraham Sider 	if (!amdgpu_sriov_vf(adev)) {
42227488686SGraham Sider 		for (i = 0; i < adev->sdma.num_instances; i++) {
42327488686SGraham Sider 			f32_cntl = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL));
42427488686SGraham Sider 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
42527488686SGraham Sider 					CTXEMPTY_INT_ENABLE, enable ? 1 : 0);
42627488686SGraham Sider 			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_CNTL), f32_cntl);
42727488686SGraham Sider 		}
42827488686SGraham Sider 	}
42961a039d1SStanley Yang }
43061a039d1SStanley Yang 
43161a039d1SStanley Yang /**
43261a039d1SStanley Yang  * sdma_v6_0_enable - stop the async dma engines
43361a039d1SStanley Yang  *
43461a039d1SStanley Yang  * @adev: amdgpu_device pointer
43561a039d1SStanley Yang  * @enable: enable/disable the DMA MEs.
43661a039d1SStanley Yang  *
43761a039d1SStanley Yang  * Halt or unhalt the async dma engines.
43861a039d1SStanley Yang  */
sdma_v6_0_enable(struct amdgpu_device * adev,bool enable)43961a039d1SStanley Yang static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
44061a039d1SStanley Yang {
44161a039d1SStanley Yang 	u32 f32_cntl;
44261a039d1SStanley Yang 	int i;
44361a039d1SStanley Yang 
44461a039d1SStanley Yang 	if (!enable) {
44561a039d1SStanley Yang 		sdma_v6_0_gfx_stop(adev);
44661a039d1SStanley Yang 		sdma_v6_0_rlc_stop(adev);
44761a039d1SStanley Yang 	}
44861a039d1SStanley Yang 
449e1a29b28SYifan Zha 	if (amdgpu_sriov_vf(adev))
450e1a29b28SYifan Zha 		return;
451e1a29b28SYifan Zha 
45261a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
45361a039d1SStanley Yang 		f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
45461a039d1SStanley Yang 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
45561a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), f32_cntl);
45661a039d1SStanley Yang 	}
45761a039d1SStanley Yang }
45861a039d1SStanley Yang 
45961a039d1SStanley Yang /**
46061a039d1SStanley Yang  * sdma_v6_0_gfx_resume - setup and start the async dma engines
46161a039d1SStanley Yang  *
46261a039d1SStanley Yang  * @adev: amdgpu_device pointer
46361a039d1SStanley Yang  *
46461a039d1SStanley Yang  * Set up the gfx DMA ring buffers and enable them.
46561a039d1SStanley Yang  * Returns 0 for success, error for failure.
46661a039d1SStanley Yang  */
sdma_v6_0_gfx_resume(struct amdgpu_device * adev)46761a039d1SStanley Yang static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
46861a039d1SStanley Yang {
46961a039d1SStanley Yang 	struct amdgpu_ring *ring;
47061a039d1SStanley Yang 	u32 rb_cntl, ib_cntl;
47161a039d1SStanley Yang 	u32 rb_bufsz;
47261a039d1SStanley Yang 	u32 doorbell;
47361a039d1SStanley Yang 	u32 doorbell_offset;
47461a039d1SStanley Yang 	u32 temp;
47561a039d1SStanley Yang 	u64 wptr_gpu_addr;
47661a039d1SStanley Yang 	int i, r;
47761a039d1SStanley Yang 
47861a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
47961a039d1SStanley Yang 		ring = &adev->sdma.instance[i].ring;
48061a039d1SStanley Yang 
48108c8442cSYifan Zha 		if (!amdgpu_sriov_vf(adev))
48261a039d1SStanley Yang 			WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
48361a039d1SStanley Yang 
48461a039d1SStanley Yang 		/* Set ring buffer size in dwords */
48561a039d1SStanley Yang 		rb_bufsz = order_base_2(ring->ring_size / 4);
48661a039d1SStanley Yang 		rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
48761a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
48861a039d1SStanley Yang #ifdef __BIG_ENDIAN
48961a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
49061a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
49161a039d1SStanley Yang 					RPTR_WRITEBACK_SWAP_ENABLE, 1);
49261a039d1SStanley Yang #endif
49361a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
49461a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
49561a039d1SStanley Yang 
49661a039d1SStanley Yang 		/* Initialize the ring buffer's read and write pointers */
49761a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
49861a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
49961a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
50061a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
50161a039d1SStanley Yang 
50261a039d1SStanley Yang 		/* setup the wptr shadow polling */
50361a039d1SStanley Yang 		wptr_gpu_addr = ring->wptr_gpu_addr;
50461a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
50561a039d1SStanley Yang 		       lower_32_bits(wptr_gpu_addr));
50661a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
50761a039d1SStanley Yang 		       upper_32_bits(wptr_gpu_addr));
50861a039d1SStanley Yang 
50961a039d1SStanley Yang 		/* set the wb address whether it's enabled or not */
51061a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
51161a039d1SStanley Yang 		       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
51261a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
51361a039d1SStanley Yang 		       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
51461a039d1SStanley Yang 
51561a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
516de4c8a7bSGraham Sider 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
51761a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
51861a039d1SStanley Yang 
51961a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
52061a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
52161a039d1SStanley Yang 
52261a039d1SStanley Yang 		ring->wptr = 0;
52361a039d1SStanley Yang 
52461a039d1SStanley Yang 		/* before programing wptr to a less value, need set minor_ptr_update first */
52561a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
52661a039d1SStanley Yang 
52761a039d1SStanley Yang 		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
52861a039d1SStanley Yang 			WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
52961a039d1SStanley Yang 			WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
53061a039d1SStanley Yang 		}
53161a039d1SStanley Yang 
53261a039d1SStanley Yang 		doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
53361a039d1SStanley Yang 		doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
53461a039d1SStanley Yang 
53561a039d1SStanley Yang 		if (ring->use_doorbell) {
53661a039d1SStanley Yang 			doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
53761a039d1SStanley Yang 			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
53861a039d1SStanley Yang 					OFFSET, ring->doorbell_index);
53961a039d1SStanley Yang 		} else {
54061a039d1SStanley Yang 			doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
54161a039d1SStanley Yang 		}
54261a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
54361a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
54461a039d1SStanley Yang 
54561a039d1SStanley Yang 		if (i == 0)
54661a039d1SStanley Yang 			adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
54761a039d1SStanley Yang 						      ring->doorbell_index,
54861a039d1SStanley Yang 						      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
54961a039d1SStanley Yang 
55061a039d1SStanley Yang 		if (amdgpu_sriov_vf(adev))
55161a039d1SStanley Yang 			sdma_v6_0_ring_set_wptr(ring);
55261a039d1SStanley Yang 
55361a039d1SStanley Yang 		/* set minor_ptr_update to 0 after wptr programed */
55461a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
55561a039d1SStanley Yang 
55661a039d1SStanley Yang 		/* Set up RESP_MODE to non-copy addresses */
55761a039d1SStanley Yang 		temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
55861a039d1SStanley Yang 		temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
55961a039d1SStanley Yang 		temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
56061a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
56161a039d1SStanley Yang 
56261a039d1SStanley Yang 		/* program default cache read and write policy */
56361a039d1SStanley Yang 		temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
56461a039d1SStanley Yang 		/* clean read policy and write policy bits */
56561a039d1SStanley Yang 		temp &= 0xFF0FFF;
56661a039d1SStanley Yang 		temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
56761a039d1SStanley Yang 			 (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
56861a039d1SStanley Yang 			 SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
56961a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
57061a039d1SStanley Yang 
57161a039d1SStanley Yang 		if (!amdgpu_sriov_vf(adev)) {
57261a039d1SStanley Yang 			/* unhalt engine */
57361a039d1SStanley Yang 			temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
57461a039d1SStanley Yang 			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
57561a039d1SStanley Yang 			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
57661a039d1SStanley Yang 			WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
57761a039d1SStanley Yang 		}
57861a039d1SStanley Yang 
57961a039d1SStanley Yang 		/* enable DMA RB */
58061a039d1SStanley Yang 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
58161a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
58261a039d1SStanley Yang 
58361a039d1SStanley Yang 		ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
58461a039d1SStanley Yang 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
58561a039d1SStanley Yang #ifdef __BIG_ENDIAN
58661a039d1SStanley Yang 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
58761a039d1SStanley Yang #endif
58861a039d1SStanley Yang 		/* enable DMA IBs */
58961a039d1SStanley Yang 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
59061a039d1SStanley Yang 
59127488686SGraham Sider 		if (amdgpu_sriov_vf(adev))
59261a039d1SStanley Yang 			sdma_v6_0_enable(adev, true);
59361a039d1SStanley Yang 
59461a039d1SStanley Yang 		r = amdgpu_ring_test_helper(ring);
59561c31b8bSGuchun Chen 		if (r)
59661a039d1SStanley Yang 			return r;
59761a039d1SStanley Yang 
59861a039d1SStanley Yang 		if (adev->mman.buffer_funcs_ring == ring)
59961a039d1SStanley Yang 			amdgpu_ttm_set_buffer_funcs_status(adev, true);
60061a039d1SStanley Yang 	}
60161a039d1SStanley Yang 
60261a039d1SStanley Yang 	return 0;
60361a039d1SStanley Yang }
60461a039d1SStanley Yang 
/**
 * sdma_v6_0_rlc_resume - resume hook for the compute DMA queues
 *
 * @adev: amdgpu_device pointer
 *
 * Currently a stub: it programs nothing and reports success
 * unconditionally.
 * Returns 0.
 */
static int sdma_v6_0_rlc_resume(struct amdgpu_device *adev)
{
	/* nothing to set up here; always succeeds */
	return 0;
}
61761a039d1SStanley Yang 
61861a039d1SStanley Yang /**
61961a039d1SStanley Yang  * sdma_v6_0_load_microcode - load the sDMA ME ucode
62061a039d1SStanley Yang  *
62161a039d1SStanley Yang  * @adev: amdgpu_device pointer
62261a039d1SStanley Yang  *
62361a039d1SStanley Yang  * Loads the sDMA0/1 ucode.
62461a039d1SStanley Yang  * Returns 0 for success, -EINVAL if the ucode is not available.
62561a039d1SStanley Yang  */
sdma_v6_0_load_microcode(struct amdgpu_device * adev)62661a039d1SStanley Yang static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
62761a039d1SStanley Yang {
62861a039d1SStanley Yang 	const struct sdma_firmware_header_v2_0 *hdr;
62961a039d1SStanley Yang 	const __le32 *fw_data;
63061a039d1SStanley Yang 	u32 fw_size;
63161a039d1SStanley Yang 	int i, j;
63261a039d1SStanley Yang 	bool use_broadcast;
63361a039d1SStanley Yang 
63461a039d1SStanley Yang 	/* halt the MEs */
63561a039d1SStanley Yang 	sdma_v6_0_enable(adev, false);
63661a039d1SStanley Yang 
63761a039d1SStanley Yang 	if (!adev->sdma.instance[0].fw)
63861a039d1SStanley Yang 		return -EINVAL;
63961a039d1SStanley Yang 
64061a039d1SStanley Yang 	/* use broadcast mode to load SDMA microcode by default */
64161a039d1SStanley Yang 	use_broadcast = true;
64261a039d1SStanley Yang 
64361a039d1SStanley Yang 	if (use_broadcast) {
64461a039d1SStanley Yang 		dev_info(adev->dev, "Use broadcast method to load SDMA firmware\n");
64561a039d1SStanley Yang 		/* load Control Thread microcode */
64661a039d1SStanley Yang 		hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
64761a039d1SStanley Yang 		amdgpu_ucode_print_sdma_hdr(&hdr->header);
64861a039d1SStanley Yang 		fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
64961a039d1SStanley Yang 
65061a039d1SStanley Yang 		fw_data = (const __le32 *)
65161a039d1SStanley Yang 			(adev->sdma.instance[0].fw->data +
65261a039d1SStanley Yang 				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
65361a039d1SStanley Yang 
65461a039d1SStanley Yang 		WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0);
65561a039d1SStanley Yang 
65661a039d1SStanley Yang 		for (j = 0; j < fw_size; j++) {
65761a039d1SStanley Yang 			if (amdgpu_emu_mode == 1 && j % 500 == 0)
65861a039d1SStanley Yang 				msleep(1);
65961a039d1SStanley Yang 			WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
66061a039d1SStanley Yang 		}
66161a039d1SStanley Yang 
66261a039d1SStanley Yang 		/* load Context Switch microcode */
66361a039d1SStanley Yang 		fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
66461a039d1SStanley Yang 
66561a039d1SStanley Yang 		fw_data = (const __le32 *)
66661a039d1SStanley Yang 			(adev->sdma.instance[0].fw->data +
66761a039d1SStanley Yang 				le32_to_cpu(hdr->ctl_ucode_offset));
66861a039d1SStanley Yang 
66961a039d1SStanley Yang 		WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_ADDR), 0x8000);
67061a039d1SStanley Yang 
67161a039d1SStanley Yang 		for (j = 0; j < fw_size; j++) {
67261a039d1SStanley Yang 			if (amdgpu_emu_mode == 1 && j % 500 == 0)
67361a039d1SStanley Yang 				msleep(1);
67461a039d1SStanley Yang 			WREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_BROADCAST_UCODE_DATA), le32_to_cpup(fw_data++));
67561a039d1SStanley Yang 		}
67661a039d1SStanley Yang 	} else {
67761a039d1SStanley Yang 		dev_info(adev->dev, "Use legacy method to load SDMA firmware\n");
67861a039d1SStanley Yang 		for (i = 0; i < adev->sdma.num_instances; i++) {
67961a039d1SStanley Yang 			/* load Control Thread microcode */
68061a039d1SStanley Yang 			hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data;
68161a039d1SStanley Yang 			amdgpu_ucode_print_sdma_hdr(&hdr->header);
68261a039d1SStanley Yang 			fw_size = le32_to_cpu(hdr->ctx_jt_offset + hdr->ctx_jt_size) / 4;
68361a039d1SStanley Yang 
68461a039d1SStanley Yang 			fw_data = (const __le32 *)
68561a039d1SStanley Yang 				(adev->sdma.instance[0].fw->data +
68661a039d1SStanley Yang 					le32_to_cpu(hdr->header.ucode_array_offset_bytes));
68761a039d1SStanley Yang 
68861a039d1SStanley Yang 			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0);
68961a039d1SStanley Yang 
69061a039d1SStanley Yang 			for (j = 0; j < fw_size; j++) {
69161a039d1SStanley Yang 				if (amdgpu_emu_mode == 1 && j % 500 == 0)
69261a039d1SStanley Yang 					msleep(1);
69361a039d1SStanley Yang 				WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
69461a039d1SStanley Yang 			}
69561a039d1SStanley Yang 
69661a039d1SStanley Yang 			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
69761a039d1SStanley Yang 
69861a039d1SStanley Yang 			/* load Context Switch microcode */
69961a039d1SStanley Yang 			fw_size = le32_to_cpu(hdr->ctl_jt_offset + hdr->ctl_jt_size) / 4;
70061a039d1SStanley Yang 
70161a039d1SStanley Yang 			fw_data = (const __le32 *)
70261a039d1SStanley Yang 				(adev->sdma.instance[0].fw->data +
70361a039d1SStanley Yang 					le32_to_cpu(hdr->ctl_ucode_offset));
70461a039d1SStanley Yang 
70561a039d1SStanley Yang 			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), 0x8000);
70661a039d1SStanley Yang 
70761a039d1SStanley Yang 			for (j = 0; j < fw_size; j++) {
70861a039d1SStanley Yang 				if (amdgpu_emu_mode == 1 && j % 500 == 0)
70961a039d1SStanley Yang 					msleep(1);
71061a039d1SStanley Yang 				WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
71161a039d1SStanley Yang 			}
71261a039d1SStanley Yang 
71361a039d1SStanley Yang 			WREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UCODE_ADDR), adev->sdma.instance[0].fw_version);
71461a039d1SStanley Yang 		}
71561a039d1SStanley Yang 	}
71661a039d1SStanley Yang 
71761a039d1SStanley Yang 	return 0;
71861a039d1SStanley Yang }
71961a039d1SStanley Yang 
sdma_v6_0_soft_reset(void * handle)72061a039d1SStanley Yang static int sdma_v6_0_soft_reset(void *handle)
72161a039d1SStanley Yang {
72261a039d1SStanley Yang 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
72361a039d1SStanley Yang 	u32 tmp;
72461a039d1SStanley Yang 	int i;
72561a039d1SStanley Yang 
72658e969b6SLikun Gao 	sdma_v6_0_gfx_stop(adev);
72758e969b6SLikun Gao 
72861a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
72958e969b6SLikun Gao 		tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
73058e969b6SLikun Gao 		tmp |= SDMA0_FREEZE__FREEZE_MASK;
73158e969b6SLikun Gao 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
73258e969b6SLikun Gao 		tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
73358e969b6SLikun Gao 		tmp |= SDMA0_F32_CNTL__HALT_MASK;
73458e969b6SLikun Gao 		tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK;
73558e969b6SLikun Gao 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp);
73661a039d1SStanley Yang 
73758e969b6SLikun Gao 		WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
73858e969b6SLikun Gao 
73958e969b6SLikun Gao 		udelay(100);
74058e969b6SLikun Gao 
74158e969b6SLikun Gao 		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
74261a039d1SStanley Yang 		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
74361a039d1SStanley Yang 		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
74461a039d1SStanley Yang 
74558e969b6SLikun Gao 		udelay(100);
74661a039d1SStanley Yang 
74758e969b6SLikun Gao 		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
74861a039d1SStanley Yang 		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
74961a039d1SStanley Yang 
75058e969b6SLikun Gao 		udelay(100);
75161a039d1SStanley Yang 	}
75261a039d1SStanley Yang 
75358e969b6SLikun Gao 	return sdma_v6_0_start(adev);
75458e969b6SLikun Gao }
75558e969b6SLikun Gao 
sdma_v6_0_check_soft_reset(void * handle)75658e969b6SLikun Gao static bool sdma_v6_0_check_soft_reset(void *handle)
75758e969b6SLikun Gao {
75858e969b6SLikun Gao 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
75958e969b6SLikun Gao 	struct amdgpu_ring *ring;
76058e969b6SLikun Gao 	int i, r;
76158e969b6SLikun Gao 	long tmo = msecs_to_jiffies(1000);
76258e969b6SLikun Gao 
76358e969b6SLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
76458e969b6SLikun Gao 		ring = &adev->sdma.instance[i].ring;
76558e969b6SLikun Gao 		r = amdgpu_ring_test_ib(ring, tmo);
76658e969b6SLikun Gao 		if (r)
76758e969b6SLikun Gao 			return true;
76858e969b6SLikun Gao 	}
76958e969b6SLikun Gao 
77058e969b6SLikun Gao 	return false;
77161a039d1SStanley Yang }
77261a039d1SStanley Yang 
77361a039d1SStanley Yang /**
77461a039d1SStanley Yang  * sdma_v6_0_start - setup and start the async dma engines
77561a039d1SStanley Yang  *
77661a039d1SStanley Yang  * @adev: amdgpu_device pointer
77761a039d1SStanley Yang  *
77861a039d1SStanley Yang  * Set up the DMA engines and enable them.
77961a039d1SStanley Yang  * Returns 0 for success, error for failure.
78061a039d1SStanley Yang  */
sdma_v6_0_start(struct amdgpu_device * adev)78161a039d1SStanley Yang static int sdma_v6_0_start(struct amdgpu_device *adev)
78261a039d1SStanley Yang {
78361a039d1SStanley Yang 	int r = 0;
78461a039d1SStanley Yang 
78561a039d1SStanley Yang 	if (amdgpu_sriov_vf(adev)) {
78661a039d1SStanley Yang 		sdma_v6_0_enable(adev, false);
78761a039d1SStanley Yang 
78861a039d1SStanley Yang 		/* set RB registers */
78961a039d1SStanley Yang 		r = sdma_v6_0_gfx_resume(adev);
79061a039d1SStanley Yang 		return r;
79161a039d1SStanley Yang 	}
79261a039d1SStanley Yang 
79361a039d1SStanley Yang 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
79461a039d1SStanley Yang 		r = sdma_v6_0_load_microcode(adev);
79561a039d1SStanley Yang 		if (r)
79661a039d1SStanley Yang 			return r;
79761a039d1SStanley Yang 
79861a039d1SStanley Yang 		/* The value of regSDMA_F32_CNTL is invalid the moment after loading fw */
79961a039d1SStanley Yang 		if (amdgpu_emu_mode == 1)
80061a039d1SStanley Yang 			msleep(1000);
80161a039d1SStanley Yang 	}
80261a039d1SStanley Yang 
80361a039d1SStanley Yang 	/* unhalt the MEs */
80461a039d1SStanley Yang 	sdma_v6_0_enable(adev, true);
80561a039d1SStanley Yang 	/* enable sdma ring preemption */
80627488686SGraham Sider 	sdma_v6_0_ctxempty_int_enable(adev, true);
80761a039d1SStanley Yang 
80861a039d1SStanley Yang 	/* start the gfx rings and rlc compute queues */
80961a039d1SStanley Yang 	r = sdma_v6_0_gfx_resume(adev);
81061a039d1SStanley Yang 	if (r)
81161a039d1SStanley Yang 		return r;
81261a039d1SStanley Yang 	r = sdma_v6_0_rlc_resume(adev);
81361a039d1SStanley Yang 
81461a039d1SStanley Yang 	return r;
81561a039d1SStanley Yang }
81661a039d1SStanley Yang 
sdma_v6_0_mqd_init(struct amdgpu_device * adev,void * mqd,struct amdgpu_mqd_prop * prop)81761a039d1SStanley Yang static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
81861a039d1SStanley Yang 			      struct amdgpu_mqd_prop *prop)
81961a039d1SStanley Yang {
82061a039d1SStanley Yang 	struct v11_sdma_mqd *m = mqd;
82161a039d1SStanley Yang 	uint64_t wb_gpu_addr;
82261a039d1SStanley Yang 
82361a039d1SStanley Yang 	m->sdmax_rlcx_rb_cntl =
82461a039d1SStanley Yang 		order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
82561a039d1SStanley Yang 		1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
82621a550deSRuili Ji 		4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
82721a550deSRuili Ji 		1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
82861a039d1SStanley Yang 
82961a039d1SStanley Yang 	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
83061a039d1SStanley Yang 	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
83161a039d1SStanley Yang 
83261a039d1SStanley Yang 	wb_gpu_addr = prop->wptr_gpu_addr;
83361a039d1SStanley Yang 	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
83461a039d1SStanley Yang 	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
83561a039d1SStanley Yang 
83661a039d1SStanley Yang 	wb_gpu_addr = prop->rptr_gpu_addr;
83761a039d1SStanley Yang 	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
83861a039d1SStanley Yang 	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
83961a039d1SStanley Yang 
84061a039d1SStanley Yang 	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, 0,
84161a039d1SStanley Yang 							regSDMA0_QUEUE0_IB_CNTL));
84261a039d1SStanley Yang 
84361a039d1SStanley Yang 	m->sdmax_rlcx_doorbell_offset =
84461a039d1SStanley Yang 		prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
84561a039d1SStanley Yang 
84661a039d1SStanley Yang 	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
84761a039d1SStanley Yang 
84861a039d1SStanley Yang 	m->sdmax_rlcx_skip_cntl = 0;
84961a039d1SStanley Yang 	m->sdmax_rlcx_context_status = 0;
85061a039d1SStanley Yang 	m->sdmax_rlcx_doorbell_log = 0;
85161a039d1SStanley Yang 
85261a039d1SStanley Yang 	m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
85361a039d1SStanley Yang 	m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
85461a039d1SStanley Yang 
85561a039d1SStanley Yang 	return 0;
85661a039d1SStanley Yang }
85761a039d1SStanley Yang 
sdma_v6_0_set_mqd_funcs(struct amdgpu_device * adev)85861a039d1SStanley Yang static void sdma_v6_0_set_mqd_funcs(struct amdgpu_device *adev)
85961a039d1SStanley Yang {
86061a039d1SStanley Yang 	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v11_sdma_mqd);
86161a039d1SStanley Yang 	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v6_0_mqd_init;
86261a039d1SStanley Yang }
86361a039d1SStanley Yang 
86461a039d1SStanley Yang /**
86561a039d1SStanley Yang  * sdma_v6_0_ring_test_ring - simple async dma engine test
86661a039d1SStanley Yang  *
86761a039d1SStanley Yang  * @ring: amdgpu_ring structure holding ring information
86861a039d1SStanley Yang  *
86961a039d1SStanley Yang  * Test the DMA engine by writing using it to write an
87061a039d1SStanley Yang  * value to memory.
87161a039d1SStanley Yang  * Returns 0 for success, error for failure.
87261a039d1SStanley Yang  */
sdma_v6_0_ring_test_ring(struct amdgpu_ring * ring)87361a039d1SStanley Yang static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
87461a039d1SStanley Yang {
87561a039d1SStanley Yang 	struct amdgpu_device *adev = ring->adev;
87661a039d1SStanley Yang 	unsigned i;
87761a039d1SStanley Yang 	unsigned index;
87861a039d1SStanley Yang 	int r;
87961a039d1SStanley Yang 	u32 tmp;
88061a039d1SStanley Yang 	u64 gpu_addr;
88161a039d1SStanley Yang 	volatile uint32_t *cpu_ptr = NULL;
88261a039d1SStanley Yang 
88361a039d1SStanley Yang 	tmp = 0xCAFEDEAD;
88461a039d1SStanley Yang 
88561a039d1SStanley Yang 	if (ring->is_mes_queue) {
88661a039d1SStanley Yang 		uint32_t offset = 0;
88761a039d1SStanley Yang 		offset = amdgpu_mes_ctx_get_offs(ring,
88861a039d1SStanley Yang 					 AMDGPU_MES_CTX_PADDING_OFFS);
88961a039d1SStanley Yang 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
89061a039d1SStanley Yang 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
89161a039d1SStanley Yang 		*cpu_ptr = tmp;
89261a039d1SStanley Yang 	} else {
89361a039d1SStanley Yang 		r = amdgpu_device_wb_get(adev, &index);
89461a039d1SStanley Yang 		if (r) {
89561a039d1SStanley Yang 			dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
89661a039d1SStanley Yang 			return r;
89761a039d1SStanley Yang 		}
89861a039d1SStanley Yang 
89961a039d1SStanley Yang 		gpu_addr = adev->wb.gpu_addr + (index * 4);
90061a039d1SStanley Yang 		adev->wb.wb[index] = cpu_to_le32(tmp);
90161a039d1SStanley Yang 	}
90261a039d1SStanley Yang 
90361a039d1SStanley Yang 	r = amdgpu_ring_alloc(ring, 5);
90461a039d1SStanley Yang 	if (r) {
90561a039d1SStanley Yang 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
90661a039d1SStanley Yang 		amdgpu_device_wb_free(adev, index);
90761a039d1SStanley Yang 		return r;
90861a039d1SStanley Yang 	}
90961a039d1SStanley Yang 
91061a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
91161a039d1SStanley Yang 			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
91261a039d1SStanley Yang 	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
91361a039d1SStanley Yang 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
91461a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
91561a039d1SStanley Yang 	amdgpu_ring_write(ring, 0xDEADBEEF);
91661a039d1SStanley Yang 	amdgpu_ring_commit(ring);
91761a039d1SStanley Yang 
91861a039d1SStanley Yang 	for (i = 0; i < adev->usec_timeout; i++) {
91961a039d1SStanley Yang 		if (ring->is_mes_queue)
92061a039d1SStanley Yang 			tmp = le32_to_cpu(*cpu_ptr);
92161a039d1SStanley Yang 		else
92261a039d1SStanley Yang 			tmp = le32_to_cpu(adev->wb.wb[index]);
92361a039d1SStanley Yang 		if (tmp == 0xDEADBEEF)
92461a039d1SStanley Yang 			break;
92561a039d1SStanley Yang 		if (amdgpu_emu_mode == 1)
92661a039d1SStanley Yang 			msleep(1);
92761a039d1SStanley Yang 		else
92861a039d1SStanley Yang 			udelay(1);
92961a039d1SStanley Yang 	}
93061a039d1SStanley Yang 
93161a039d1SStanley Yang 	if (i >= adev->usec_timeout)
93261a039d1SStanley Yang 		r = -ETIMEDOUT;
93361a039d1SStanley Yang 
93461a039d1SStanley Yang 	if (!ring->is_mes_queue)
93561a039d1SStanley Yang 		amdgpu_device_wb_free(adev, index);
93661a039d1SStanley Yang 
93761a039d1SStanley Yang 	return r;
93861a039d1SStanley Yang }
93961a039d1SStanley Yang 
94061a039d1SStanley Yang /**
94161a039d1SStanley Yang  * sdma_v6_0_ring_test_ib - test an IB on the DMA engine
94261a039d1SStanley Yang  *
94361a039d1SStanley Yang  * @ring: amdgpu_ring structure holding ring information
9449eba1b8bSSrinivasan Shanmugam  * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
94561a039d1SStanley Yang  *
94661a039d1SStanley Yang  * Test a simple IB in the DMA ring.
94761a039d1SStanley Yang  * Returns 0 on success, error on failure.
94861a039d1SStanley Yang  */
sdma_v6_0_ring_test_ib(struct amdgpu_ring * ring,long timeout)94961a039d1SStanley Yang static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
95061a039d1SStanley Yang {
95161a039d1SStanley Yang 	struct amdgpu_device *adev = ring->adev;
95261a039d1SStanley Yang 	struct amdgpu_ib ib;
95361a039d1SStanley Yang 	struct dma_fence *f = NULL;
95461a039d1SStanley Yang 	unsigned index;
95561a039d1SStanley Yang 	long r;
95661a039d1SStanley Yang 	u32 tmp = 0;
95761a039d1SStanley Yang 	u64 gpu_addr;
95861a039d1SStanley Yang 	volatile uint32_t *cpu_ptr = NULL;
95961a039d1SStanley Yang 
96061a039d1SStanley Yang 	tmp = 0xCAFEDEAD;
96161a039d1SStanley Yang 	memset(&ib, 0, sizeof(ib));
96261a039d1SStanley Yang 
96361a039d1SStanley Yang 	if (ring->is_mes_queue) {
96461a039d1SStanley Yang 		uint32_t offset = 0;
96561a039d1SStanley Yang 		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
96661a039d1SStanley Yang 		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
96761a039d1SStanley Yang 		ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
96861a039d1SStanley Yang 
96961a039d1SStanley Yang 		offset = amdgpu_mes_ctx_get_offs(ring,
97061a039d1SStanley Yang 					 AMDGPU_MES_CTX_PADDING_OFFS);
97161a039d1SStanley Yang 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
97261a039d1SStanley Yang 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
97361a039d1SStanley Yang 		*cpu_ptr = tmp;
97461a039d1SStanley Yang 	} else {
97561a039d1SStanley Yang 		r = amdgpu_device_wb_get(adev, &index);
97661a039d1SStanley Yang 		if (r) {
97761a039d1SStanley Yang 			dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
97861a039d1SStanley Yang 			return r;
97961a039d1SStanley Yang 		}
98061a039d1SStanley Yang 
98161a039d1SStanley Yang 		gpu_addr = adev->wb.gpu_addr + (index * 4);
98261a039d1SStanley Yang 		adev->wb.wb[index] = cpu_to_le32(tmp);
98361a039d1SStanley Yang 
98461a039d1SStanley Yang 		r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
98561a039d1SStanley Yang 		if (r) {
98661a039d1SStanley Yang 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
98761a039d1SStanley Yang 			goto err0;
98861a039d1SStanley Yang 		}
98961a039d1SStanley Yang 	}
99061a039d1SStanley Yang 
99161a039d1SStanley Yang 	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
99261a039d1SStanley Yang 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
99361a039d1SStanley Yang 	ib.ptr[1] = lower_32_bits(gpu_addr);
99461a039d1SStanley Yang 	ib.ptr[2] = upper_32_bits(gpu_addr);
99561a039d1SStanley Yang 	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
99661a039d1SStanley Yang 	ib.ptr[4] = 0xDEADBEEF;
99761a039d1SStanley Yang 	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
99861a039d1SStanley Yang 	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
99961a039d1SStanley Yang 	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
100061a039d1SStanley Yang 	ib.length_dw = 8;
100161a039d1SStanley Yang 
100261a039d1SStanley Yang 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
100361a039d1SStanley Yang 	if (r)
100461a039d1SStanley Yang 		goto err1;
100561a039d1SStanley Yang 
100661a039d1SStanley Yang 	r = dma_fence_wait_timeout(f, false, timeout);
100761a039d1SStanley Yang 	if (r == 0) {
100861a039d1SStanley Yang 		DRM_ERROR("amdgpu: IB test timed out\n");
100961a039d1SStanley Yang 		r = -ETIMEDOUT;
101061a039d1SStanley Yang 		goto err1;
101161a039d1SStanley Yang 	} else if (r < 0) {
101261a039d1SStanley Yang 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
101361a039d1SStanley Yang 		goto err1;
101461a039d1SStanley Yang 	}
101561a039d1SStanley Yang 
101661a039d1SStanley Yang 	if (ring->is_mes_queue)
101761a039d1SStanley Yang 		tmp = le32_to_cpu(*cpu_ptr);
101861a039d1SStanley Yang 	else
101961a039d1SStanley Yang 		tmp = le32_to_cpu(adev->wb.wb[index]);
102061a039d1SStanley Yang 
102161a039d1SStanley Yang 	if (tmp == 0xDEADBEEF)
102261a039d1SStanley Yang 		r = 0;
102361a039d1SStanley Yang 	else
102461a039d1SStanley Yang 		r = -EINVAL;
102561a039d1SStanley Yang 
102661a039d1SStanley Yang err1:
102761a039d1SStanley Yang 	amdgpu_ib_free(adev, &ib, NULL);
102861a039d1SStanley Yang 	dma_fence_put(f);
102961a039d1SStanley Yang err0:
103061a039d1SStanley Yang 	if (!ring->is_mes_queue)
103161a039d1SStanley Yang 		amdgpu_device_wb_free(adev, index);
103261a039d1SStanley Yang 	return r;
103361a039d1SStanley Yang }
103461a039d1SStanley Yang 
103561a039d1SStanley Yang 
103661a039d1SStanley Yang /**
103761a039d1SStanley Yang  * sdma_v6_0_vm_copy_pte - update PTEs by copying them from the GART
103861a039d1SStanley Yang  *
103961a039d1SStanley Yang  * @ib: indirect buffer to fill with commands
104061a039d1SStanley Yang  * @pe: addr of the page entry
104161a039d1SStanley Yang  * @src: src addr to copy from
104261a039d1SStanley Yang  * @count: number of page entries to update
104361a039d1SStanley Yang  *
104461a039d1SStanley Yang  * Update PTEs by copying them from the GART using sDMA.
104561a039d1SStanley Yang  */
sdma_v6_0_vm_copy_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t src,unsigned count)104661a039d1SStanley Yang static void sdma_v6_0_vm_copy_pte(struct amdgpu_ib *ib,
104761a039d1SStanley Yang 				  uint64_t pe, uint64_t src,
104861a039d1SStanley Yang 				  unsigned count)
104961a039d1SStanley Yang {
105061a039d1SStanley Yang 	unsigned bytes = count * 8;
105161a039d1SStanley Yang 
105261a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
105361a039d1SStanley Yang 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
105461a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = bytes - 1;
105561a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
105661a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(src);
105761a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(src);
105861a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
105961a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
106061a039d1SStanley Yang 
106161a039d1SStanley Yang }
106261a039d1SStanley Yang 
106361a039d1SStanley Yang /**
106461a039d1SStanley Yang  * sdma_v6_0_vm_write_pte - update PTEs by writing them manually
106561a039d1SStanley Yang  *
106661a039d1SStanley Yang  * @ib: indirect buffer to fill with commands
106761a039d1SStanley Yang  * @pe: addr of the page entry
106801543dcfSArthur Grillo  * @value: dst addr to write into pe
106961a039d1SStanley Yang  * @count: number of page entries to update
107061a039d1SStanley Yang  * @incr: increase next addr by incr bytes
107161a039d1SStanley Yang  *
107261a039d1SStanley Yang  * Update PTEs by writing them manually using sDMA.
107361a039d1SStanley Yang  */
sdma_v6_0_vm_write_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t value,unsigned count,uint32_t incr)107461a039d1SStanley Yang static void sdma_v6_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
107561a039d1SStanley Yang 				   uint64_t value, unsigned count,
107661a039d1SStanley Yang 				   uint32_t incr)
107761a039d1SStanley Yang {
107861a039d1SStanley Yang 	unsigned ndw = count * 2;
107961a039d1SStanley Yang 
108061a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
108161a039d1SStanley Yang 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
108261a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
108361a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
108461a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = ndw - 1;
108561a039d1SStanley Yang 	for (; ndw > 0; ndw -= 2) {
108661a039d1SStanley Yang 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
108761a039d1SStanley Yang 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
108861a039d1SStanley Yang 		value += incr;
108961a039d1SStanley Yang 	}
109061a039d1SStanley Yang }
109161a039d1SStanley Yang 
109261a039d1SStanley Yang /**
109361a039d1SStanley Yang  * sdma_v6_0_vm_set_pte_pde - update the page tables using sDMA
109461a039d1SStanley Yang  *
109561a039d1SStanley Yang  * @ib: indirect buffer to fill with commands
109661a039d1SStanley Yang  * @pe: addr of the page entry
109761a039d1SStanley Yang  * @addr: dst addr to write into pe
109861a039d1SStanley Yang  * @count: number of page entries to update
109961a039d1SStanley Yang  * @incr: increase next addr by incr bytes
110061a039d1SStanley Yang  * @flags: access flags
110161a039d1SStanley Yang  *
110261a039d1SStanley Yang  * Update the page tables using sDMA.
110361a039d1SStanley Yang  */
sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib * ib,uint64_t pe,uint64_t addr,unsigned count,uint32_t incr,uint64_t flags)110461a039d1SStanley Yang static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
110561a039d1SStanley Yang 				     uint64_t pe,
110661a039d1SStanley Yang 				     uint64_t addr, unsigned count,
110761a039d1SStanley Yang 				     uint32_t incr, uint64_t flags)
110861a039d1SStanley Yang {
110961a039d1SStanley Yang 	/* for physically contiguous pages (vram) */
111061a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
111161a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
111261a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
111361a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
111461a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
111561a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
111661a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
111761a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = incr; /* increment size */
111861a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = 0;
111961a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
112061a039d1SStanley Yang }
112161a039d1SStanley Yang 
112261a039d1SStanley Yang /**
112361a039d1SStanley Yang  * sdma_v6_0_ring_pad_ib - pad the IB
112461a039d1SStanley Yang  * @ib: indirect buffer to fill with padding
1125ebe884e8SSrinivasan Shanmugam  * @ring: amdgpu ring pointer
112661a039d1SStanley Yang  *
112761a039d1SStanley Yang  * Pad the IB with NOPs to a boundary multiple of 8.
112861a039d1SStanley Yang  */
sdma_v6_0_ring_pad_ib(struct amdgpu_ring * ring,struct amdgpu_ib * ib)112961a039d1SStanley Yang static void sdma_v6_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
113061a039d1SStanley Yang {
113161a039d1SStanley Yang 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
113261a039d1SStanley Yang 	u32 pad_count;
113361a039d1SStanley Yang 	int i;
113461a039d1SStanley Yang 
113561a039d1SStanley Yang 	pad_count = (-ib->length_dw) & 0x7;
113661a039d1SStanley Yang 	for (i = 0; i < pad_count; i++)
113761a039d1SStanley Yang 		if (sdma && sdma->burst_nop && (i == 0))
113861a039d1SStanley Yang 			ib->ptr[ib->length_dw++] =
113961a039d1SStanley Yang 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
114061a039d1SStanley Yang 				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
114161a039d1SStanley Yang 		else
114261a039d1SStanley Yang 			ib->ptr[ib->length_dw++] =
114361a039d1SStanley Yang 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
114461a039d1SStanley Yang }
114561a039d1SStanley Yang 
114661a039d1SStanley Yang /**
114761a039d1SStanley Yang  * sdma_v6_0_ring_emit_pipeline_sync - sync the pipeline
114861a039d1SStanley Yang  *
114961a039d1SStanley Yang  * @ring: amdgpu_ring pointer
115061a039d1SStanley Yang  *
115161a039d1SStanley Yang  * Make sure all previous operations are completed (CIK).
115261a039d1SStanley Yang  */
sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring * ring)115361a039d1SStanley Yang static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
115461a039d1SStanley Yang {
115561a039d1SStanley Yang 	uint32_t seq = ring->fence_drv.sync_seq;
115661a039d1SStanley Yang 	uint64_t addr = ring->fence_drv.gpu_addr;
115761a039d1SStanley Yang 
115861a039d1SStanley Yang 	/* wait for idle */
115961a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
116061a039d1SStanley Yang 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
116161a039d1SStanley Yang 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
116261a039d1SStanley Yang 			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
116361a039d1SStanley Yang 	amdgpu_ring_write(ring, addr & 0xfffffffc);
116461a039d1SStanley Yang 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
116561a039d1SStanley Yang 	amdgpu_ring_write(ring, seq); /* reference */
116661a039d1SStanley Yang 	amdgpu_ring_write(ring, 0xffffffff); /* mask */
116761a039d1SStanley Yang 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
116861a039d1SStanley Yang 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
116961a039d1SStanley Yang }
117061a039d1SStanley Yang 
117161a039d1SStanley Yang /**
117261a039d1SStanley Yang  * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
117361a039d1SStanley Yang  *
117461a039d1SStanley Yang  * @ring: amdgpu_ring pointer
11759eba1b8bSSrinivasan Shanmugam  * @vmid: vmid number to use
11769eba1b8bSSrinivasan Shanmugam  * @pd_addr: address
117761a039d1SStanley Yang  *
117861a039d1SStanley Yang  * Update the page table base and flush the VM TLB
117961a039d1SStanley Yang  * using sDMA.
118061a039d1SStanley Yang  */
sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring * ring,unsigned vmid,uint64_t pd_addr)118161a039d1SStanley Yang static void sdma_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
118261a039d1SStanley Yang 					 unsigned vmid, uint64_t pd_addr)
118361a039d1SStanley Yang {
11840530553bSLe Ma 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
1185febc9c65SPierre-Eric Pelloux-Prayer 	uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
1186febc9c65SPierre-Eric Pelloux-Prayer 
1187febc9c65SPierre-Eric Pelloux-Prayer 	/* Update the PD address for this VMID. */
1188febc9c65SPierre-Eric Pelloux-Prayer 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
1189febc9c65SPierre-Eric Pelloux-Prayer 			      (hub->ctx_addr_distance * vmid),
1190febc9c65SPierre-Eric Pelloux-Prayer 			      lower_32_bits(pd_addr));
1191febc9c65SPierre-Eric Pelloux-Prayer 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
1192febc9c65SPierre-Eric Pelloux-Prayer 			      (hub->ctx_addr_distance * vmid),
1193febc9c65SPierre-Eric Pelloux-Prayer 			      upper_32_bits(pd_addr));
1194febc9c65SPierre-Eric Pelloux-Prayer 
1195febc9c65SPierre-Eric Pelloux-Prayer 	/* Trigger invalidation. */
1196febc9c65SPierre-Eric Pelloux-Prayer 	amdgpu_ring_write(ring,
1197febc9c65SPierre-Eric Pelloux-Prayer 			  SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1198febc9c65SPierre-Eric Pelloux-Prayer 			  SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
1199febc9c65SPierre-Eric Pelloux-Prayer 			  SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
1200febc9c65SPierre-Eric Pelloux-Prayer 			  SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
1201febc9c65SPierre-Eric Pelloux-Prayer 	amdgpu_ring_write(ring, req);
1202febc9c65SPierre-Eric Pelloux-Prayer 	amdgpu_ring_write(ring, 0xFFFFFFFF);
1203febc9c65SPierre-Eric Pelloux-Prayer 	amdgpu_ring_write(ring,
1204febc9c65SPierre-Eric Pelloux-Prayer 			  SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
1205febc9c65SPierre-Eric Pelloux-Prayer 			  SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
120661a039d1SStanley Yang }
120761a039d1SStanley Yang 
/**
 * sdma_v6_0_ring_emit_wreg - emit a register write on the ring
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset, emitted as given
 * @val: value to write
 *
 * Emits a three-dword SRBM_WRITE packet with all four byte enables set.
 */
static void sdma_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	uint32_t header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, val);
}
121661a039d1SStanley Yang 
/**
 * sdma_v6_0_ring_emit_reg_wait - emit a wait on a register value
 *
 * @ring: amdgpu_ring pointer
 * @reg: register offset; emitted shifted left by two
 * @val: reference value
 * @mask: mask emitted alongside the reference value
 *
 * Emits a POLL_REGMEM packet using compare function 3 ("equal") with
 * @val as reference and @mask as mask.
 */
static void sdma_v6_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	uint32_t header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3); /* equal */
	uint32_t poll_cfg = SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			    SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, poll_cfg);
}
123061a039d1SStanley Yang 
/**
 * sdma_v6_0_ring_emit_reg_write_reg_wait - write one register, wait on another
 *
 * @ring: amdgpu_ring pointer
 * @reg0: register that receives @ref
 * @reg1: register that is polled afterwards
 * @ref: value written to @reg0
 * @mask: used as both reference value and mask when polling @reg1
 *
 * Emits a write of @ref to @reg0, a wait on @reg0 with reference 0 and
 * mask 0, and finally a wait on @reg1 with reference and mask @mask.
 */
static void sdma_v6_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);

	/* wait for a cycle to reset vm_inv_eng*_ack */
	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);

	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
124061a039d1SStanley Yang 
1241a57b24e1SYiPeng Chai static struct amdgpu_sdma_ras sdma_v6_0_3_ras = {
1242a57b24e1SYiPeng Chai 	.ras_block = {
1243a57b24e1SYiPeng Chai 		.ras_late_init = amdgpu_ras_block_late_init,
1244a57b24e1SYiPeng Chai 	},
1245a57b24e1SYiPeng Chai };
1246a57b24e1SYiPeng Chai 
sdma_v6_0_set_ras_funcs(struct amdgpu_device * adev)1247a57b24e1SYiPeng Chai static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev)
1248a57b24e1SYiPeng Chai {
1249a57b24e1SYiPeng Chai 	switch (adev->ip_versions[SDMA0_HWIP][0]) {
1250a57b24e1SYiPeng Chai 	case IP_VERSION(6, 0, 3):
1251a57b24e1SYiPeng Chai 		adev->sdma.ras = &sdma_v6_0_3_ras;
1252a57b24e1SYiPeng Chai 		break;
1253a57b24e1SYiPeng Chai 	default:
1254a57b24e1SYiPeng Chai 		break;
1255a57b24e1SYiPeng Chai 	}
1256a57b24e1SYiPeng Chai 
1257a57b24e1SYiPeng Chai }
1258a57b24e1SYiPeng Chai 
/**
 * sdma_v6_0_early_init - install the SDMA v6.0 callback tables
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Calls each sdma_v6_0_set_*_funcs() helper in turn (ring, buffer,
 * VM PTE, IRQ, MQD and RAS).
 *
 * Returns 0.
 */
static int sdma_v6_0_early_init(void *handle)
{
	struct amdgpu_device *adev = handle;

	sdma_v6_0_set_ring_funcs(adev);
	sdma_v6_0_set_buffer_funcs(adev);
	sdma_v6_0_set_vm_pte_funcs(adev);
	sdma_v6_0_set_irq_funcs(adev);
	sdma_v6_0_set_mqd_funcs(adev);
	sdma_v6_0_set_ras_funcs(adev);

	return 0;
}
127261a039d1SStanley Yang 
sdma_v6_0_sw_init(void * handle)127361a039d1SStanley Yang static int sdma_v6_0_sw_init(void *handle)
127461a039d1SStanley Yang {
127561a039d1SStanley Yang 	struct amdgpu_ring *ring;
127661a039d1SStanley Yang 	int r, i;
127761a039d1SStanley Yang 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
127861a039d1SStanley Yang 
127961a039d1SStanley Yang 	/* SDMA trap event */
128061a039d1SStanley Yang 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
128161a039d1SStanley Yang 			      GFX_11_0_0__SRCID__SDMA_TRAP,
128261a039d1SStanley Yang 			      &adev->sdma.trap_irq);
128361a039d1SStanley Yang 	if (r)
128461a039d1SStanley Yang 		return r;
128561a039d1SStanley Yang 
12861336b4e7SMario Limonciello 	r = amdgpu_sdma_init_microcode(adev, 0, true);
128761a039d1SStanley Yang 	if (r) {
128861a039d1SStanley Yang 		DRM_ERROR("Failed to load sdma firmware!\n");
128961a039d1SStanley Yang 		return r;
129061a039d1SStanley Yang 	}
129161a039d1SStanley Yang 
129261a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
129361a039d1SStanley Yang 		ring = &adev->sdma.instance[i].ring;
129461a039d1SStanley Yang 		ring->ring_obj = NULL;
129561a039d1SStanley Yang 		ring->use_doorbell = true;
129661a039d1SStanley Yang 		ring->me = i;
129761a039d1SStanley Yang 
129861a039d1SStanley Yang 		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
129961a039d1SStanley Yang 				ring->use_doorbell?"true":"false");
130061a039d1SStanley Yang 
130161a039d1SStanley Yang 		ring->doorbell_index =
130261a039d1SStanley Yang 			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
130361a039d1SStanley Yang 
1304f4caf584SHawking Zhang 		ring->vm_hub = AMDGPU_GFXHUB(0);
130561a039d1SStanley Yang 		sprintf(ring->name, "sdma%d", i);
130661a039d1SStanley Yang 		r = amdgpu_ring_init(adev, ring, 1024,
130761a039d1SStanley Yang 				     &adev->sdma.trap_irq,
130861a039d1SStanley Yang 				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
130961a039d1SStanley Yang 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
131061a039d1SStanley Yang 		if (r)
131161a039d1SStanley Yang 			return r;
131261a039d1SStanley Yang 	}
131361a039d1SStanley Yang 
1314a57b24e1SYiPeng Chai 	if (amdgpu_sdma_ras_sw_init(adev)) {
1315a57b24e1SYiPeng Chai 		dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
1316a57b24e1SYiPeng Chai 		return -EINVAL;
1317a57b24e1SYiPeng Chai 	}
1318a57b24e1SYiPeng Chai 
131961a039d1SStanley Yang 	return r;
132061a039d1SStanley Yang }
132161a039d1SStanley Yang 
sdma_v6_0_sw_fini(void * handle)132261a039d1SStanley Yang static int sdma_v6_0_sw_fini(void *handle)
132361a039d1SStanley Yang {
132461a039d1SStanley Yang 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
132561a039d1SStanley Yang 	int i;
132661a039d1SStanley Yang 
132761a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++)
132861a039d1SStanley Yang 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
132961a039d1SStanley Yang 
1330b077656bSLikun Gao 	amdgpu_sdma_destroy_inst_ctx(adev, true);
133161a039d1SStanley Yang 
133261a039d1SStanley Yang 	return 0;
133361a039d1SStanley Yang }
133461a039d1SStanley Yang 
/**
 * sdma_v6_0_hw_init - hardware initialisation of the SDMA block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Returns the result of sdma_v6_0_start().
 */
static int sdma_v6_0_hw_init(void *handle)
{
	return sdma_v6_0_start(handle);
}
134161a039d1SStanley Yang 
sdma_v6_0_hw_fini(void * handle)134261a039d1SStanley Yang static int sdma_v6_0_hw_fini(void *handle)
134361a039d1SStanley Yang {
134461a039d1SStanley Yang 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
134561a039d1SStanley Yang 
1346a98cec22SAlex Deucher 	if (amdgpu_sriov_vf(adev)) {
1347a98cec22SAlex Deucher 		/* disable the scheduler for SDMA */
1348a98cec22SAlex Deucher 		amdgpu_sdma_unset_buffer_funcs_helper(adev);
134961a039d1SStanley Yang 		return 0;
1350a98cec22SAlex Deucher 	}
135161a039d1SStanley Yang 
135227488686SGraham Sider 	sdma_v6_0_ctxempty_int_enable(adev, false);
135361a039d1SStanley Yang 	sdma_v6_0_enable(adev, false);
135461a039d1SStanley Yang 
135561a039d1SStanley Yang 	return 0;
135661a039d1SStanley Yang }
135761a039d1SStanley Yang 
/**
 * sdma_v6_0_suspend - suspend the SDMA block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 *
 * Returns the result of sdma_v6_0_hw_fini().
 */
static int sdma_v6_0_suspend(void *handle)
{
	return sdma_v6_0_hw_fini(handle);
}
136461a039d1SStanley Yang 
/**
 * sdma_v6_0_resume - resume the SDMA block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a plain hardware initialisation.
 *
 * Returns the result of sdma_v6_0_hw_init().
 */
static int sdma_v6_0_resume(void *handle)
{
	return sdma_v6_0_hw_init(handle);
}
137161a039d1SStanley Yang 
sdma_v6_0_is_idle(void * handle)137261a039d1SStanley Yang static bool sdma_v6_0_is_idle(void *handle)
137361a039d1SStanley Yang {
137461a039d1SStanley Yang 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
137561a039d1SStanley Yang 	u32 i;
137661a039d1SStanley Yang 
137761a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
137861a039d1SStanley Yang 		u32 tmp = RREG32(sdma_v6_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
137961a039d1SStanley Yang 
138061a039d1SStanley Yang 		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
138161a039d1SStanley Yang 			return false;
138261a039d1SStanley Yang 	}
138361a039d1SStanley Yang 
138461a039d1SStanley Yang 	return true;
138561a039d1SStanley Yang }
138661a039d1SStanley Yang 
sdma_v6_0_wait_for_idle(void * handle)138761a039d1SStanley Yang static int sdma_v6_0_wait_for_idle(void *handle)
138861a039d1SStanley Yang {
138961a039d1SStanley Yang 	unsigned i;
139061a039d1SStanley Yang 	u32 sdma0, sdma1;
139161a039d1SStanley Yang 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
139261a039d1SStanley Yang 
139361a039d1SStanley Yang 	for (i = 0; i < adev->usec_timeout; i++) {
139461a039d1SStanley Yang 		sdma0 = RREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
139561a039d1SStanley Yang 		sdma1 = RREG32(sdma_v6_0_get_reg_offset(adev, 1, regSDMA0_STATUS_REG));
139661a039d1SStanley Yang 
139761a039d1SStanley Yang 		if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
139861a039d1SStanley Yang 			return 0;
139961a039d1SStanley Yang 		udelay(1);
140061a039d1SStanley Yang 	}
140161a039d1SStanley Yang 	return -ETIMEDOUT;
140261a039d1SStanley Yang }
140361a039d1SStanley Yang 
sdma_v6_0_ring_preempt_ib(struct amdgpu_ring * ring)140461a039d1SStanley Yang static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
140561a039d1SStanley Yang {
140661a039d1SStanley Yang 	int i, r = 0;
140761a039d1SStanley Yang 	struct amdgpu_device *adev = ring->adev;
140861a039d1SStanley Yang 	u32 index = 0;
140961a039d1SStanley Yang 	u64 sdma_gfx_preempt;
141061a039d1SStanley Yang 
141161a039d1SStanley Yang 	amdgpu_sdma_get_index_from_ring(ring, &index);
141261a039d1SStanley Yang 	sdma_gfx_preempt =
141361a039d1SStanley Yang 		sdma_v6_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
141461a039d1SStanley Yang 
141561a039d1SStanley Yang 	/* assert preemption condition */
141661a039d1SStanley Yang 	amdgpu_ring_set_preempt_cond_exec(ring, false);
141761a039d1SStanley Yang 
141861a039d1SStanley Yang 	/* emit the trailing fence */
141961a039d1SStanley Yang 	ring->trail_seq += 1;
142061a039d1SStanley Yang 	amdgpu_ring_alloc(ring, 10);
142161a039d1SStanley Yang 	sdma_v6_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
142261a039d1SStanley Yang 				  ring->trail_seq, 0);
142361a039d1SStanley Yang 	amdgpu_ring_commit(ring);
142461a039d1SStanley Yang 
142561a039d1SStanley Yang 	/* assert IB preemption */
142661a039d1SStanley Yang 	WREG32(sdma_gfx_preempt, 1);
142761a039d1SStanley Yang 
142861a039d1SStanley Yang 	/* poll the trailing fence */
142961a039d1SStanley Yang 	for (i = 0; i < adev->usec_timeout; i++) {
143061a039d1SStanley Yang 		if (ring->trail_seq ==
143161a039d1SStanley Yang 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
143261a039d1SStanley Yang 			break;
143361a039d1SStanley Yang 		udelay(1);
143461a039d1SStanley Yang 	}
143561a039d1SStanley Yang 
143661a039d1SStanley Yang 	if (i >= adev->usec_timeout) {
143761a039d1SStanley Yang 		r = -EINVAL;
143861a039d1SStanley Yang 		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
143961a039d1SStanley Yang 	}
144061a039d1SStanley Yang 
144161a039d1SStanley Yang 	/* deassert IB preemption */
144261a039d1SStanley Yang 	WREG32(sdma_gfx_preempt, 0);
144361a039d1SStanley Yang 
144461a039d1SStanley Yang 	/* deassert the preemption condition */
144561a039d1SStanley Yang 	amdgpu_ring_set_preempt_cond_exec(ring, true);
144661a039d1SStanley Yang 	return r;
144761a039d1SStanley Yang }
144861a039d1SStanley Yang 
sdma_v6_0_set_trap_irq_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)144961a039d1SStanley Yang static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
145061a039d1SStanley Yang 					struct amdgpu_irq_src *source,
145161a039d1SStanley Yang 					unsigned type,
145261a039d1SStanley Yang 					enum amdgpu_interrupt_state state)
145361a039d1SStanley Yang {
145461a039d1SStanley Yang 	u32 sdma_cntl;
145561a039d1SStanley Yang 
145661a039d1SStanley Yang 	u32 reg_offset = sdma_v6_0_get_reg_offset(adev, type, regSDMA0_CNTL);
145761a039d1SStanley Yang 
145860b73429SYifan Zha 	if (!amdgpu_sriov_vf(adev)) {
145961a039d1SStanley Yang 		sdma_cntl = RREG32(reg_offset);
146061a039d1SStanley Yang 		sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
146161a039d1SStanley Yang 				state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
146261a039d1SStanley Yang 		WREG32(reg_offset, sdma_cntl);
146360b73429SYifan Zha 	}
146461a039d1SStanley Yang 
146561a039d1SStanley Yang 	return 0;
146661a039d1SStanley Yang }
146761a039d1SStanley Yang 
sdma_v6_0_process_trap_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)146861a039d1SStanley Yang static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
146961a039d1SStanley Yang 				      struct amdgpu_irq_src *source,
147061a039d1SStanley Yang 				      struct amdgpu_iv_entry *entry)
147161a039d1SStanley Yang {
147261a039d1SStanley Yang 	int instances, queue;
147361a039d1SStanley Yang 	uint32_t mes_queue_id = entry->src_data[0];
147461a039d1SStanley Yang 
147561a039d1SStanley Yang 	DRM_DEBUG("IH: SDMA trap\n");
147661a039d1SStanley Yang 
147761a039d1SStanley Yang 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
147861a039d1SStanley Yang 		struct amdgpu_mes_queue *queue;
147961a039d1SStanley Yang 
148061a039d1SStanley Yang 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
148161a039d1SStanley Yang 
148261a039d1SStanley Yang 		spin_lock(&adev->mes.queue_id_lock);
148361a039d1SStanley Yang 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
148461a039d1SStanley Yang 		if (queue) {
148561a039d1SStanley Yang 			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
148661a039d1SStanley Yang 			amdgpu_fence_process(queue->ring);
148761a039d1SStanley Yang 		}
148861a039d1SStanley Yang 		spin_unlock(&adev->mes.queue_id_lock);
148961a039d1SStanley Yang 		return 0;
149061a039d1SStanley Yang 	}
149161a039d1SStanley Yang 
149261a039d1SStanley Yang 	queue = entry->ring_id & 0xf;
149361a039d1SStanley Yang 	instances = (entry->ring_id & 0xf0) >> 4;
149461a039d1SStanley Yang 	if (instances > 1) {
149561a039d1SStanley Yang 		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
149661a039d1SStanley Yang 		return -EINVAL;
149761a039d1SStanley Yang 	}
149861a039d1SStanley Yang 
149961a039d1SStanley Yang 	switch (entry->client_id) {
150061a039d1SStanley Yang 	case SOC21_IH_CLIENTID_GFX:
150161a039d1SStanley Yang 		switch (queue) {
150261a039d1SStanley Yang 		case 0:
150361a039d1SStanley Yang 			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
150461a039d1SStanley Yang 			break;
150561a039d1SStanley Yang 		default:
150661a039d1SStanley Yang 			break;
150761a039d1SStanley Yang 		}
150861a039d1SStanley Yang 		break;
150961a039d1SStanley Yang 	}
151061a039d1SStanley Yang 	return 0;
151161a039d1SStanley Yang }
151261a039d1SStanley Yang 
/**
 * sdma_v6_0_process_illegal_inst_irq - illegal instruction interrupt handler
 *
 * @adev: amdgpu_device pointer (unused)
 * @source: interrupt source (unused)
 * @entry: decoded interrupt vector entry (unused)
 *
 * Stub: no processing is done for this interrupt.
 *
 * Returns 0.
 */
static int
sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	return 0;
}
151961a039d1SStanley Yang 
/**
 * sdma_v6_0_set_clockgating_state - clock gating callback
 *
 * @handle: amdgpu_device pointer passed as an opaque handle (unused)
 * @state: requested clock gating state (unused)
 *
 * Stub: nothing is programmed here.
 *
 * Returns 0.
 */
static int
sdma_v6_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
{
	return 0;
}
152561a039d1SStanley Yang 
/**
 * sdma_v6_0_set_powergating_state - power gating callback
 *
 * @handle: amdgpu_device pointer passed as an opaque handle (unused)
 * @state: requested power gating state (unused)
 *
 * Stub: nothing is programmed here.
 *
 * Returns 0.
 */
static int
sdma_v6_0_set_powergating_state(void *handle, enum amd_powergating_state state)
{
	return 0;
}
153161a039d1SStanley Yang 
/**
 * sdma_v6_0_get_clockgating_state - report clock gating flags
 *
 * @handle: amdgpu_device pointer passed as an opaque handle (unused)
 * @flags: clock gating flags; left unmodified by this stub
 */
static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags)
{
	/* intentionally empty */
}
153561a039d1SStanley Yang 
153661a039d1SStanley Yang const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
153761a039d1SStanley Yang 	.name = "sdma_v6_0",
153861a039d1SStanley Yang 	.early_init = sdma_v6_0_early_init,
153961a039d1SStanley Yang 	.late_init = NULL,
154061a039d1SStanley Yang 	.sw_init = sdma_v6_0_sw_init,
154161a039d1SStanley Yang 	.sw_fini = sdma_v6_0_sw_fini,
154261a039d1SStanley Yang 	.hw_init = sdma_v6_0_hw_init,
154361a039d1SStanley Yang 	.hw_fini = sdma_v6_0_hw_fini,
154461a039d1SStanley Yang 	.suspend = sdma_v6_0_suspend,
154561a039d1SStanley Yang 	.resume = sdma_v6_0_resume,
154661a039d1SStanley Yang 	.is_idle = sdma_v6_0_is_idle,
154761a039d1SStanley Yang 	.wait_for_idle = sdma_v6_0_wait_for_idle,
154861a039d1SStanley Yang 	.soft_reset = sdma_v6_0_soft_reset,
154958e969b6SLikun Gao 	.check_soft_reset = sdma_v6_0_check_soft_reset,
155061a039d1SStanley Yang 	.set_clockgating_state = sdma_v6_0_set_clockgating_state,
155161a039d1SStanley Yang 	.set_powergating_state = sdma_v6_0_set_powergating_state,
155261a039d1SStanley Yang 	.get_clockgating_state = sdma_v6_0_get_clockgating_state,
155361a039d1SStanley Yang };
155461a039d1SStanley Yang 
155561a039d1SStanley Yang static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
155661a039d1SStanley Yang 	.type = AMDGPU_RING_TYPE_SDMA,
155761a039d1SStanley Yang 	.align_mask = 0xf,
155861a039d1SStanley Yang 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
155961a039d1SStanley Yang 	.support_64bit_ptrs = true,
1560bfa8cb05SYifan Zhang 	.secure_submission_supported = true,
156161a039d1SStanley Yang 	.get_rptr = sdma_v6_0_ring_get_rptr,
156261a039d1SStanley Yang 	.get_wptr = sdma_v6_0_ring_get_wptr,
156361a039d1SStanley Yang 	.set_wptr = sdma_v6_0_ring_set_wptr,
156461a039d1SStanley Yang 	.emit_frame_size =
156561a039d1SStanley Yang 		5 + /* sdma_v6_0_ring_init_cond_exec */
156661a039d1SStanley Yang 		6 + /* sdma_v6_0_ring_emit_hdp_flush */
156761a039d1SStanley Yang 		6 + /* sdma_v6_0_ring_emit_pipeline_sync */
156861a039d1SStanley Yang 		/* sdma_v6_0_ring_emit_vm_flush */
156961a039d1SStanley Yang 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
157061a039d1SStanley Yang 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
157161a039d1SStanley Yang 		10 + 10 + 10, /* sdma_v6_0_ring_emit_fence x3 for user fence, vm fence */
157261a039d1SStanley Yang 	.emit_ib_size = 5 + 7 + 6, /* sdma_v6_0_ring_emit_ib */
157361a039d1SStanley Yang 	.emit_ib = sdma_v6_0_ring_emit_ib,
157461a039d1SStanley Yang 	.emit_mem_sync = sdma_v6_0_ring_emit_mem_sync,
157561a039d1SStanley Yang 	.emit_fence = sdma_v6_0_ring_emit_fence,
157661a039d1SStanley Yang 	.emit_pipeline_sync = sdma_v6_0_ring_emit_pipeline_sync,
157761a039d1SStanley Yang 	.emit_vm_flush = sdma_v6_0_ring_emit_vm_flush,
157861a039d1SStanley Yang 	.emit_hdp_flush = sdma_v6_0_ring_emit_hdp_flush,
157961a039d1SStanley Yang 	.test_ring = sdma_v6_0_ring_test_ring,
158061a039d1SStanley Yang 	.test_ib = sdma_v6_0_ring_test_ib,
158161a039d1SStanley Yang 	.insert_nop = sdma_v6_0_ring_insert_nop,
158261a039d1SStanley Yang 	.pad_ib = sdma_v6_0_ring_pad_ib,
158361a039d1SStanley Yang 	.emit_wreg = sdma_v6_0_ring_emit_wreg,
158461a039d1SStanley Yang 	.emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
158561a039d1SStanley Yang 	.emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
158661a039d1SStanley Yang 	.init_cond_exec = sdma_v6_0_ring_init_cond_exec,
158761a039d1SStanley Yang 	.patch_cond_exec = sdma_v6_0_ring_patch_cond_exec,
158861a039d1SStanley Yang 	.preempt_ib = sdma_v6_0_ring_preempt_ib,
158961a039d1SStanley Yang };
159061a039d1SStanley Yang 
sdma_v6_0_set_ring_funcs(struct amdgpu_device * adev)159161a039d1SStanley Yang static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev)
159261a039d1SStanley Yang {
159361a039d1SStanley Yang 	int i;
159461a039d1SStanley Yang 
159561a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
159661a039d1SStanley Yang 		adev->sdma.instance[i].ring.funcs = &sdma_v6_0_ring_funcs;
159761a039d1SStanley Yang 		adev->sdma.instance[i].ring.me = i;
159861a039d1SStanley Yang 	}
159961a039d1SStanley Yang }
160061a039d1SStanley Yang 
160161a039d1SStanley Yang static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
160261a039d1SStanley Yang 	.set = sdma_v6_0_set_trap_irq_state,
160361a039d1SStanley Yang 	.process = sdma_v6_0_process_trap_irq,
160461a039d1SStanley Yang };
160561a039d1SStanley Yang 
160661a039d1SStanley Yang static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
160761a039d1SStanley Yang 	.process = sdma_v6_0_process_illegal_inst_irq,
160861a039d1SStanley Yang };
160961a039d1SStanley Yang 
sdma_v6_0_set_irq_funcs(struct amdgpu_device * adev)161061a039d1SStanley Yang static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
161161a039d1SStanley Yang {
161261a039d1SStanley Yang 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
161361a039d1SStanley Yang 					adev->sdma.num_instances;
161461a039d1SStanley Yang 	adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
161561a039d1SStanley Yang 	adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
161661a039d1SStanley Yang }
161761a039d1SStanley Yang 
161861a039d1SStanley Yang /**
161961a039d1SStanley Yang  * sdma_v6_0_emit_copy_buffer - copy buffer using the sDMA engine
162061a039d1SStanley Yang  *
162101543dcfSArthur Grillo  * @ib: indirect buffer to fill with commands
162261a039d1SStanley Yang  * @src_offset: src GPU address
162361a039d1SStanley Yang  * @dst_offset: dst GPU address
162461a039d1SStanley Yang  * @byte_count: number of bytes to xfer
162501543dcfSArthur Grillo  * @tmz: if a secure copy should be used
162661a039d1SStanley Yang  *
162761a039d1SStanley Yang  * Copy GPU buffers using the DMA engine.
162861a039d1SStanley Yang  * Used by the amdgpu ttm implementation to move pages if
162961a039d1SStanley Yang  * registered as the asic copy callback.
163061a039d1SStanley Yang  */
sdma_v6_0_emit_copy_buffer(struct amdgpu_ib * ib,uint64_t src_offset,uint64_t dst_offset,uint32_t byte_count,bool tmz)163161a039d1SStanley Yang static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib,
163261a039d1SStanley Yang 				       uint64_t src_offset,
163361a039d1SStanley Yang 				       uint64_t dst_offset,
163461a039d1SStanley Yang 				       uint32_t byte_count,
163561a039d1SStanley Yang 				       bool tmz)
163661a039d1SStanley Yang {
163761a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
163861a039d1SStanley Yang 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
163961a039d1SStanley Yang 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
164061a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = byte_count - 1;
164161a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
164261a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
164361a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
164461a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
164561a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
164661a039d1SStanley Yang }
164761a039d1SStanley Yang 
164861a039d1SStanley Yang /**
164961a039d1SStanley Yang  * sdma_v6_0_emit_fill_buffer - fill buffer using the sDMA engine
165061a039d1SStanley Yang  *
165101543dcfSArthur Grillo  * @ib: indirect buffer to fill
165261a039d1SStanley Yang  * @src_data: value to write to buffer
165361a039d1SStanley Yang  * @dst_offset: dst GPU address
165461a039d1SStanley Yang  * @byte_count: number of bytes to xfer
165561a039d1SStanley Yang  *
165661a039d1SStanley Yang  * Fill GPU buffers using the DMA engine.
165761a039d1SStanley Yang  */
sdma_v6_0_emit_fill_buffer(struct amdgpu_ib * ib,uint32_t src_data,uint64_t dst_offset,uint32_t byte_count)165861a039d1SStanley Yang static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
165961a039d1SStanley Yang 				       uint32_t src_data,
166061a039d1SStanley Yang 				       uint64_t dst_offset,
166161a039d1SStanley Yang 				       uint32_t byte_count)
166261a039d1SStanley Yang {
166361a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL);
166461a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
166561a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
166661a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = src_data;
166761a039d1SStanley Yang 	ib->ptr[ib->length_dw++] = byte_count - 1;
166861a039d1SStanley Yang }
166961a039d1SStanley Yang 
167061a039d1SStanley Yang static const struct amdgpu_buffer_funcs sdma_v6_0_buffer_funcs = {
167161a039d1SStanley Yang 	.copy_max_bytes = 0x400000,
167261a039d1SStanley Yang 	.copy_num_dw = 7,
167361a039d1SStanley Yang 	.emit_copy_buffer = sdma_v6_0_emit_copy_buffer,
167461a039d1SStanley Yang 
167561a039d1SStanley Yang 	.fill_max_bytes = 0x400000,
167661a039d1SStanley Yang 	.fill_num_dw = 5,
167761a039d1SStanley Yang 	.emit_fill_buffer = sdma_v6_0_emit_fill_buffer,
167861a039d1SStanley Yang };
167961a039d1SStanley Yang 
sdma_v6_0_set_buffer_funcs(struct amdgpu_device * adev)168061a039d1SStanley Yang static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev)
168161a039d1SStanley Yang {
168261a039d1SStanley Yang 	adev->mman.buffer_funcs = &sdma_v6_0_buffer_funcs;
168361a039d1SStanley Yang 	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
168461a039d1SStanley Yang }
168561a039d1SStanley Yang 
168661a039d1SStanley Yang static const struct amdgpu_vm_pte_funcs sdma_v6_0_vm_pte_funcs = {
168761a039d1SStanley Yang 	.copy_pte_num_dw = 7,
168861a039d1SStanley Yang 	.copy_pte = sdma_v6_0_vm_copy_pte,
168961a039d1SStanley Yang 	.write_pte = sdma_v6_0_vm_write_pte,
169061a039d1SStanley Yang 	.set_pte_pde = sdma_v6_0_vm_set_pte_pde,
169161a039d1SStanley Yang };
169261a039d1SStanley Yang 
sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device * adev)169361a039d1SStanley Yang static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev)
169461a039d1SStanley Yang {
169561a039d1SStanley Yang 	unsigned i;
169661a039d1SStanley Yang 
169761a039d1SStanley Yang 	adev->vm_manager.vm_pte_funcs = &sdma_v6_0_vm_pte_funcs;
169861a039d1SStanley Yang 	for (i = 0; i < adev->sdma.num_instances; i++) {
169961a039d1SStanley Yang 		adev->vm_manager.vm_pte_scheds[i] =
170061a039d1SStanley Yang 			&adev->sdma.instance[i].ring.sched;
170161a039d1SStanley Yang 	}
170261a039d1SStanley Yang 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
170361a039d1SStanley Yang }
170461a039d1SStanley Yang 
170561a039d1SStanley Yang const struct amdgpu_ip_block_version sdma_v6_0_ip_block = {
170661a039d1SStanley Yang 	.type = AMD_IP_BLOCK_TYPE_SDMA,
170761a039d1SStanley Yang 	.major = 6,
170861a039d1SStanley Yang 	.minor = 0,
170961a039d1SStanley Yang 	.rev = 0,
171061a039d1SStanley Yang 	.funcs = &sdma_v6_0_ip_funcs,
171161a039d1SStanley Yang };
1712