/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_xcp.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"

#include "sdma/sdma_4_4_2_offset.h"
#include "sdma/sdma_4_4_2_sh_mask.h"

#include "soc15_common.h"
#include "soc15.h"
#include "vega10_sdma_pkt_open.h"

#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"

#include "amdgpu_ras.h"

MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");

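/*
 * Helpers that translate a logical SDMA instance plus a relative register
 * offset into the absolute MMIO address for this device.  Note that both
 * macros expect a local 'adev' pointer to be in scope at the call site.
 */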
#define WREG32_SDMA(instance, offset, value) \
	WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
#define RREG32_SDMA(instance, offset) \
	RREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)))

static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);

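/*
 * GET_INST() maps the logical SDMA instance used throughout the driver to
 * the physical (device) instance, which may differ on parts partitioned
 * into multiple AIDs.
 */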
static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
		u32 instance, u32 offset)
{
	u32 dev_inst = GET_INST(SDMA0, instance);

	return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
}

static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
{
	switch (seq_num) {
	case 0:
		return SOC15_IH_CLIENTID_SDMA0;
	case 1:
		return SOC15_IH_CLIENTID_SDMA1;
	case 2:
		return SOC15_IH_CLIENTID_SDMA2;
	case 3:
		return SOC15_IH_CLIENTID_SDMA3;
	default:
		return -EINVAL;
	}
}

static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
{
	switch (client_id) {
	case SOC15_IH_CLIENTID_SDMA0:
		return 0;
	case SOC15_IH_CLIENTID_SDMA1:
		return 1;
	case SOC15_IH_CLIENTID_SDMA2:
		return 2;
	case SOC15_IH_CLIENTID_SDMA3:
		return 3;
	default:
		return -EINVAL;
	}
}

static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev,
						   uint32_t inst_mask)
{
	u32 val;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, NUM_BANKS, 4);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG,
				    PIPE_INTERLEAVE_SIZE, 0);
		WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG, val);

		val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, NUM_BANKS,
				    4);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ,
				    PIPE_INTERLEAVE_SIZE, 0);
		WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ, val);
	}
}

/**
 * sdma_v4_4_2_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
{
	int ret, i;

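	/* On SDMA 4.4.2 every instance runs the same firmware image, so it
	 * is loaded once with the duplicate flag set and shared; otherwise
	 * one image is loaded per instance.
	 */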
	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2)) {
			ret = amdgpu_sdma_init_microcode(adev, 0, true);
			break;
		} else {
			ret = amdgpu_sdma_init_microcode(adev, i, false);
			if (ret)
				return ret;
		}
	}

	return ret;
}

/**
 * sdma_v4_4_2_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware.
 */
static uint64_t sdma_v4_4_2_ring_get_rptr(struct amdgpu_ring *ring)
{
	u64 *rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);

	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
	return ((*rptr) >> 2);
}

/**
 * sdma_v4_4_2_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware.
 */
static uint64_t sdma_v4_4_2_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
	} else {
		wptr = RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI);
		wptr = wptr << 32;
		wptr |= RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR);
		DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
				ring->me, wptr);
	}

	return wptr >> 2;
}

/**
 * sdma_v4_4_2_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.
 */
static void sdma_v4_4_2_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");
	if (ring->use_doorbell) {
		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];

		DRM_DEBUG("Using doorbell -- "
				"wptr_offs == 0x%08x "
				"lower_32_bits(ring->wptr) << 2 == 0x%08x "
				"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
				ring->wptr_offs,
				lower_32_bits(ring->wptr << 2),
				upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		WRITE_ONCE(*wb, (ring->wptr << 2));
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
				ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		DRM_DEBUG("Not using doorbell -- "
				"regSDMA%i_GFX_RB_WPTR == 0x%08x "
				"regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
				ring->me,
				lower_32_bits(ring->wptr << 2),
				ring->me,
				upper_32_bits(ring->wptr << 2));
		WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR,
			    lower_32_bits(ring->wptr << 2));
		WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI,
			    upper_32_bits(ring->wptr << 2));
	}
}

/**
 * sdma_v4_4_2_page_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware.
 */
static uint64_t sdma_v4_4_2_page_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
	} else {
		wptr = RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI);
		wptr = wptr << 32;
		wptr |= RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR);
	}

	return wptr >> 2;
}

/**
 * sdma_v4_4_2_page_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.
 */
static void sdma_v4_4_2_page_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];

		/* XXX check if swapping is necessary on BE */
		WRITE_ONCE(*wb, (ring->wptr << 2));
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		uint64_t wptr = ring->wptr << 2;

		WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR,
			    lower_32_bits(wptr));
		WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI,
			    upper_32_bits(wptr));
	}
}

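/*
 * Fill the ring with 'count' NOPs.  When the engine supports burst NOPs,
 * a single NOP header with COUNT = count - 1 tells the engine to skip the
 * remaining padding dwords in one go instead of decoding each one.
 */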
static void sdma_v4_4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * sdma_v4_4_2_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring.
 */
static void sdma_v4_4_2_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	/* IB packet must end on an 8 DW boundary */
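	/* The INDIRECT packet below is 6 dwords, so padding until
	 * wptr % 8 == 2 makes the packet end exactly on that boundary.
	 */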
	sdma_v4_4_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);

}

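/*
 * Emit a POLL_REGMEM packet: the engine stalls until the value at the given
 * memory location (mem_space = 1) or register (mem_space = 0) equals 'ref'
 * under 'mask', retrying up to 0xfff times with 'inv' as the poll interval.
 * The hdp flag marks the poll as part of an HDP flush sequence.
 */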
static void sdma_v4_4_2_wait_reg_mem(struct amdgpu_ring *ring,
				   int mem_space, int hdp,
				   uint32_t addr0, uint32_t addr1,
				   uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	if (mem_space) {
		/* memory */
		amdgpu_ring_write(ring, addr0);
		amdgpu_ring_write(ring, addr1);
	} else {
		/* registers */
		amdgpu_ring_write(ring, addr0 << 2);
		amdgpu_ring_write(ring, addr1 << 2);
	}
	amdgpu_ring_write(ring, ref); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
}

/**
 * sdma_v4_4_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask = 0;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;

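	/* Each AID has its own HDP flush done/req registers; pick the bit
	 * matching this instance's position within its AID.
	 */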
	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
		       << (ring->me % adev->sdma.num_inst_per_aid);

	sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
			       ref_and_mask, ref_and_mask, 10);
}

/**
 * sdma_v4_4_2_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.
 */
static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}


/**
 * sdma_v4_4_2_inst_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the gfx async dma ring buffers.
 */
static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
				      uint32_t inst_mask)
{
	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
	u32 rb_cntl, ib_cntl;
	int i, unset = 0;

	for_each_inst(i, inst_mask) {
		sdma[i] = &adev->sdma.instance[i].ring;

		if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
			amdgpu_ttm_set_buffer_funcs_status(adev, false);
			unset = 1;
		}

		rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
		ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
	}
}

/**
 * sdma_v4_4_2_inst_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the compute async dma queues.
 */
static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
				      uint32_t inst_mask)
{
	/* XXX todo */
}

/**
 * sdma_v4_4_2_inst_page_stop - stop the page async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the page async dma ring buffers.
 */
static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
				       uint32_t inst_mask)
{
	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
	u32 rb_cntl, ib_cntl;
	int i;
	bool unset = false;

	for_each_inst(i, inst_mask) {
		sdma[i] = &adev->sdma.instance[i].page;

		if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
			(!unset)) {
			amdgpu_ttm_set_buffer_funcs_status(adev, false);
			unset = true;
		}

		rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
					RB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
		ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL,
					IB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
	}
}

/**
 * sdma_v4_4_2_inst_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines context switch.
 */
static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev,
					       bool enable, uint32_t inst_mask)
{
	u32 f32_cntl, phase_quantum = 0;
	int i;

	if (amdgpu_sdma_phase_quantum) {
		unsigned value = amdgpu_sdma_phase_quantum;
		unsigned unit = 0;

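		/* The PHASE_QUANTUM registers encode the quantum as
		 * VALUE << UNIT; halve the requested value and bump the
		 * unit until it fits, clamping at the maximum encodable
		 * quantum.
		 */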
		while (value > (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
				SDMA_PHASE0_QUANTUM__VALUE__SHIFT)) {
			value = (value + 1) >> 1;
			unit++;
		}
		if (unit > (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
			    SDMA_PHASE0_QUANTUM__UNIT__SHIFT)) {
			value = (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
				 SDMA_PHASE0_QUANTUM__VALUE__SHIFT);
			unit = (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
				SDMA_PHASE0_QUANTUM__UNIT__SHIFT);
			WARN_ONCE(1,
			"clamping sdma_phase_quantum to %uK clock cycles\n",
				  value << unit);
		}
		phase_quantum =
			value << SDMA_PHASE0_QUANTUM__VALUE__SHIFT |
			unit  << SDMA_PHASE0_QUANTUM__UNIT__SHIFT;
	}

	for_each_inst(i, inst_mask) {
		f32_cntl = RREG32_SDMA(i, regSDMA_CNTL);
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL,
				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
		if (enable && amdgpu_sdma_phase_quantum) {
			WREG32_SDMA(i, regSDMA_PHASE0_QUANTUM, phase_quantum);
			WREG32_SDMA(i, regSDMA_PHASE1_QUANTUM, phase_quantum);
			WREG32_SDMA(i, regSDMA_PHASE2_QUANTUM, phase_quantum);
		}
		WREG32_SDMA(i, regSDMA_CNTL, f32_cntl);

		/* Extend page fault timeout to avoid interrupt storm */
		WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080);
	}
}

/**
 * sdma_v4_4_2_inst_enable - enable/disable the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines.
 */
static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable,
				    uint32_t inst_mask)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
		sdma_v4_4_2_inst_rlc_stop(adev, inst_mask);
		if (adev->sdma.has_page_queue)
			sdma_v4_4_2_inst_page_stop(adev, inst_mask);

		/* SDMA FW needs to respond to FREEZE requests during reset.
		 * Keep it running during reset */
		if (!amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
			return;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
		return;

	for_each_inst(i, inst_mask) {
		f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL);
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1);
		WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl);
	}
}

/*
 * sdma_v4_4_2_rb_cntl - get parameters for rb_cntl
 */
static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
{
	/* Set ring buffer size in dwords */
	uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
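	/* RB_SIZE is programmed as log2 of the ring length in dwords,
	 * hence the order_base_2() above.
	 */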

	barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
				RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
	return rb_cntl;
}

/**
 * sdma_v4_4_2_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @i: instance to resume
 *
 * Set up the gfx DMA ring buffers and enable them.
 * Returns 0 for success, error for failure.
 */
static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
	u32 wb_offset;
	u32 doorbell;
	u32 doorbell_offset;
	u64 wptr_gpu_addr;

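	/* rptr_offs is an index into the dword-sized writeback array, so
	 * multiply by four to get the byte offset from wb.gpu_addr.
	 */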
	wb_offset = (ring->rptr_offs * 4);

	rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
	rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
	WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);

	/* set the wb address whether it's enabled or not */
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI,
	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_LO,
	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
				RPTR_WRITEBACK_ENABLE, 1);

	WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
	WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);

	ring->wptr = 0;

	/* before programming wptr to a smaller value, minor_ptr_update must be set first */
	WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);

	doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
	doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);

	doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE,
				 ring->use_doorbell);
	doorbell_offset = REG_SET_FIELD(doorbell_offset,
					SDMA_GFX_DOORBELL_OFFSET,
					OFFSET, ring->doorbell_index);
	WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
	WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);

	sdma_v4_4_2_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr is programmed */
	WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 0);

	/* setup the wptr shadow polling */
	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_LO,
		    lower_32_bits(wptr_gpu_addr));
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_HI,
		    upper_32_bits(wptr_gpu_addr));
	wptr_poll_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL);
	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
				       SDMA_GFX_RB_WPTR_POLL_CNTL,
				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 1);
	WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);

	ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
}

/**
 * sdma_v4_4_2_page_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @i: instance to resume
 *
 * Set up the page DMA ring buffers and enable them.
 * Returns 0 for success, error for failure.
 */
static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
{
	struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
	u32 wb_offset;
	u32 doorbell;
	u32 doorbell_offset;
	u64 wptr_gpu_addr;

	wb_offset = (ring->rptr_offs * 4);

	rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
	rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
	WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);

	/* set the wb address whether it's enabled or not */
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_LO,
	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
				RPTR_WRITEBACK_ENABLE, 1);

	WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
	WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);

	ring->wptr = 0;

	/* before programming wptr to a smaller value, minor_ptr_update must be set first */
	WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);

	doorbell = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL);
	doorbell_offset = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET);

	doorbell = REG_SET_FIELD(doorbell, SDMA_PAGE_DOORBELL, ENABLE,
				 ring->use_doorbell);
	doorbell_offset = REG_SET_FIELD(doorbell_offset,
					SDMA_PAGE_DOORBELL_OFFSET,
					OFFSET, ring->doorbell_index);
	WREG32_SDMA(i, regSDMA_PAGE_DOORBELL, doorbell);
	WREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET, doorbell_offset);

	/* paging queue doorbell range is setup at sdma_v4_4_2_gfx_resume */
	sdma_v4_4_2_page_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr is programmed */
	WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 0);

	/* setup the wptr shadow polling */
	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_LO,
		    lower_32_bits(wptr_gpu_addr));
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_HI,
		    upper_32_bits(wptr_gpu_addr));
	wptr_poll_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL);
	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
				       SDMA_PAGE_RB_WPTR_POLL_CNTL,
				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev)? 1 : 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 1);
	WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);

	ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
}

static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
{

}

/**
 * sdma_v4_4_2_inst_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the compute DMA queues and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev,
				       uint32_t inst_mask)
{
	sdma_v4_4_2_init_pg(adev);

	return 0;
}

/**
 * sdma_v4_4_2_inst_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
					   uint32_t inst_mask)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v4_4_2_inst_enable(adev, false, inst_mask);

	for_each_inst(i, inst_mask) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;

		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
				le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		WREG32_SDMA(i, regSDMA_UCODE_ADDR, 0);

		for (j = 0; j < fw_size; j++)
			WREG32_SDMA(i, regSDMA_UCODE_DATA,
				    le32_to_cpup(fw_data++));

		WREG32_SDMA(i, regSDMA_UCODE_ADDR,
			    adev->sdma.instance[i].fw_version);
	}

	return 0;
}

/**
 * sdma_v4_4_2_inst_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the DMA engines and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
				  uint32_t inst_mask)
{
	struct amdgpu_ring *ring;
	uint32_t tmp_mask;
	int i, r = 0;

	if (amdgpu_sriov_vf(adev)) {
		sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
		sdma_v4_4_2_inst_enable(adev, false, inst_mask);
	} else {
		/* bypass sdma microcode loading on Gopher */
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
		    adev->sdma.instance[0].fw) {
			r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
			if (r)
				return r;
		}

		/* unhalt the MEs */
		sdma_v4_4_2_inst_enable(adev, true, inst_mask);
		/* enable sdma ring preemption */
		sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
	}

	/* start the gfx rings and rlc compute queues */
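	/* for_each_inst() consumes the bits of the mask it walks, so
	 * iterate over a scratch copy and keep inst_mask intact for the
	 * later loops.
	 */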
	tmp_mask = inst_mask;
	for_each_inst(i, tmp_mask) {
		uint32_t temp;

		WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
		sdma_v4_4_2_gfx_resume(adev, i);
		if (adev->sdma.has_page_queue)
			sdma_v4_4_2_page_resume(adev, i);

		/* set utc l1 enable flag always to 1 */
		temp = RREG32_SDMA(i, regSDMA_CNTL);
		temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
		/* enable context empty interrupt during initialization */
		temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
		WREG32_SDMA(i, regSDMA_CNTL, temp);

		if (!amdgpu_sriov_vf(adev)) {
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				/* unhalt engine */
				temp = RREG32_SDMA(i, regSDMA_F32_CNTL);
				temp = REG_SET_FIELD(temp, SDMA_F32_CNTL, HALT, 0);
				WREG32_SDMA(i, regSDMA_F32_CNTL, temp);
			}
		}
	}

	if (amdgpu_sriov_vf(adev)) {
		sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
		sdma_v4_4_2_inst_enable(adev, true, inst_mask);
	} else {
		r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask);
		if (r)
			return r;
	}

	tmp_mask = inst_mask;
	for_each_inst(i, tmp_mask) {
		ring = &adev->sdma.instance[i].ring;

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;

		if (adev->sdma.has_page_queue) {
			struct amdgpu_ring *page = &adev->sdma.instance[i].page;

			r = amdgpu_ring_test_helper(page);
			if (r)
				return r;

			if (adev->mman.buffer_funcs_ring == page)
				amdgpu_ttm_set_buffer_funcs_status(adev, true);
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return r;
}

/**
 * sdma_v4_4_2_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory.
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_4_2_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r)
		goto error_free_wb;

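	/* One WRITE_LINEAR packet: header, destination address low/high,
	 * dword count minus one, then the payload (0xDEADBEEF).
	 */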
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_wb:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * sdma_v4_4_2_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err0;

	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}


/**
 * sdma_v4_4_2_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA.
 */
static void sdma_v4_4_2_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	unsigned bytes = count * 8;

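	/* COPY_LINEAR encodes the transfer size as byte count minus one,
	 * hence 'bytes - 1' below; each PTE is 8 bytes.
	 */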
10887138fc88SLe Ma 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
10897138fc88SLe Ma 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
10907138fc88SLe Ma 	ib->ptr[ib->length_dw++] = bytes - 1;
10917138fc88SLe Ma 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
10927138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(src);
10937138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(src);
10947138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
10957138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
10967138fc88SLe Ma 
10977138fc88SLe Ma }
10987138fc88SLe Ma 
10997138fc88SLe Ma /**
11007138fc88SLe Ma  * sdma_v4_4_2_vm_write_pte - update PTEs by writing them manually
11017138fc88SLe Ma  *
11027138fc88SLe Ma  * @ib: indirect buffer to fill with commands
11037138fc88SLe Ma  * @pe: addr of the page entry
11047138fc88SLe Ma  * @value: dst addr to write into pe
11057138fc88SLe Ma  * @count: number of page entries to update
11067138fc88SLe Ma  * @incr: increase next addr by incr bytes
11077138fc88SLe Ma  *
11087138fc88SLe Ma  * Update PTEs by writing them manually using sDMA.
11097138fc88SLe Ma  */
sdma_v4_4_2_vm_write_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t value,unsigned count,uint32_t incr)11107138fc88SLe Ma static void sdma_v4_4_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
11117138fc88SLe Ma 				   uint64_t value, unsigned count,
11127138fc88SLe Ma 				   uint32_t incr)
11137138fc88SLe Ma {
11147138fc88SLe Ma 	unsigned ndw = count * 2;
11157138fc88SLe Ma 
11167138fc88SLe Ma 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
11177138fc88SLe Ma 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
11187138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
11197138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
11207138fc88SLe Ma 	ib->ptr[ib->length_dw++] = ndw - 1;
11217138fc88SLe Ma 	for (; ndw > 0; ndw -= 2) {
11227138fc88SLe Ma 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
11237138fc88SLe Ma 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
11247138fc88SLe Ma 		value += incr;
11257138fc88SLe Ma 	}
11267138fc88SLe Ma }
11277138fc88SLe Ma 
11287138fc88SLe Ma /**
11297138fc88SLe Ma  * sdma_v4_4_2_vm_set_pte_pde - update the page tables using sDMA
11307138fc88SLe Ma  *
11317138fc88SLe Ma  * @ib: indirect buffer to fill with commands
11327138fc88SLe Ma  * @pe: addr of the page entry
11337138fc88SLe Ma  * @addr: dst addr to write into pe
11347138fc88SLe Ma  * @count: number of page entries to update
11357138fc88SLe Ma  * @incr: increase next addr by incr bytes
11367138fc88SLe Ma  * @flags: access flags
11377138fc88SLe Ma  *
11387138fc88SLe Ma  * Update the page tables using sDMA.
11397138fc88SLe Ma  */
sdma_v4_4_2_vm_set_pte_pde(struct amdgpu_ib * ib,uint64_t pe,uint64_t addr,unsigned count,uint32_t incr,uint64_t flags)11407138fc88SLe Ma static void sdma_v4_4_2_vm_set_pte_pde(struct amdgpu_ib *ib,
11417138fc88SLe Ma 				     uint64_t pe,
11427138fc88SLe Ma 				     uint64_t addr, unsigned count,
11437138fc88SLe Ma 				     uint32_t incr, uint64_t flags)
11447138fc88SLe Ma {
11457138fc88SLe Ma 	/* for physically contiguous pages (vram) */
11467138fc88SLe Ma 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
11477138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
11487138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
11497138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
11507138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
11517138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
11527138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
11537138fc88SLe Ma 	ib->ptr[ib->length_dw++] = incr; /* increment size */
11547138fc88SLe Ma 	ib->ptr[ib->length_dw++] = 0;
11557138fc88SLe Ma 	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
11567138fc88SLe Ma }
11577138fc88SLe Ma 
11587138fc88SLe Ma /**
11597138fc88SLe Ma  * sdma_v4_4_2_ring_pad_ib - pad the IB to the required number of dw
11607138fc88SLe Ma  *
11617138fc88SLe Ma  * @ring: amdgpu_ring structure holding ring information
11627138fc88SLe Ma  * @ib: indirect buffer to fill with padding
11637138fc88SLe Ma  */
11647138fc88SLe Ma static void sdma_v4_4_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
11657138fc88SLe Ma {
11667138fc88SLe Ma 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
11677138fc88SLe Ma 	u32 pad_count;
11687138fc88SLe Ma 	int i;
11697138fc88SLe Ma 
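	/* pad the IB up to the next multiple of 8 dwords; (-length) & 7 is the shortfall */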
11707138fc88SLe Ma 	pad_count = (-ib->length_dw) & 7;
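	/* when burst NOPs are supported, the first NOP header absorbs the remaining pad dwords */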
11717138fc88SLe Ma 	for (i = 0; i < pad_count; i++)
11727138fc88SLe Ma 		if (sdma && sdma->burst_nop && (i == 0))
11737138fc88SLe Ma 			ib->ptr[ib->length_dw++] =
11747138fc88SLe Ma 				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
11757138fc88SLe Ma 				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
11767138fc88SLe Ma 		else
11777138fc88SLe Ma 			ib->ptr[ib->length_dw++] =
11787138fc88SLe Ma 				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
11797138fc88SLe Ma }
11807138fc88SLe Ma 
11817138fc88SLe Ma 
11827138fc88SLe Ma /**
11837138fc88SLe Ma  * sdma_v4_4_2_ring_emit_pipeline_sync - sync the pipeline
11847138fc88SLe Ma  *
11857138fc88SLe Ma  * @ring: amdgpu_ring pointer
11867138fc88SLe Ma  *
11877138fc88SLe Ma  * Make sure all previous operations are completed.
11887138fc88SLe Ma  */
11897138fc88SLe Ma static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
11907138fc88SLe Ma {
11917138fc88SLe Ma 	uint32_t seq = ring->fence_drv.sync_seq;
11927138fc88SLe Ma 	uint64_t addr = ring->fence_drv.gpu_addr;
11937138fc88SLe Ma 
11947138fc88SLe Ma 	/* wait for idle */
11957138fc88SLe Ma 	sdma_v4_4_2_wait_reg_mem(ring, 1, 0,
11967138fc88SLe Ma 			       addr & 0xfffffffc,
11977138fc88SLe Ma 			       upper_32_bits(addr) & 0xffffffff,
11987138fc88SLe Ma 			       seq, 0xffffffff, 4);
11997138fc88SLe Ma }
12007138fc88SLe Ma 
12017138fc88SLe Ma 
12027138fc88SLe Ma /**
12037138fc88SLe Ma  * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
12047138fc88SLe Ma  *
12057138fc88SLe Ma  * @ring: amdgpu_ring pointer
12067138fc88SLe Ma  * @vmid: vmid number to use
12077138fc88SLe Ma  * @pd_addr: page directory base address
12087138fc88SLe Ma  *
12097138fc88SLe Ma  * Update the page table base and flush the VM TLB
12107138fc88SLe Ma  * using sDMA.
12117138fc88SLe Ma  */
12127138fc88SLe Ma static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
12137138fc88SLe Ma 					 unsigned vmid, uint64_t pd_addr)
12147138fc88SLe Ma {
12157138fc88SLe Ma 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
12167138fc88SLe Ma }
12177138fc88SLe Ma 
12187138fc88SLe Ma static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
12197138fc88SLe Ma 				     uint32_t reg, uint32_t val)
12207138fc88SLe Ma {
12217138fc88SLe Ma 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
12227138fc88SLe Ma 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
12237138fc88SLe Ma 	amdgpu_ring_write(ring, reg);
12247138fc88SLe Ma 	amdgpu_ring_write(ring, val);
12257138fc88SLe Ma }
12267138fc88SLe Ma 
12277138fc88SLe Ma static void sdma_v4_4_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
12287138fc88SLe Ma 					 uint32_t val, uint32_t mask)
12297138fc88SLe Ma {
12307138fc88SLe Ma 	sdma_v4_4_2_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
12317138fc88SLe Ma }
12327138fc88SLe Ma 
12337138fc88SLe Ma static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
12347138fc88SLe Ma {
12357138fc88SLe Ma 	switch (adev->ip_versions[SDMA0_HWIP][0]) {
12367138fc88SLe Ma 	case IP_VERSION(4, 4, 2):
12377138fc88SLe Ma 		return false;
12387138fc88SLe Ma 	default:
12397138fc88SLe Ma 		return false;
12407138fc88SLe Ma 	}
12417138fc88SLe Ma }
12427138fc88SLe Ma 
12437138fc88SLe Ma static int sdma_v4_4_2_early_init(void *handle)
12447138fc88SLe Ma {
12457138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
12467138fc88SLe Ma 	int r;
12477138fc88SLe Ma 
12487138fc88SLe Ma 	r = sdma_v4_4_2_init_microcode(adev);
12497138fc88SLe Ma 	if (r) {
12507138fc88SLe Ma 		DRM_ERROR("Failed to load sdma firmware!\n");
12517138fc88SLe Ma 		return r;
12527138fc88SLe Ma 	}
12537138fc88SLe Ma 
12547138fc88SLe Ma 	/* TODO: Page queue breaks driver reload under SRIOV */
12557138fc88SLe Ma 	if (sdma_v4_4_2_fw_support_paging_queue(adev))
12567138fc88SLe Ma 		adev->sdma.has_page_queue = true;
12577138fc88SLe Ma 
12587138fc88SLe Ma 	sdma_v4_4_2_set_ring_funcs(adev);
12597138fc88SLe Ma 	sdma_v4_4_2_set_buffer_funcs(adev);
12607138fc88SLe Ma 	sdma_v4_4_2_set_vm_pte_funcs(adev);
12617138fc88SLe Ma 	sdma_v4_4_2_set_irq_funcs(adev);
12621e69fde7SHawking Zhang 	sdma_v4_4_2_set_ras_funcs(adev);
12637138fc88SLe Ma 
12647138fc88SLe Ma 	return 0;
12657138fc88SLe Ma }
12667138fc88SLe Ma 
12677138fc88SLe Ma #if 0
12687138fc88SLe Ma static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
12697138fc88SLe Ma 		void *err_data,
12707138fc88SLe Ma 		struct amdgpu_iv_entry *entry);
12717138fc88SLe Ma #endif
12727138fc88SLe Ma 
12737138fc88SLe Ma static int sdma_v4_4_2_late_init(void *handle)
12747138fc88SLe Ma {
12757138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
12767138fc88SLe Ma #if 0
12777138fc88SLe Ma 	struct ras_ih_if ih_info = {
12787138fc88SLe Ma 		.cb = sdma_v4_4_2_process_ras_data_cb,
12797138fc88SLe Ma 	};
12807138fc88SLe Ma #endif
12817138fc88SLe Ma 	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
12827138fc88SLe Ma 		if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
12837138fc88SLe Ma 		    adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
12847138fc88SLe Ma 			adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
12857138fc88SLe Ma 	}
12867138fc88SLe Ma 
12877138fc88SLe Ma 	return 0;
12887138fc88SLe Ma }
12897138fc88SLe Ma 
12907138fc88SLe Ma static int sdma_v4_4_2_sw_init(void *handle)
12917138fc88SLe Ma {
12927138fc88SLe Ma 	struct amdgpu_ring *ring;
12937138fc88SLe Ma 	int r, i;
12947138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1295f786b1d4SLe Ma 	u32 aid_id;
12967138fc88SLe Ma 
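	/* Interrupt sources are registered once per SDMA instance within an AID;
	 * the node id in the IV entry later tells which AID raised the interrupt
	 * (see sdma_v4_4_2_process_trap_irq).
	 */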
12977138fc88SLe Ma 	/* SDMA trap event */
1298f786b1d4SLe Ma 	for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
12997138fc88SLe Ma 		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
13007138fc88SLe Ma 				      SDMA0_4_0__SRCID__SDMA_TRAP,
13017138fc88SLe Ma 				      &adev->sdma.trap_irq);
13027138fc88SLe Ma 		if (r)
13037138fc88SLe Ma 			return r;
13047138fc88SLe Ma 	}
13057138fc88SLe Ma 
13067138fc88SLe Ma 	/* SDMA SRAM ECC event */
1307f786b1d4SLe Ma 	for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
13087138fc88SLe Ma 		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
13097138fc88SLe Ma 				      SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
13107138fc88SLe Ma 				      &adev->sdma.ecc_irq);
13117138fc88SLe Ma 		if (r)
13127138fc88SLe Ma 			return r;
13137138fc88SLe Ma 	}
13147138fc88SLe Ma 
13157138fc88SLe Ma 	/* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
1316f786b1d4SLe Ma 	for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
13177138fc88SLe Ma 		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
13187138fc88SLe Ma 				      SDMA0_4_0__SRCID__SDMA_VM_HOLE,
13197138fc88SLe Ma 				      &adev->sdma.vm_hole_irq);
13207138fc88SLe Ma 		if (r)
13217138fc88SLe Ma 			return r;
13227138fc88SLe Ma 
13237138fc88SLe Ma 		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
13247138fc88SLe Ma 				      SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
13257138fc88SLe Ma 				      &adev->sdma.doorbell_invalid_irq);
13267138fc88SLe Ma 		if (r)
13277138fc88SLe Ma 			return r;
13287138fc88SLe Ma 
13297138fc88SLe Ma 		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
13307138fc88SLe Ma 				      SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
13317138fc88SLe Ma 				      &adev->sdma.pool_timeout_irq);
13327138fc88SLe Ma 		if (r)
13337138fc88SLe Ma 			return r;
13347138fc88SLe Ma 
13357138fc88SLe Ma 		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
13367138fc88SLe Ma 				      SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
13377138fc88SLe Ma 				      &adev->sdma.srbm_write_irq);
13387138fc88SLe Ma 		if (r)
13397138fc88SLe Ma 			return r;
13407138fc88SLe Ma 	}
13417138fc88SLe Ma 
13427138fc88SLe Ma 	for (i = 0; i < adev->sdma.num_instances; i++) {
13437138fc88SLe Ma 		ring = &adev->sdma.instance[i].ring;
13447138fc88SLe Ma 		ring->ring_obj = NULL;
13457138fc88SLe Ma 		ring->use_doorbell = true;
1346f786b1d4SLe Ma 		aid_id = adev->sdma.instance[i].aid_id;
13477138fc88SLe Ma 
13487138fc88SLe Ma 		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
13497138fc88SLe Ma 				ring->use_doorbell ? "true" : "false");
13507138fc88SLe Ma 
13517138fc88SLe Ma 		/* doorbell size is 2 dwords, get DWORD offset */
135220bedf13SLe Ma 		ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
1353f786b1d4SLe Ma 		ring->vm_hub = AMDGPU_MMHUB0(aid_id);
13547138fc88SLe Ma 
1355f786b1d4SLe Ma 		sprintf(ring->name, "sdma%d.%d", aid_id,
1356f786b1d4SLe Ma 				i % adev->sdma.num_inst_per_aid);
13577138fc88SLe Ma 		r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
13587138fc88SLe Ma 				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
13597138fc88SLe Ma 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
13607138fc88SLe Ma 		if (r)
13617138fc88SLe Ma 			return r;
13627138fc88SLe Ma 
13637138fc88SLe Ma 		if (adev->sdma.has_page_queue) {
13647138fc88SLe Ma 			ring = &adev->sdma.instance[i].page;
13657138fc88SLe Ma 			ring->ring_obj = NULL;
13667138fc88SLe Ma 			ring->use_doorbell = true;
13677138fc88SLe Ma 
13680ee20b86SLe Ma 			/* doorbell index of page queue is assigned right after
13690ee20b86SLe Ma 			 * gfx queue on the same instance
13707138fc88SLe Ma 			 */
13712a47a2d9SLe Ma 			ring->doorbell_index =
13722a47a2d9SLe Ma 				(adev->doorbell_index.sdma_engine[i] + 1) << 1;
1373f786b1d4SLe Ma 			ring->vm_hub = AMDGPU_MMHUB0(aid_id);
13747138fc88SLe Ma 
1375f786b1d4SLe Ma 			sprintf(ring->name, "page%d.%d", aid_id,
1376f786b1d4SLe Ma 					i % adev->sdma.num_inst_per_aid);
13777138fc88SLe Ma 			r = amdgpu_ring_init(adev, ring, 1024,
13787138fc88SLe Ma 					     &adev->sdma.trap_irq,
13797138fc88SLe Ma 					     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
13807138fc88SLe Ma 					     AMDGPU_RING_PRIO_DEFAULT, NULL);
13817138fc88SLe Ma 			if (r)
13827138fc88SLe Ma 				return r;
13837138fc88SLe Ma 		}
13847138fc88SLe Ma 	}
13857138fc88SLe Ma 
13861e69fde7SHawking Zhang 	if (amdgpu_sdma_ras_sw_init(adev)) {
13871e69fde7SHawking Zhang 		dev_err(adev->dev, "fail to initialize sdma ras block\n");
13881e69fde7SHawking Zhang 		return -EINVAL;
13891e69fde7SHawking Zhang 	}
13901e69fde7SHawking Zhang 
13917138fc88SLe Ma 	return r;
13927138fc88SLe Ma }
13937138fc88SLe Ma 
13947138fc88SLe Ma static int sdma_v4_4_2_sw_fini(void *handle)
13957138fc88SLe Ma {
13967138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
13977138fc88SLe Ma 	int i;
13987138fc88SLe Ma 
13997138fc88SLe Ma 	for (i = 0; i < adev->sdma.num_instances; i++) {
14007138fc88SLe Ma 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
14017138fc88SLe Ma 		if (adev->sdma.has_page_queue)
14027138fc88SLe Ma 			amdgpu_ring_fini(&adev->sdma.instance[i].page);
14037138fc88SLe Ma 	}
14047138fc88SLe Ma 
14057138fc88SLe Ma 	if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2))
14067138fc88SLe Ma 		amdgpu_sdma_destroy_inst_ctx(adev, true);
14077138fc88SLe Ma 	else
14087138fc88SLe Ma 		amdgpu_sdma_destroy_inst_ctx(adev, false);
14097138fc88SLe Ma 
14107138fc88SLe Ma 	return 0;
14117138fc88SLe Ma }
14127138fc88SLe Ma 
14137138fc88SLe Ma static int sdma_v4_4_2_hw_init(void *handle)
14147138fc88SLe Ma {
14157138fc88SLe Ma 	int r;
14167138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1417527c670eSLijo Lazar 	uint32_t inst_mask;
14187138fc88SLe Ma 
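	/* build a bit mask covering every SDMA instance on this device */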
1419527c670eSLijo Lazar 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
14207138fc88SLe Ma 	if (!amdgpu_sriov_vf(adev))
1421527c670eSLijo Lazar 		sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
14227138fc88SLe Ma 
1423527c670eSLijo Lazar 	r = sdma_v4_4_2_inst_start(adev, inst_mask);
14247138fc88SLe Ma 
14257138fc88SLe Ma 	return r;
14267138fc88SLe Ma }
14277138fc88SLe Ma 
14287138fc88SLe Ma static int sdma_v4_4_2_hw_fini(void *handle)
14297138fc88SLe Ma {
14307138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1431527c670eSLijo Lazar 	uint32_t inst_mask;
14327138fc88SLe Ma 	int i;
14337138fc88SLe Ma 
14347138fc88SLe Ma 	if (amdgpu_sriov_vf(adev))
14357138fc88SLe Ma 		return 0;
14367138fc88SLe Ma 
1437527c670eSLijo Lazar 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
143882a1f42fSLijo Lazar 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
14397138fc88SLe Ma 		for (i = 0; i < adev->sdma.num_instances; i++) {
14407138fc88SLe Ma 			amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
14417138fc88SLe Ma 				       AMDGPU_SDMA_IRQ_INSTANCE0 + i);
14427138fc88SLe Ma 		}
144382a1f42fSLijo Lazar 	}
14447138fc88SLe Ma 
1445527c670eSLijo Lazar 	sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
1446527c670eSLijo Lazar 	sdma_v4_4_2_inst_enable(adev, false, inst_mask);
14477138fc88SLe Ma 
14487138fc88SLe Ma 	return 0;
14497138fc88SLe Ma }
14507138fc88SLe Ma 
1451527c670eSLijo Lazar static int sdma_v4_4_2_set_clockgating_state(void *handle,
1452527c670eSLijo Lazar 					     enum amd_clockgating_state state);
1453527c670eSLijo Lazar 
14547138fc88SLe Ma static int sdma_v4_4_2_suspend(void *handle)
14557138fc88SLe Ma {
14567138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
14577138fc88SLe Ma 
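	/* when suspending as part of a GPU reset, ungate SDMA clocks first */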
14587389c751SLijo Lazar 	if (amdgpu_in_reset(adev))
14597389c751SLijo Lazar 		sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
14607389c751SLijo Lazar 
14617138fc88SLe Ma 	return sdma_v4_4_2_hw_fini(adev);
14627138fc88SLe Ma }
14637138fc88SLe Ma 
14647138fc88SLe Ma static int sdma_v4_4_2_resume(void *handle)
14657138fc88SLe Ma {
14667138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
14677138fc88SLe Ma 
14687138fc88SLe Ma 	return sdma_v4_4_2_hw_init(adev);
14697138fc88SLe Ma }
14707138fc88SLe Ma 
14717138fc88SLe Ma static bool sdma_v4_4_2_is_idle(void *handle)
14727138fc88SLe Ma {
14737138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
14747138fc88SLe Ma 	u32 i;
14757138fc88SLe Ma 
14767138fc88SLe Ma 	for (i = 0; i < adev->sdma.num_instances; i++) {
14777138fc88SLe Ma 		u32 tmp = RREG32_SDMA(i, regSDMA_STATUS_REG);
14787138fc88SLe Ma 
14797138fc88SLe Ma 		if (!(tmp & SDMA_STATUS_REG__IDLE_MASK))
14807138fc88SLe Ma 			return false;
14817138fc88SLe Ma 	}
14827138fc88SLe Ma 
14837138fc88SLe Ma 	return true;
14847138fc88SLe Ma }
14857138fc88SLe Ma 
14867138fc88SLe Ma static int sdma_v4_4_2_wait_for_idle(void *handle)
14877138fc88SLe Ma {
14887138fc88SLe Ma 	unsigned i, j;
14897138fc88SLe Ma 	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
14907138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
14917138fc88SLe Ma 
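	/* poll each instance's STATUS register for up to usec_timeout microseconds */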
14927138fc88SLe Ma 	for (i = 0; i < adev->usec_timeout; i++) {
14937138fc88SLe Ma 		for (j = 0; j < adev->sdma.num_instances; j++) {
14947138fc88SLe Ma 			sdma[j] = RREG32_SDMA(j, regSDMA_STATUS_REG);
14957138fc88SLe Ma 			if (!(sdma[j] & SDMA_STATUS_REG__IDLE_MASK))
14967138fc88SLe Ma 				break;
14977138fc88SLe Ma 		}
14987138fc88SLe Ma 		if (j == adev->sdma.num_instances)
14997138fc88SLe Ma 			return 0;
15007138fc88SLe Ma 		udelay(1);
15017138fc88SLe Ma 	}
15027138fc88SLe Ma 	return -ETIMEDOUT;
15037138fc88SLe Ma }
15047138fc88SLe Ma 
15057138fc88SLe Ma static int sdma_v4_4_2_soft_reset(void *handle)
15067138fc88SLe Ma {
15077138fc88SLe Ma 	/* todo */
15087138fc88SLe Ma 
15097138fc88SLe Ma 	return 0;
15107138fc88SLe Ma }
15117138fc88SLe Ma 
15127138fc88SLe Ma static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
15137138fc88SLe Ma 					struct amdgpu_irq_src *source,
15147138fc88SLe Ma 					unsigned type,
15157138fc88SLe Ma 					enum amdgpu_interrupt_state state)
15167138fc88SLe Ma {
15177138fc88SLe Ma 	u32 sdma_cntl;
15187138fc88SLe Ma 
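	/* the irq "type" encodes the SDMA instance whose CNTL register is programmed */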
15197138fc88SLe Ma 	sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
15207138fc88SLe Ma 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, TRAP_ENABLE,
15217138fc88SLe Ma 		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
15227138fc88SLe Ma 	WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
15237138fc88SLe Ma 
15247138fc88SLe Ma 	return 0;
15257138fc88SLe Ma }
15267138fc88SLe Ma 
15277138fc88SLe Ma static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
15287138fc88SLe Ma 				      struct amdgpu_irq_src *source,
15297138fc88SLe Ma 				      struct amdgpu_iv_entry *entry)
15307138fc88SLe Ma {
1531f8b34a05SLijo Lazar 	uint32_t instance, i;
15327138fc88SLe Ma 
15337138fc88SLe Ma 	DRM_DEBUG("IH: SDMA trap\n");
15347138fc88SLe Ma 	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
1535f8b34a05SLijo Lazar 
1536f8b34a05SLijo Lazar 	/* The client id gives the SDMA instance within an AID. To find the exact
1537f8b34a05SLijo Lazar 	 * SDMA instance, the interrupt entry also carries a node id, which maps to
1538f8b34a05SLijo Lazar 	 * an AID; match it against the AID id associated with each SDMA instance. */
1539f8b34a05SLijo Lazar 	for (i = instance; i < adev->sdma.num_instances;
1540f8b34a05SLijo Lazar 	     i += adev->sdma.num_inst_per_aid) {
1541f8b34a05SLijo Lazar 		if (adev->sdma.instance[i].aid_id ==
1542f8b34a05SLijo Lazar 		    node_id_to_phys_map[entry->node_id])
1543f8b34a05SLijo Lazar 			break;
1544f8b34a05SLijo Lazar 	}
1545f8b34a05SLijo Lazar 
1546f8b34a05SLijo Lazar 	if (i >= adev->sdma.num_instances) {
1547f8b34a05SLijo Lazar 		dev_WARN_ONCE(
1548f8b34a05SLijo Lazar 			adev->dev, 1,
1549f8b34a05SLijo Lazar 			"Couldn't find the right sdma instance in trap handler");
1550f8b34a05SLijo Lazar 		return 0;
1551f8b34a05SLijo Lazar 	}
15520d81101cSLe Ma 
15537138fc88SLe Ma 	switch (entry->ring_id) {
15547138fc88SLe Ma 	case 0:
1555f8b34a05SLijo Lazar 		amdgpu_fence_process(&adev->sdma.instance[i].ring);
15567138fc88SLe Ma 		break;
15577138fc88SLe Ma 	default:
15587138fc88SLe Ma 		break;
15597138fc88SLe Ma 	}
15607138fc88SLe Ma 	return 0;
15617138fc88SLe Ma }
15627138fc88SLe Ma 
15637138fc88SLe Ma #if 0
15647138fc88SLe Ma static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
15657138fc88SLe Ma 		void *err_data,
15667138fc88SLe Ma 		struct amdgpu_iv_entry *entry)
15677138fc88SLe Ma {
15687138fc88SLe Ma 	int instance;
15697138fc88SLe Ma 
15707138fc88SLe Ma 	/* When "Full RAS" is enabled, the per-IP interrupt sources should
15717138fc88SLe Ma 	 * be disabled and the driver should only look for the aggregated
15727138fc88SLe Ma 	 * interrupt via sync flood
15737138fc88SLe Ma 	 */
15741e69fde7SHawking Zhang 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
15757138fc88SLe Ma 		goto out;
15767138fc88SLe Ma 
15777138fc88SLe Ma 	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
15787138fc88SLe Ma 	if (instance < 0)
15797138fc88SLe Ma 		goto out;
15807138fc88SLe Ma 
15817138fc88SLe Ma 	amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
15827138fc88SLe Ma 
15837138fc88SLe Ma out:
15847138fc88SLe Ma 	return AMDGPU_RAS_SUCCESS;
15857138fc88SLe Ma }
15867138fc88SLe Ma #endif
15877138fc88SLe Ma 
15887138fc88SLe Ma static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
15897138fc88SLe Ma 					      struct amdgpu_irq_src *source,
15907138fc88SLe Ma 					      struct amdgpu_iv_entry *entry)
15917138fc88SLe Ma {
15927138fc88SLe Ma 	int instance;
15937138fc88SLe Ma 
15947138fc88SLe Ma 	DRM_ERROR("Illegal instruction in SDMA command stream\n");
15957138fc88SLe Ma 
15967138fc88SLe Ma 	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
15977138fc88SLe Ma 	if (instance < 0)
15987138fc88SLe Ma 		return 0;
15997138fc88SLe Ma 
16007138fc88SLe Ma 	switch (entry->ring_id) {
16017138fc88SLe Ma 	case 0:
16027138fc88SLe Ma 		drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
16037138fc88SLe Ma 		break;
16047138fc88SLe Ma 	}
16057138fc88SLe Ma 	return 0;
16067138fc88SLe Ma }
16077138fc88SLe Ma 
16087138fc88SLe Ma static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
16097138fc88SLe Ma 					struct amdgpu_irq_src *source,
16107138fc88SLe Ma 					unsigned type,
16117138fc88SLe Ma 					enum amdgpu_interrupt_state state)
16127138fc88SLe Ma {
16131e69fde7SHawking Zhang 	u32 sdma_cntl;
16147138fc88SLe Ma 
16151e69fde7SHawking Zhang 	sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
1616c0beff4eSTao Zhou 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
1617c0beff4eSTao Zhou 					state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
16181e69fde7SHawking Zhang 	WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
16197138fc88SLe Ma 
16207138fc88SLe Ma 	return 0;
16217138fc88SLe Ma }
16227138fc88SLe Ma 
16237138fc88SLe Ma static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
16247138fc88SLe Ma 					      struct amdgpu_iv_entry *entry)
16257138fc88SLe Ma {
16267138fc88SLe Ma 	int instance;
16277138fc88SLe Ma 	struct amdgpu_task_info task_info;
16287138fc88SLe Ma 	u64 addr;
16297138fc88SLe Ma 
16307138fc88SLe Ma 	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
16317138fc88SLe Ma 	if (instance < 0 || instance >= adev->sdma.num_instances) {
16327138fc88SLe Ma 		dev_err(adev->dev, "sdma instance invalid %d\n", instance);
16337138fc88SLe Ma 		return -EINVAL;
16347138fc88SLe Ma 	}
16357138fc88SLe Ma 
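	/* reassemble the 48-bit, page-aligned fault address from the two IV dwords */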
16367138fc88SLe Ma 	addr = (u64)entry->src_data[0] << 12;
16377138fc88SLe Ma 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
16387138fc88SLe Ma 
16397138fc88SLe Ma 	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
16407138fc88SLe Ma 	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
16417138fc88SLe Ma 
16427138fc88SLe Ma 	dev_dbg_ratelimited(adev->dev,
16437138fc88SLe Ma 		   "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
16447138fc88SLe Ma 		   "pasid:%u, for process %s pid %d thread %s pid %d\n",
16457138fc88SLe Ma 		   instance, addr, entry->src_id, entry->ring_id, entry->vmid,
16467138fc88SLe Ma 		   entry->pasid, task_info.process_name, task_info.tgid,
16477138fc88SLe Ma 		   task_info.task_name, task_info.pid);
16487138fc88SLe Ma 	return 0;
16497138fc88SLe Ma }
16507138fc88SLe Ma 
16517138fc88SLe Ma static int sdma_v4_4_2_process_vm_hole_irq(struct amdgpu_device *adev,
16527138fc88SLe Ma 					      struct amdgpu_irq_src *source,
16537138fc88SLe Ma 					      struct amdgpu_iv_entry *entry)
16547138fc88SLe Ma {
16557138fc88SLe Ma 	dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
16567138fc88SLe Ma 	sdma_v4_4_2_print_iv_entry(adev, entry);
16577138fc88SLe Ma 	return 0;
16587138fc88SLe Ma }
16597138fc88SLe Ma 
16607138fc88SLe Ma static int sdma_v4_4_2_process_doorbell_invalid_irq(struct amdgpu_device *adev,
16617138fc88SLe Ma 					      struct amdgpu_irq_src *source,
16627138fc88SLe Ma 					      struct amdgpu_iv_entry *entry)
16637138fc88SLe Ma {
16647138fc88SLe Ma 
16657138fc88SLe Ma 	dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
16667138fc88SLe Ma 	sdma_v4_4_2_print_iv_entry(adev, entry);
16677138fc88SLe Ma 	return 0;
16687138fc88SLe Ma }
16697138fc88SLe Ma 
16707138fc88SLe Ma static int sdma_v4_4_2_process_pool_timeout_irq(struct amdgpu_device *adev,
16717138fc88SLe Ma 					      struct amdgpu_irq_src *source,
16727138fc88SLe Ma 					      struct amdgpu_iv_entry *entry)
16737138fc88SLe Ma {
16747138fc88SLe Ma 	dev_dbg_ratelimited(adev->dev,
16757138fc88SLe Ma 		"Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
16767138fc88SLe Ma 	sdma_v4_4_2_print_iv_entry(adev, entry);
16777138fc88SLe Ma 	return 0;
16787138fc88SLe Ma }
16797138fc88SLe Ma 
16807138fc88SLe Ma static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
16817138fc88SLe Ma 					      struct amdgpu_irq_src *source,
16827138fc88SLe Ma 					      struct amdgpu_iv_entry *entry)
16837138fc88SLe Ma {
16847138fc88SLe Ma 	dev_dbg_ratelimited(adev->dev,
16857138fc88SLe Ma 		"SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
16867138fc88SLe Ma 	sdma_v4_4_2_print_iv_entry(adev, entry);
16877138fc88SLe Ma 	return 0;
16887138fc88SLe Ma }
16897138fc88SLe Ma 
1690527c670eSLijo Lazar static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
1691527c670eSLijo Lazar 	struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
1692527c670eSLijo Lazar {
1693527c670eSLijo Lazar 	uint32_t data, def;
1694527c670eSLijo Lazar 	int i;
1695527c670eSLijo Lazar 
1696cbf9e46aSLijo Lazar 	/* leave as default if it is not driver controlled */
1697cbf9e46aSLijo Lazar 	if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS))
1698cbf9e46aSLijo Lazar 		return;
1699cbf9e46aSLijo Lazar 
1700cbf9e46aSLijo Lazar 	if (enable) {
1701527c670eSLijo Lazar 		for_each_inst(i, inst_mask) {
1702527c670eSLijo Lazar 			/* 1-not override: enable sdma mem light sleep */
1703527c670eSLijo Lazar 			def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
1704527c670eSLijo Lazar 			data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1705527c670eSLijo Lazar 			if (def != data)
1706527c670eSLijo Lazar 				WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
1707527c670eSLijo Lazar 		}
1708527c670eSLijo Lazar 	} else {
1709527c670eSLijo Lazar 		for_each_inst(i, inst_mask) {
1710527c670eSLijo Lazar 			/* 0-override:disable sdma mem light sleep */
1711527c670eSLijo Lazar 			def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
1712527c670eSLijo Lazar 			data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1713527c670eSLijo Lazar 			if (def != data)
1714527c670eSLijo Lazar 				WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
1715527c670eSLijo Lazar 		}
1716527c670eSLijo Lazar 	}
1717527c670eSLijo Lazar }
1718527c670eSLijo Lazar 
1719527c670eSLijo Lazar static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
1720527c670eSLijo Lazar 	struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
17217138fc88SLe Ma {
17227138fc88SLe Ma 	uint32_t data, def;
17237138fc88SLe Ma 	int i;
17247138fc88SLe Ma 
1725cbf9e46aSLijo Lazar 	/* leave as default if it is not driver controlled */
1726cbf9e46aSLijo Lazar 	if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG))
1727cbf9e46aSLijo Lazar 		return;
1728cbf9e46aSLijo Lazar 
1729cbf9e46aSLijo Lazar 	if (enable) {
1730527c670eSLijo Lazar 		for_each_inst(i, inst_mask) {
17317138fc88SLe Ma 			def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
1732cbf9e46aSLijo Lazar 			data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
17337138fc88SLe Ma 				  SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
17347138fc88SLe Ma 				  SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
17357138fc88SLe Ma 				  SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
17367138fc88SLe Ma 				  SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
17377138fc88SLe Ma 				  SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
17387138fc88SLe Ma 			if (def != data)
17397138fc88SLe Ma 				WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
17407138fc88SLe Ma 		}
17417138fc88SLe Ma 	} else {
1742527c670eSLijo Lazar 		for_each_inst(i, inst_mask) {
17437138fc88SLe Ma 			def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
1744cbf9e46aSLijo Lazar 			data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
17457138fc88SLe Ma 				 SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
17467138fc88SLe Ma 				 SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
17477138fc88SLe Ma 				 SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
17487138fc88SLe Ma 				 SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
17497138fc88SLe Ma 				 SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
17507138fc88SLe Ma 			if (def != data)
17517138fc88SLe Ma 				WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
17527138fc88SLe Ma 		}
17537138fc88SLe Ma 	}
17547138fc88SLe Ma }
17557138fc88SLe Ma 
17567138fc88SLe Ma static int sdma_v4_4_2_set_clockgating_state(void *handle,
17577138fc88SLe Ma 					  enum amd_clockgating_state state)
17587138fc88SLe Ma {
17597138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1760527c670eSLijo Lazar 	uint32_t inst_mask;
17617138fc88SLe Ma 
17627138fc88SLe Ma 	if (amdgpu_sriov_vf(adev))
17637138fc88SLe Ma 		return 0;
17647138fc88SLe Ma 
1765527c670eSLijo Lazar 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
1766527c670eSLijo Lazar 
1767527c670eSLijo Lazar 	sdma_v4_4_2_inst_update_medium_grain_clock_gating(
1768527c670eSLijo Lazar 		adev, state == AMD_CG_STATE_GATE, inst_mask);
1769527c670eSLijo Lazar 	sdma_v4_4_2_inst_update_medium_grain_light_sleep(
1770527c670eSLijo Lazar 		adev, state == AMD_CG_STATE_GATE, inst_mask);
17717138fc88SLe Ma 	return 0;
17727138fc88SLe Ma }
17737138fc88SLe Ma 
17747138fc88SLe Ma static int sdma_v4_4_2_set_powergating_state(void *handle,
17757138fc88SLe Ma 					  enum amd_powergating_state state)
17767138fc88SLe Ma {
17777138fc88SLe Ma 	return 0;
17787138fc88SLe Ma }
17797138fc88SLe Ma 
17807138fc88SLe Ma static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
17817138fc88SLe Ma {
17827138fc88SLe Ma 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
17837138fc88SLe Ma 	int data;
17847138fc88SLe Ma 
17857138fc88SLe Ma 	if (amdgpu_sriov_vf(adev))
17867138fc88SLe Ma 		*flags = 0;
17877138fc88SLe Ma 
17887138fc88SLe Ma 	/* AMD_CG_SUPPORT_SDMA_MGCG */
1789f8b34a05SLijo Lazar 	data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_CLK_CTRL));
1790cbf9e46aSLijo Lazar 	if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK))
17917138fc88SLe Ma 		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;
17927138fc88SLe Ma 
17937138fc88SLe Ma 	/* AMD_CG_SUPPORT_SDMA_LS */
1794f8b34a05SLijo Lazar 	data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_POWER_CNTL));
17957138fc88SLe Ma 	if (data & SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
17967138fc88SLe Ma 		*flags |= AMD_CG_SUPPORT_SDMA_LS;
17977138fc88SLe Ma }
17987138fc88SLe Ma 
17997138fc88SLe Ma const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
18007138fc88SLe Ma 	.name = "sdma_v4_4_2",
18017138fc88SLe Ma 	.early_init = sdma_v4_4_2_early_init,
18027138fc88SLe Ma 	.late_init = sdma_v4_4_2_late_init,
18037138fc88SLe Ma 	.sw_init = sdma_v4_4_2_sw_init,
18047138fc88SLe Ma 	.sw_fini = sdma_v4_4_2_sw_fini,
18057138fc88SLe Ma 	.hw_init = sdma_v4_4_2_hw_init,
18067138fc88SLe Ma 	.hw_fini = sdma_v4_4_2_hw_fini,
18077138fc88SLe Ma 	.suspend = sdma_v4_4_2_suspend,
18087138fc88SLe Ma 	.resume = sdma_v4_4_2_resume,
18097138fc88SLe Ma 	.is_idle = sdma_v4_4_2_is_idle,
18107138fc88SLe Ma 	.wait_for_idle = sdma_v4_4_2_wait_for_idle,
18117138fc88SLe Ma 	.soft_reset = sdma_v4_4_2_soft_reset,
18127138fc88SLe Ma 	.set_clockgating_state = sdma_v4_4_2_set_clockgating_state,
18137138fc88SLe Ma 	.set_powergating_state = sdma_v4_4_2_set_powergating_state,
18147138fc88SLe Ma 	.get_clockgating_state = sdma_v4_4_2_get_clockgating_state,
18157138fc88SLe Ma };
18167138fc88SLe Ma 
18177138fc88SLe Ma static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
18187138fc88SLe Ma 	.type = AMDGPU_RING_TYPE_SDMA,
1819e5df16d9SAlex Deucher 	.align_mask = 0xff,
18207138fc88SLe Ma 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
18217138fc88SLe Ma 	.support_64bit_ptrs = true,
18227138fc88SLe Ma 	.get_rptr = sdma_v4_4_2_ring_get_rptr,
18237138fc88SLe Ma 	.get_wptr = sdma_v4_4_2_ring_get_wptr,
18247138fc88SLe Ma 	.set_wptr = sdma_v4_4_2_ring_set_wptr,
18257138fc88SLe Ma 	.emit_frame_size =
18267138fc88SLe Ma 		6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
18277138fc88SLe Ma 		3 + /* hdp invalidate */
18287138fc88SLe Ma 		6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
18297138fc88SLe Ma 		/* sdma_v4_4_2_ring_emit_vm_flush */
18307138fc88SLe Ma 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
18317138fc88SLe Ma 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
18327138fc88SLe Ma 		10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
18337138fc88SLe Ma 	.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
18347138fc88SLe Ma 	.emit_ib = sdma_v4_4_2_ring_emit_ib,
18357138fc88SLe Ma 	.emit_fence = sdma_v4_4_2_ring_emit_fence,
18367138fc88SLe Ma 	.emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
18377138fc88SLe Ma 	.emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
18387138fc88SLe Ma 	.emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
18397138fc88SLe Ma 	.test_ring = sdma_v4_4_2_ring_test_ring,
18407138fc88SLe Ma 	.test_ib = sdma_v4_4_2_ring_test_ib,
18417138fc88SLe Ma 	.insert_nop = sdma_v4_4_2_ring_insert_nop,
18427138fc88SLe Ma 	.pad_ib = sdma_v4_4_2_ring_pad_ib,
18437138fc88SLe Ma 	.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
18447138fc88SLe Ma 	.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
18457138fc88SLe Ma 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
18467138fc88SLe Ma };
18477138fc88SLe Ma 
18487138fc88SLe Ma static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
18497138fc88SLe Ma 	.type = AMDGPU_RING_TYPE_SDMA,
1850e5df16d9SAlex Deucher 	.align_mask = 0xff,
18517138fc88SLe Ma 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
18527138fc88SLe Ma 	.support_64bit_ptrs = true,
18537138fc88SLe Ma 	.get_rptr = sdma_v4_4_2_ring_get_rptr,
18547138fc88SLe Ma 	.get_wptr = sdma_v4_4_2_page_ring_get_wptr,
18557138fc88SLe Ma 	.set_wptr = sdma_v4_4_2_page_ring_set_wptr,
18567138fc88SLe Ma 	.emit_frame_size =
18577138fc88SLe Ma 		6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
18587138fc88SLe Ma 		3 + /* hdp invalidate */
18597138fc88SLe Ma 		6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
18607138fc88SLe Ma 		/* sdma_v4_4_2_ring_emit_vm_flush */
18617138fc88SLe Ma 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
18627138fc88SLe Ma 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
18637138fc88SLe Ma 		10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
18647138fc88SLe Ma 	.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
18657138fc88SLe Ma 	.emit_ib = sdma_v4_4_2_ring_emit_ib,
18667138fc88SLe Ma 	.emit_fence = sdma_v4_4_2_ring_emit_fence,
18677138fc88SLe Ma 	.emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
18687138fc88SLe Ma 	.emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
18697138fc88SLe Ma 	.emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
18707138fc88SLe Ma 	.test_ring = sdma_v4_4_2_ring_test_ring,
18717138fc88SLe Ma 	.test_ib = sdma_v4_4_2_ring_test_ib,
18727138fc88SLe Ma 	.insert_nop = sdma_v4_4_2_ring_insert_nop,
18737138fc88SLe Ma 	.pad_ib = sdma_v4_4_2_ring_pad_ib,
18747138fc88SLe Ma 	.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
18757138fc88SLe Ma 	.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
18767138fc88SLe Ma 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
18777138fc88SLe Ma };
18787138fc88SLe Ma 
18797138fc88SLe Ma static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
18807138fc88SLe Ma {
1881f8b34a05SLijo Lazar 	int i, dev_inst;
18827138fc88SLe Ma 
18837138fc88SLe Ma 	for (i = 0; i < adev->sdma.num_instances; i++) {
18847138fc88SLe Ma 		adev->sdma.instance[i].ring.funcs = &sdma_v4_4_2_ring_funcs;
18857138fc88SLe Ma 		adev->sdma.instance[i].ring.me = i;
18867138fc88SLe Ma 		if (adev->sdma.has_page_queue) {
18877138fc88SLe Ma 			adev->sdma.instance[i].page.funcs =
18887138fc88SLe Ma 				&sdma_v4_4_2_page_ring_funcs;
18897138fc88SLe Ma 			adev->sdma.instance[i].page.me = i;
18907138fc88SLe Ma 		}
1891f786b1d4SLe Ma 
1892f8b34a05SLijo Lazar 		dev_inst = GET_INST(SDMA0, i);
1893f8b34a05SLijo Lazar 		/* AID to which SDMA belongs depends on physical instance */
1894f8b34a05SLijo Lazar 		adev->sdma.instance[i].aid_id =
1895f8b34a05SLijo Lazar 			dev_inst / adev->sdma.num_inst_per_aid;
18967138fc88SLe Ma 	}
18977138fc88SLe Ma }
18987138fc88SLe Ma 
18997138fc88SLe Ma static const struct amdgpu_irq_src_funcs sdma_v4_4_2_trap_irq_funcs = {
19007138fc88SLe Ma 	.set = sdma_v4_4_2_set_trap_irq_state,
19017138fc88SLe Ma 	.process = sdma_v4_4_2_process_trap_irq,
19027138fc88SLe Ma };
19037138fc88SLe Ma 
19047138fc88SLe Ma static const struct amdgpu_irq_src_funcs sdma_v4_4_2_illegal_inst_irq_funcs = {
19057138fc88SLe Ma 	.process = sdma_v4_4_2_process_illegal_inst_irq,
19067138fc88SLe Ma };
19077138fc88SLe Ma 
19087138fc88SLe Ma static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ecc_irq_funcs = {
19097138fc88SLe Ma 	.set = sdma_v4_4_2_set_ecc_irq_state,
19107138fc88SLe Ma 	.process = amdgpu_sdma_process_ecc_irq,
19117138fc88SLe Ma };
19127138fc88SLe Ma 
19137138fc88SLe Ma static const struct amdgpu_irq_src_funcs sdma_v4_4_2_vm_hole_irq_funcs = {
19147138fc88SLe Ma 	.process = sdma_v4_4_2_process_vm_hole_irq,
19157138fc88SLe Ma };
19167138fc88SLe Ma 
19177138fc88SLe Ma static const struct amdgpu_irq_src_funcs sdma_v4_4_2_doorbell_invalid_irq_funcs = {
19187138fc88SLe Ma 	.process = sdma_v4_4_2_process_doorbell_invalid_irq,
19197138fc88SLe Ma };
19207138fc88SLe Ma 
19217138fc88SLe Ma static const struct amdgpu_irq_src_funcs sdma_v4_4_2_pool_timeout_irq_funcs = {
19227138fc88SLe Ma 	.process = sdma_v4_4_2_process_pool_timeout_irq,
19237138fc88SLe Ma };
19247138fc88SLe Ma 
19257138fc88SLe Ma static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = {
19267138fc88SLe Ma 	.process = sdma_v4_4_2_process_srbm_write_irq,
19277138fc88SLe Ma };
19287138fc88SLe Ma 
19297138fc88SLe Ma static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
19307138fc88SLe Ma {
19317138fc88SLe Ma 	adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
19327138fc88SLe Ma 	adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
19337138fc88SLe Ma 	adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
19347138fc88SLe Ma 	adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
19357138fc88SLe Ma 	adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
19367138fc88SLe Ma 	adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
19377138fc88SLe Ma 
19387138fc88SLe Ma 	adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
19397138fc88SLe Ma 	adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
19407138fc88SLe Ma 	adev->sdma.ecc_irq.funcs = &sdma_v4_4_2_ecc_irq_funcs;
19417138fc88SLe Ma 	adev->sdma.vm_hole_irq.funcs = &sdma_v4_4_2_vm_hole_irq_funcs;
19427138fc88SLe Ma 	adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
19437138fc88SLe Ma 	adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
19447138fc88SLe Ma 	adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
19457138fc88SLe Ma }
19467138fc88SLe Ma 
19477138fc88SLe Ma /**
19487138fc88SLe Ma  * sdma_v4_4_2_emit_copy_buffer - copy buffer using the sDMA engine
19497138fc88SLe Ma  *
19507138fc88SLe Ma  * @ib: indirect buffer to copy to
19517138fc88SLe Ma  * @src_offset: src GPU address
19527138fc88SLe Ma  * @dst_offset: dst GPU address
19537138fc88SLe Ma  * @byte_count: number of bytes to xfer
19547138fc88SLe Ma  * @tmz: if a secure copy should be used
19557138fc88SLe Ma  *
19567138fc88SLe Ma  * Copy GPU buffers using the DMA engine.
19577138fc88SLe Ma  * Used by the amdgpu ttm implementation to move pages if
19587138fc88SLe Ma  * registered as the asic copy callback.
19597138fc88SLe Ma  */
19607138fc88SLe Ma static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib,
19617138fc88SLe Ma 				       uint64_t src_offset,
19627138fc88SLe Ma 				       uint64_t dst_offset,
19637138fc88SLe Ma 				       uint32_t byte_count,
19647138fc88SLe Ma 				       bool tmz)
19657138fc88SLe Ma {
19667138fc88SLe Ma 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
19677138fc88SLe Ma 		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
19687138fc88SLe Ma 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
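	/* the copy COUNT field is zero-based: program byte_count - 1 */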
19697138fc88SLe Ma 	ib->ptr[ib->length_dw++] = byte_count - 1;
19707138fc88SLe Ma 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
19717138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
19727138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
19737138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
19747138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
19757138fc88SLe Ma }
19767138fc88SLe Ma 
19777138fc88SLe Ma /**
19787138fc88SLe Ma  * sdma_v4_4_2_emit_fill_buffer - fill buffer using the sDMA engine
19797138fc88SLe Ma  *
19807138fc88SLe Ma  * @ib: indirect buffer to copy to
19817138fc88SLe Ma  * @src_data: value to write to buffer
19827138fc88SLe Ma  * @dst_offset: dst GPU address
19837138fc88SLe Ma  * @byte_count: number of bytes to xfer
19847138fc88SLe Ma  *
19857138fc88SLe Ma  * Fill GPU buffers using the DMA engine.
19867138fc88SLe Ma  */
19877138fc88SLe Ma static void sdma_v4_4_2_emit_fill_buffer(struct amdgpu_ib *ib,
19887138fc88SLe Ma 				       uint32_t src_data,
19897138fc88SLe Ma 				       uint64_t dst_offset,
19907138fc88SLe Ma 				       uint32_t byte_count)
19917138fc88SLe Ma {
19927138fc88SLe Ma 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
19937138fc88SLe Ma 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
19947138fc88SLe Ma 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
19957138fc88SLe Ma 	ib->ptr[ib->length_dw++] = src_data;
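	/* the fill COUNT field is zero-based as well */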
19967138fc88SLe Ma 	ib->ptr[ib->length_dw++] = byte_count - 1;
19977138fc88SLe Ma }
19987138fc88SLe Ma 
19997138fc88SLe Ma static const struct amdgpu_buffer_funcs sdma_v4_4_2_buffer_funcs = {
20007138fc88SLe Ma 	.copy_max_bytes = 0x400000,
20017138fc88SLe Ma 	.copy_num_dw = 7,
20027138fc88SLe Ma 	.emit_copy_buffer = sdma_v4_4_2_emit_copy_buffer,
20037138fc88SLe Ma 
20047138fc88SLe Ma 	.fill_max_bytes = 0x400000,
20057138fc88SLe Ma 	.fill_num_dw = 5,
20067138fc88SLe Ma 	.emit_fill_buffer = sdma_v4_4_2_emit_fill_buffer,
20077138fc88SLe Ma };
20087138fc88SLe Ma 
20097138fc88SLe Ma static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev)
20107138fc88SLe Ma {
20117138fc88SLe Ma 	adev->mman.buffer_funcs = &sdma_v4_4_2_buffer_funcs;
20127138fc88SLe Ma 	if (adev->sdma.has_page_queue)
20137138fc88SLe Ma 		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
20147138fc88SLe Ma 	else
20157138fc88SLe Ma 		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
20167138fc88SLe Ma }
20177138fc88SLe Ma 
20187138fc88SLe Ma static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = {
20197138fc88SLe Ma 	.copy_pte_num_dw = 7,
20207138fc88SLe Ma 	.copy_pte = sdma_v4_4_2_vm_copy_pte,
20217138fc88SLe Ma 
20227138fc88SLe Ma 	.write_pte = sdma_v4_4_2_vm_write_pte,
20237138fc88SLe Ma 	.set_pte_pde = sdma_v4_4_2_vm_set_pte_pde,
20247138fc88SLe Ma };
20257138fc88SLe Ma 
20267138fc88SLe Ma static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
20277138fc88SLe Ma {
20287138fc88SLe Ma 	struct drm_gpu_scheduler *sched;
20297138fc88SLe Ma 	unsigned i;
20307138fc88SLe Ma 
20317138fc88SLe Ma 	adev->vm_manager.vm_pte_funcs = &sdma_v4_4_2_vm_pte_funcs;
20327138fc88SLe Ma 	for (i = 0; i < adev->sdma.num_instances; i++) {
20337138fc88SLe Ma 		if (adev->sdma.has_page_queue)
20347138fc88SLe Ma 			sched = &adev->sdma.instance[i].page.sched;
20357138fc88SLe Ma 		else
20367138fc88SLe Ma 			sched = &adev->sdma.instance[i].ring.sched;
20377138fc88SLe Ma 		adev->vm_manager.vm_pte_scheds[i] = sched;
20387138fc88SLe Ma 	}
20397138fc88SLe Ma 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
20407138fc88SLe Ma }
20417138fc88SLe Ma 
20427138fc88SLe Ma const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
20437138fc88SLe Ma 	.type = AMD_IP_BLOCK_TYPE_SDMA,
20447138fc88SLe Ma 	.major = 4,
20457138fc88SLe Ma 	.minor = 4,
20467138fc88SLe Ma 	.rev = 0,
20477138fc88SLe Ma 	.funcs = &sdma_v4_4_2_ip_funcs,
20487138fc88SLe Ma };
20493446cb78SLijo Lazar 
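/* XCP (partition) hooks: same sequence as hw_init()/hw_fini(), but restricted
 * to the SDMA instances selected by inst_mask.
 */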
20503446cb78SLijo Lazar static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
20513446cb78SLijo Lazar {
20523446cb78SLijo Lazar 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
20533446cb78SLijo Lazar 	int r;
20543446cb78SLijo Lazar 
20553446cb78SLijo Lazar 	if (!amdgpu_sriov_vf(adev))
20563446cb78SLijo Lazar 		sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
20573446cb78SLijo Lazar 
20583446cb78SLijo Lazar 	r = sdma_v4_4_2_inst_start(adev, inst_mask);
20593446cb78SLijo Lazar 
20603446cb78SLijo Lazar 	return r;
20613446cb78SLijo Lazar }
20623446cb78SLijo Lazar 
20633446cb78SLijo Lazar static int sdma_v4_4_2_xcp_suspend(void *handle, uint32_t inst_mask)
20643446cb78SLijo Lazar {
20653446cb78SLijo Lazar 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
20663446cb78SLijo Lazar 	uint32_t tmp_mask = inst_mask;
20673446cb78SLijo Lazar 	int i;
20683446cb78SLijo Lazar 
206982a1f42fSLijo Lazar 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
20703446cb78SLijo Lazar 		for_each_inst(i, tmp_mask) {
20713446cb78SLijo Lazar 			amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
20723446cb78SLijo Lazar 				       AMDGPU_SDMA_IRQ_INSTANCE0 + i);
20733446cb78SLijo Lazar 		}
207482a1f42fSLijo Lazar 	}
20753446cb78SLijo Lazar 
20763446cb78SLijo Lazar 	sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
20773446cb78SLijo Lazar 	sdma_v4_4_2_inst_enable(adev, false, inst_mask);
20783446cb78SLijo Lazar 
20793446cb78SLijo Lazar 	return 0;
20803446cb78SLijo Lazar }
20813446cb78SLijo Lazar 
20823446cb78SLijo Lazar struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs = {
20833446cb78SLijo Lazar 	.suspend = &sdma_v4_4_2_xcp_suspend,
20843446cb78SLijo Lazar 	.resume = &sdma_v4_4_2_xcp_resume
20853446cb78SLijo Lazar };
2086dc37a919SHawking Zhang 
2087dc37a919SHawking Zhang static const struct amdgpu_ras_err_status_reg_entry sdma_v4_4_2_ue_reg_list[] = {
2088dc37a919SHawking Zhang 	{AMDGPU_RAS_REG_ENTRY(SDMA0, 0, regSDMA_UE_ERR_STATUS_LO, regSDMA_UE_ERR_STATUS_HI),
2089dc37a919SHawking Zhang 	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SDMA"},
2090dc37a919SHawking Zhang };
2091dc37a919SHawking Zhang 
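/* human-readable names for the SDMA memory blocks reported in RAS error status */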
2092dc37a919SHawking Zhang static const struct amdgpu_ras_memory_id_entry sdma_v4_4_2_ras_memory_list[] = {
2093dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF0, "SDMA_MBANK_DATA_BUF0"},
2094dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF1, "SDMA_MBANK_DATA_BUF1"},
2095dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF2, "SDMA_MBANK_DATA_BUF2"},
2096dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF3, "SDMA_MBANK_DATA_BUF3"},
2097dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF4, "SDMA_MBANK_DATA_BUF4"},
2098dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF5, "SDMA_MBANK_DATA_BUF5"},
2099dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF6, "SDMA_MBANK_DATA_BUF6"},
2100dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF7, "SDMA_MBANK_DATA_BUF7"},
2101dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF8, "SDMA_MBANK_DATA_BUF8"},
2102dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF9, "SDMA_MBANK_DATA_BUF9"},
2103dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF10, "SDMA_MBANK_DATA_BUF10"},
2104dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF11, "SDMA_MBANK_DATA_BUF11"},
2105dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF12, "SDMA_MBANK_DATA_BUF12"},
2106dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF13, "SDMA_MBANK_DATA_BUF13"},
2107dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF14, "SDMA_MBANK_DATA_BUF14"},
2108dc37a919SHawking Zhang 	{AMDGPU_SDMA_MBANK_DATA_BUF15, "SDMA_MBANK_DATA_BUF15"},
2109dc37a919SHawking Zhang 	{AMDGPU_SDMA_UCODE_BUF, "SDMA_UCODE_BUF"},
2110dc37a919SHawking Zhang 	{AMDGPU_SDMA_RB_CMD_BUF, "SDMA_RB_CMD_BUF"},
2111dc37a919SHawking Zhang 	{AMDGPU_SDMA_IB_CMD_BUF, "SDMA_IB_CMD_BUF"},
2112dc37a919SHawking Zhang 	{AMDGPU_SDMA_UTCL1_RD_FIFO, "SDMA_UTCL1_RD_FIFO"},
2113dc37a919SHawking Zhang 	{AMDGPU_SDMA_UTCL1_RDBST_FIFO, "SDMA_UTCL1_RDBST_FIFO"},
2114dc37a919SHawking Zhang 	{AMDGPU_SDMA_UTCL1_WR_FIFO, "SDMA_UTCL1_WR_FIFO"},
2115dc37a919SHawking Zhang 	{AMDGPU_SDMA_DATA_LUT_FIFO, "SDMA_DATA_LUT_FIFO"},
2116dc37a919SHawking Zhang 	{AMDGPU_SDMA_SPLIT_DAT_BUF, "SDMA_SPLIT_DAT_BUF"},
2117dc37a919SHawking Zhang };
2118dc37a919SHawking Zhang 
2119dc37a919SHawking Zhang static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
2120dc37a919SHawking Zhang 						   uint32_t sdma_inst,
2121dc37a919SHawking Zhang 						   void *ras_err_status)
2122dc37a919SHawking Zhang {
2123dc37a919SHawking Zhang 	struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
21241ad29cb3SStanley.Yang 	uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
2125dc37a919SHawking Zhang 
2126dc37a919SHawking Zhang 	/* sdma v4_4_2 doesn't support querying CE (correctable error) counts */
2127dc37a919SHawking Zhang 	amdgpu_ras_inst_query_ras_error_count(adev,
2128dc37a919SHawking Zhang 					sdma_v4_4_2_ue_reg_list,
2129dc37a919SHawking Zhang 					ARRAY_SIZE(sdma_v4_4_2_ue_reg_list),
2130dc37a919SHawking Zhang 					sdma_v4_4_2_ras_memory_list,
2131dc37a919SHawking Zhang 					ARRAY_SIZE(sdma_v4_4_2_ras_memory_list),
21321ad29cb3SStanley.Yang 					sdma_dev_inst,
2133dc37a919SHawking Zhang 					AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
2134dc37a919SHawking Zhang 					&err_data->ue_count);
2135dc37a919SHawking Zhang }
2136dc37a919SHawking Zhang 
2137dc37a919SHawking Zhang static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
2138dc37a919SHawking Zhang 					      void *ras_err_status)
2139dc37a919SHawking Zhang {
2140dc37a919SHawking Zhang 	uint32_t inst_mask;
2141dc37a919SHawking Zhang 	int i = 0;
2142dc37a919SHawking Zhang 
2143dc37a919SHawking Zhang 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
2144dc37a919SHawking Zhang 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
2145dc37a919SHawking Zhang 		for_each_inst(i, inst_mask)
2146dc37a919SHawking Zhang 			sdma_v4_4_2_inst_query_ras_error_count(adev, i, ras_err_status);
2147dc37a919SHawking Zhang 	} else {
2148dc37a919SHawking Zhang 		dev_warn(adev->dev, "SDMA RAS is not supported\n");
2149dc37a919SHawking Zhang 	}
2150dc37a919SHawking Zhang }
2151a64b1552SHawking Zhang 
2152a64b1552SHawking Zhang static void sdma_v4_4_2_inst_reset_ras_error_count(struct amdgpu_device *adev,
2153a64b1552SHawking Zhang 						   uint32_t sdma_inst)
2154a64b1552SHawking Zhang {
21551ad29cb3SStanley.Yang 	uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
21561ad29cb3SStanley.Yang 
2157a64b1552SHawking Zhang 	amdgpu_ras_inst_reset_ras_error_count(adev,
2158a64b1552SHawking Zhang 					sdma_v4_4_2_ue_reg_list,
2159a64b1552SHawking Zhang 					ARRAY_SIZE(sdma_v4_4_2_ue_reg_list),
21601ad29cb3SStanley.Yang 					sdma_dev_inst);
2161a64b1552SHawking Zhang }
2162a64b1552SHawking Zhang 
2163a64b1552SHawking Zhang static void sdma_v4_4_2_reset_ras_error_count(struct amdgpu_device *adev)
2164a64b1552SHawking Zhang {
2165a64b1552SHawking Zhang 	uint32_t inst_mask;
2166a64b1552SHawking Zhang 	int i = 0;
2167a64b1552SHawking Zhang 
2168a64b1552SHawking Zhang 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
2169a64b1552SHawking Zhang 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
2170a64b1552SHawking Zhang 		for_each_inst(i, inst_mask)
2171a64b1552SHawking Zhang 			sdma_v4_4_2_inst_reset_ras_error_count(adev, i);
2172a64b1552SHawking Zhang 	} else {
2173a64b1552SHawking Zhang 		dev_warn(adev->dev, "SDMA RAS is not supported\n");
2174a64b1552SHawking Zhang 	}
2175a64b1552SHawking Zhang }
21761e69fde7SHawking Zhang 
21771e69fde7SHawking Zhang static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
21781e69fde7SHawking Zhang 	.query_ras_error_count = sdma_v4_4_2_query_ras_error_count,
21791e69fde7SHawking Zhang 	.reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
21801e69fde7SHawking Zhang };
21811e69fde7SHawking Zhang 
21821e69fde7SHawking Zhang static struct amdgpu_sdma_ras sdma_v4_4_2_ras = {
21831e69fde7SHawking Zhang 	.ras_block = {
21841e69fde7SHawking Zhang 		.hw_ops = &sdma_v4_4_2_ras_hw_ops,
21851e69fde7SHawking Zhang 	},
21861e69fde7SHawking Zhang };
21871e69fde7SHawking Zhang 
21881e69fde7SHawking Zhang static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev)
21891e69fde7SHawking Zhang {
21901e69fde7SHawking Zhang 	adev->sdma.ras = &sdma_v4_4_2_ras;
21911e69fde7SHawking Zhang }
2192