/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#undef pr_fmt
#define pr_fmt(fmt) "kfd2kgd: " fmt

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ucode.h"
#include "soc15_hw_ip.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
#include "athub/athub_2_0_0_offset.h"
#include "athub/athub_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config,
		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
		uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
		unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
		uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout);
#if 0
static uint32_t get_watch_base_addr(struct amdgpu_device *adev);
#endif
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because REG_GET_FIELD() is used, this function lives in the
 * ASIC-specific file.
 */
static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;
#if 0
/* TODO - confirm REG_GET_FIELD x2, should be OK as is... but
 * MC_ARB_RAMCFG register doesn't exist on Vega10 - initial amdgpu
 * changes commented out related code, doing the same here for now but
 * need to sync with Ken et al
 */
	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);
#endif

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

static const struct kfd2kgd_calls kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.invalidate_tlbs = invalidate_tlbs,
	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.get_tile_config = amdgpu_amdkfd_get_tile_config,
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

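	/* KFD numbers compute pipes flat across both MECs; split the index
	 * into a 1-based MEC (ME 0 is the graphics engine in GRBM) and the
	 * pipe within that MEC.
	 */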
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

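/* Each queue owns one bit of the 32-bit CP_PQ_WPTR_POLL_CNTL1 mask,
 * indexed by the flat (pipe, queue) number modulo 32; the mask is used
 * in kgd_hqd_load() to enable WPTR polling for a queue.
 */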
static uint32_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
			    queue_id) & 31;

	return 1U << bit;
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX10 (it was already removed on GFX9) */

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
	/*
	 * need to do this twice, once for gfx and once for mmhub
	 * for ATC add 16 to VMID for mmhub, for IH different registers.
	 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
	 */

	pr_debug("ATHUB, reg %x\n",
		 SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

#if 0
	/* TODO: uncomment this code when the hardware support is ready. */
	while (!(RREG32(SOC15_REG_OFFSET(
				ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	pr_debug("ATHUB mapping update finished\n");
	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);
#endif

	/* Mapping vmid to pasid also for IH block */
	pr_debug("update mapping for IH block and mmhub\n");
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	return 0;
}

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

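/* CPC_INT_CNTL is banked per MEC pipe; select the pipe through SRBM
 * before enabling the timestamp and opcode-error interrupts.
 */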
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

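/* Return the dword offset of an SDMA RLC queue's register block. Callers
 * add this to the mmSDMA0_RLC0_* register names to address the queue.
 */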
static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
		/* On gfx10, mmSDMA1_xxx registers are defined NOT based
		 * on SDMA1 base address (dw 0x1860) but based on SDMA0
		 * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL
		 * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc
		 * below
		 */
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
	};
	uint32_t retval;

	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
					       mmSDMA0_RLC0_RB_CNTL);

	pr_debug("sdma base address: 0x%x\n", retval);

	return retval;
}

#if 0
static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
{
	uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
			mmTCP_WATCH0_ADDR_H;

	pr_debug("kfd: reg watch base address: 0x%x\n", retval);

	return retval;
}
#endif

static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}

static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(kgd, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0 */
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
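		/* Tell the RLC CP scheduler which MEC/pipe/queue serves the
		 * HIQ; bit 7 of the encoding appears to mark the entry valid.
		 */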
		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32(reg, mqd_hqd[reg - hqd_base]);

	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uint64_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 get_queue_mask(adev, pipe_id, queue_id));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}

static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
					    m->sdma_queue_id);
	pr_debug("sdma load base addr %x for engine %d, queue %d\n",
		 sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id);
	sdmax_gfx_context_cntl = m->sdma_engine_id ?
		SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
		SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}
	data = RREG32(sdmax_gfx_context_cntl);
	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
			     RESUME_CTX, 0);
	WREG32(sdmax_gfx_context_cntl, data);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
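		/* Couldn't read the user-mode wptr (no usable mm); set
		 * WPTR == RPTR so the ring resumes empty.
		 */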
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id);
	pr_debug("sdma base addr %x\n", sdma_base_addr);

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_base_addr + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_base_addr + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_base_addr + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_base_addr + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		   high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v10_compute_mqd *m = get_mqd(mqd);

#if 0
	unsigned long flags;
	int retry;
#endif

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

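	/* Translate the KFD preemption type into the CP's HQD dequeue
	 * request encoding; unknown types fall back to draining the pipe.
	 */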
	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

#if 0 /* Is this still needed? */
	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
	 * cleared before continuing. Also, ensure wait times are set to at
	 * least 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();
#endif

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
{
	signed long r;
	uint32_t seq;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;

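	/* Emit a PACKET3_INVALIDATE_TLBS on the KIQ ring and poll the
	 * fence until the CP has processed the invalidation.
	 */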
	spin_lock(&adev->gfx.kiq.ring_lock);
	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package */
	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid));
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);

	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
	if (r < 1) {
		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
		return -ETIME;
	}

	return 0;
}

static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;

	if (amdgpu_emu_mode == 0 && ring->sched.ready)
		return invalidate_tlbs_with_kiq(adev, pasid);

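	/* Fallback for emulation mode or an unready KIQ: scan the ATC
	 * VMID-PASID mappings and flush the matching VMID's TLB directly.
	 */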
	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;
		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
				== pasid) {
				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
						AMDGPU_GFXHUB_0, 0);
				break;
			}
		}
	}

	return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("non kfd vmid %d\n", vmid);
		return 0;
	}

	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
	return 0;
}

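/* Address watchpoints are not wired up on gfx10 yet; the address-watch
 * callbacks below are stubs.
 */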
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SA_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint64_t base = page_table_base | AMDGPU_PTE_VALID;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	/* TODO: take advantage of per-process address space size. For
	 * now, all processes share the same address space size, like
	 * on GFX8 and older.
	 */
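	/* Each GCVM context has LO32/HI32 register pairs, hence the vmid*2
	 * stride: map the VMID's range as 0..max_pfn - 1 and point it at
	 * the process page table.
	 */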
	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
			lower_32_bits(adev->vm_manager.max_pfn - 1));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
			upper_32_bits(adev->vm_manager.max_pfn - 1));

	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
}