1d5a114a6SFelix Kuehling /* 2d5a114a6SFelix Kuehling * Copyright 2014-2018 Advanced Micro Devices, Inc. 3d5a114a6SFelix Kuehling * 4d5a114a6SFelix Kuehling * Permission is hereby granted, free of charge, to any person obtaining a 5d5a114a6SFelix Kuehling * copy of this software and associated documentation files (the "Software"), 6d5a114a6SFelix Kuehling * to deal in the Software without restriction, including without limitation 7d5a114a6SFelix Kuehling * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d5a114a6SFelix Kuehling * and/or sell copies of the Software, and to permit persons to whom the 9d5a114a6SFelix Kuehling * Software is furnished to do so, subject to the following conditions: 10d5a114a6SFelix Kuehling * 11d5a114a6SFelix Kuehling * The above copyright notice and this permission notice shall be included in 12d5a114a6SFelix Kuehling * all copies or substantial portions of the Software. 13d5a114a6SFelix Kuehling * 14d5a114a6SFelix Kuehling * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d5a114a6SFelix Kuehling * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d5a114a6SFelix Kuehling * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17d5a114a6SFelix Kuehling * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18d5a114a6SFelix Kuehling * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19d5a114a6SFelix Kuehling * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20d5a114a6SFelix Kuehling * OTHER DEALINGS IN THE SOFTWARE. 
21d5a114a6SFelix Kuehling */ 225634e38cSKuehling, Felix #include <linux/mmu_context.h> 23fdf2f6c5SSam Ravnborg 24d5a114a6SFelix Kuehling #include "amdgpu.h" 25d5a114a6SFelix Kuehling #include "amdgpu_amdkfd.h" 26d5a114a6SFelix Kuehling #include "gc/gc_9_0_offset.h" 27d5a114a6SFelix Kuehling #include "gc/gc_9_0_sh_mask.h" 28d5a114a6SFelix Kuehling #include "vega10_enum.h" 29d5a114a6SFelix Kuehling #include "sdma0/sdma0_4_0_offset.h" 30d5a114a6SFelix Kuehling #include "sdma0/sdma0_4_0_sh_mask.h" 31d5a114a6SFelix Kuehling #include "sdma1/sdma1_4_0_offset.h" 32d5a114a6SFelix Kuehling #include "sdma1/sdma1_4_0_sh_mask.h" 33d5a114a6SFelix Kuehling #include "athub/athub_1_0_offset.h" 34d5a114a6SFelix Kuehling #include "athub/athub_1_0_sh_mask.h" 35d5a114a6SFelix Kuehling #include "oss/osssys_4_0_offset.h" 36d5a114a6SFelix Kuehling #include "oss/osssys_4_0_sh_mask.h" 37d5a114a6SFelix Kuehling #include "soc15_common.h" 38d5a114a6SFelix Kuehling #include "v9_structs.h" 39d5a114a6SFelix Kuehling #include "soc15.h" 40d5a114a6SFelix Kuehling #include "soc15d.h" 41e4312d45SAlex Deucher #include "mmhub_v1_0.h" 42e4312d45SAlex Deucher #include "gfxhub_v1_0.h" 43d5a114a6SFelix Kuehling 44d5a114a6SFelix Kuehling 45d5a114a6SFelix Kuehling enum hqd_dequeue_request_type { 46d5a114a6SFelix Kuehling NO_ACTION = 0, 47d5a114a6SFelix Kuehling DRAIN_PIPE, 48d5a114a6SFelix Kuehling RESET_WAVES 49d5a114a6SFelix Kuehling }; 50d5a114a6SFelix Kuehling 51d5a114a6SFelix Kuehling 52d5a114a6SFelix Kuehling /* Because of REG_GET_FIELD() being used, we put this function in the 53d5a114a6SFelix Kuehling * asic specific file. 
54d5a114a6SFelix Kuehling */ 553e205a08SOak Zeng int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, 56d5a114a6SFelix Kuehling struct tile_config *config) 57d5a114a6SFelix Kuehling { 58d5a114a6SFelix Kuehling struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 59d5a114a6SFelix Kuehling 60d5a114a6SFelix Kuehling config->gb_addr_config = adev->gfx.config.gb_addr_config; 61d5a114a6SFelix Kuehling 62d5a114a6SFelix Kuehling config->tile_config_ptr = adev->gfx.config.tile_mode_array; 63d5a114a6SFelix Kuehling config->num_tile_configs = 64d5a114a6SFelix Kuehling ARRAY_SIZE(adev->gfx.config.tile_mode_array); 65d5a114a6SFelix Kuehling config->macro_tile_config_ptr = 66d5a114a6SFelix Kuehling adev->gfx.config.macrotile_mode_array; 67d5a114a6SFelix Kuehling config->num_macro_tile_configs = 68d5a114a6SFelix Kuehling ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 69d5a114a6SFelix Kuehling 70d5a114a6SFelix Kuehling return 0; 71d5a114a6SFelix Kuehling } 72d5a114a6SFelix Kuehling 73d5a114a6SFelix Kuehling static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) 74d5a114a6SFelix Kuehling { 75d5a114a6SFelix Kuehling return (struct amdgpu_device *)kgd; 76d5a114a6SFelix Kuehling } 77d5a114a6SFelix Kuehling 78d5a114a6SFelix Kuehling static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, 79d5a114a6SFelix Kuehling uint32_t queue, uint32_t vmid) 80d5a114a6SFelix Kuehling { 81d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 82d5a114a6SFelix Kuehling 83d5a114a6SFelix Kuehling mutex_lock(&adev->srbm_mutex); 84d5a114a6SFelix Kuehling soc15_grbm_select(adev, mec, pipe, queue, vmid); 85d5a114a6SFelix Kuehling } 86d5a114a6SFelix Kuehling 87d5a114a6SFelix Kuehling static void unlock_srbm(struct kgd_dev *kgd) 88d5a114a6SFelix Kuehling { 89d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 90d5a114a6SFelix Kuehling 91d5a114a6SFelix Kuehling soc15_grbm_select(adev, 0, 0, 0, 0); 92d5a114a6SFelix 
Kuehling mutex_unlock(&adev->srbm_mutex); 93d5a114a6SFelix Kuehling } 94d5a114a6SFelix Kuehling 95d5a114a6SFelix Kuehling static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, 96d5a114a6SFelix Kuehling uint32_t queue_id) 97d5a114a6SFelix Kuehling { 98d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 99d5a114a6SFelix Kuehling 100d5a114a6SFelix Kuehling uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 101d5a114a6SFelix Kuehling uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 102d5a114a6SFelix Kuehling 103d5a114a6SFelix Kuehling lock_srbm(kgd, mec, pipe, queue_id, 0); 104d5a114a6SFelix Kuehling } 105d5a114a6SFelix Kuehling 10635cd89d5SAaron Liu static uint64_t get_queue_mask(struct amdgpu_device *adev, 107d5a114a6SFelix Kuehling uint32_t pipe_id, uint32_t queue_id) 108d5a114a6SFelix Kuehling { 10935cd89d5SAaron Liu unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + 11035cd89d5SAaron Liu queue_id; 111d5a114a6SFelix Kuehling 11235cd89d5SAaron Liu return 1ull << bit; 113d5a114a6SFelix Kuehling } 114d5a114a6SFelix Kuehling 115d5a114a6SFelix Kuehling static void release_queue(struct kgd_dev *kgd) 116d5a114a6SFelix Kuehling { 117d5a114a6SFelix Kuehling unlock_srbm(kgd); 118d5a114a6SFelix Kuehling } 119d5a114a6SFelix Kuehling 1203e205a08SOak Zeng void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, 121d5a114a6SFelix Kuehling uint32_t sh_mem_config, 122d5a114a6SFelix Kuehling uint32_t sh_mem_ape1_base, 123d5a114a6SFelix Kuehling uint32_t sh_mem_ape1_limit, 124d5a114a6SFelix Kuehling uint32_t sh_mem_bases) 125d5a114a6SFelix Kuehling { 126d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 127d5a114a6SFelix Kuehling 128d5a114a6SFelix Kuehling lock_srbm(kgd, 0, 0, 0, vmid); 129d5a114a6SFelix Kuehling 1301bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); 1311bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, 
mmSH_MEM_BASES), sh_mem_bases); 132d5a114a6SFelix Kuehling /* APE1 no longer exists on GFX9 */ 133d5a114a6SFelix Kuehling 134d5a114a6SFelix Kuehling unlock_srbm(kgd); 135d5a114a6SFelix Kuehling } 136d5a114a6SFelix Kuehling 1373e205a08SOak Zeng int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, 138d5a114a6SFelix Kuehling unsigned int vmid) 139d5a114a6SFelix Kuehling { 140d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 141d5a114a6SFelix Kuehling 142d5a114a6SFelix Kuehling /* 143d5a114a6SFelix Kuehling * We have to assume that there is no outstanding mapping. 144d5a114a6SFelix Kuehling * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because 145d5a114a6SFelix Kuehling * a mapping is in progress or because a mapping finished 146d5a114a6SFelix Kuehling * and the SW cleared it. 147d5a114a6SFelix Kuehling * So the protocol is to always wait & clear. 148d5a114a6SFelix Kuehling */ 149d5a114a6SFelix Kuehling uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | 150d5a114a6SFelix Kuehling ATC_VMID0_PASID_MAPPING__VALID_MASK; 151d5a114a6SFelix Kuehling 152d5a114a6SFelix Kuehling /* 153d5a114a6SFelix Kuehling * need to do this twice, once for gfx and once for mmhub 154d5a114a6SFelix Kuehling * for ATC add 16 to VMID for mmhub, for IH different registers. 155d5a114a6SFelix Kuehling * ATC_VMID0..15 registers are separate from ATC_VMID16..31. 
156d5a114a6SFelix Kuehling */ 157d5a114a6SFelix Kuehling 158d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, 159d5a114a6SFelix Kuehling pasid_mapping); 160d5a114a6SFelix Kuehling 161d5a114a6SFelix Kuehling while (!(RREG32(SOC15_REG_OFFSET( 162d5a114a6SFelix Kuehling ATHUB, 0, 163d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & 164d5a114a6SFelix Kuehling (1U << vmid))) 165d5a114a6SFelix Kuehling cpu_relax(); 166d5a114a6SFelix Kuehling 167d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, 168d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), 169d5a114a6SFelix Kuehling 1U << vmid); 170d5a114a6SFelix Kuehling 171d5a114a6SFelix Kuehling /* Mapping vmid to pasid also for IH block */ 172d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, 173d5a114a6SFelix Kuehling pasid_mapping); 174d5a114a6SFelix Kuehling 175d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, 176d5a114a6SFelix Kuehling pasid_mapping); 177d5a114a6SFelix Kuehling 178d5a114a6SFelix Kuehling while (!(RREG32(SOC15_REG_OFFSET( 179d5a114a6SFelix Kuehling ATHUB, 0, 180d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & 181d5a114a6SFelix Kuehling (1U << (vmid + 16)))) 182d5a114a6SFelix Kuehling cpu_relax(); 183d5a114a6SFelix Kuehling 184d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, 185d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), 186d5a114a6SFelix Kuehling 1U << (vmid + 16)); 187d5a114a6SFelix Kuehling 188d5a114a6SFelix Kuehling /* Mapping vmid to pasid also for IH block */ 189d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, 190d5a114a6SFelix Kuehling pasid_mapping); 191d5a114a6SFelix Kuehling return 0; 192d5a114a6SFelix Kuehling } 193d5a114a6SFelix Kuehling 194d5a114a6SFelix Kuehling /* TODO - RING0 form of field is obsolete, seems to date back to SI 195d5a114a6SFelix 
Kuehling * but still works 196d5a114a6SFelix Kuehling */ 197d5a114a6SFelix Kuehling 1983e205a08SOak Zeng int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) 199d5a114a6SFelix Kuehling { 200d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 201d5a114a6SFelix Kuehling uint32_t mec; 202d5a114a6SFelix Kuehling uint32_t pipe; 203d5a114a6SFelix Kuehling 204d5a114a6SFelix Kuehling mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 205d5a114a6SFelix Kuehling pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 206d5a114a6SFelix Kuehling 207d5a114a6SFelix Kuehling lock_srbm(kgd, mec, pipe, 0, 0); 208d5a114a6SFelix Kuehling 209d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), 210d5a114a6SFelix Kuehling CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | 211d5a114a6SFelix Kuehling CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); 212d5a114a6SFelix Kuehling 213d5a114a6SFelix Kuehling unlock_srbm(kgd); 214d5a114a6SFelix Kuehling 215d5a114a6SFelix Kuehling return 0; 216d5a114a6SFelix Kuehling } 217d5a114a6SFelix Kuehling 218b55a8b8bSYong Zhao static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, 219d5a114a6SFelix Kuehling unsigned int engine_id, 220d5a114a6SFelix Kuehling unsigned int queue_id) 221d5a114a6SFelix Kuehling { 222b55a8b8bSYong Zhao uint32_t sdma_engine_reg_base[2] = { 223d5a114a6SFelix Kuehling SOC15_REG_OFFSET(SDMA0, 0, 224d5a114a6SFelix Kuehling mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, 225d5a114a6SFelix Kuehling SOC15_REG_OFFSET(SDMA1, 0, 226d5a114a6SFelix Kuehling mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL 227d5a114a6SFelix Kuehling }; 228b55a8b8bSYong Zhao uint32_t retval = sdma_engine_reg_base[engine_id] 229b55a8b8bSYong Zhao + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); 230d5a114a6SFelix Kuehling 231b55a8b8bSYong Zhao pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, 232b55a8b8bSYong Zhao queue_id, retval); 233d5a114a6SFelix Kuehling 
234d5a114a6SFelix Kuehling return retval; 235d5a114a6SFelix Kuehling } 236d5a114a6SFelix Kuehling 237d5a114a6SFelix Kuehling static inline struct v9_mqd *get_mqd(void *mqd) 238d5a114a6SFelix Kuehling { 239d5a114a6SFelix Kuehling return (struct v9_mqd *)mqd; 240d5a114a6SFelix Kuehling } 241d5a114a6SFelix Kuehling 242d5a114a6SFelix Kuehling static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) 243d5a114a6SFelix Kuehling { 244d5a114a6SFelix Kuehling return (struct v9_sdma_mqd *)mqd; 245d5a114a6SFelix Kuehling } 246d5a114a6SFelix Kuehling 2473e205a08SOak Zeng int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 248d5a114a6SFelix Kuehling uint32_t queue_id, uint32_t __user *wptr, 249d5a114a6SFelix Kuehling uint32_t wptr_shift, uint32_t wptr_mask, 250d5a114a6SFelix Kuehling struct mm_struct *mm) 251d5a114a6SFelix Kuehling { 252d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 253d5a114a6SFelix Kuehling struct v9_mqd *m; 254d5a114a6SFelix Kuehling uint32_t *mqd_hqd; 255d5a114a6SFelix Kuehling uint32_t reg, hqd_base, data; 256d5a114a6SFelix Kuehling 257d5a114a6SFelix Kuehling m = get_mqd(mqd); 258d5a114a6SFelix Kuehling 259d5a114a6SFelix Kuehling acquire_queue(kgd, pipe_id, queue_id); 260d5a114a6SFelix Kuehling 261d5a114a6SFelix Kuehling /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ 262d5a114a6SFelix Kuehling mqd_hqd = &m->cp_mqd_base_addr_lo; 263d5a114a6SFelix Kuehling hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); 264d5a114a6SFelix Kuehling 265d5a114a6SFelix Kuehling for (reg = hqd_base; 266d5a114a6SFelix Kuehling reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) 2671bff7f6cSTrigger Huang WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); 268d5a114a6SFelix Kuehling 269d5a114a6SFelix Kuehling 270d5a114a6SFelix Kuehling /* Activate doorbell logic before triggering WPTR poll. 
*/ 271d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, 272d5a114a6SFelix Kuehling CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 2731bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); 274d5a114a6SFelix Kuehling 275d5a114a6SFelix Kuehling if (wptr) { 276d5a114a6SFelix Kuehling /* Don't read wptr with get_user because the user 277d5a114a6SFelix Kuehling * context may not be accessible (if this function 278d5a114a6SFelix Kuehling * runs in a work queue). Instead trigger a one-shot 279d5a114a6SFelix Kuehling * polling read from memory in the CP. This assumes 280d5a114a6SFelix Kuehling * that wptr is GPU-accessible in the queue's VMID via 281d5a114a6SFelix Kuehling * ATC or SVM. WPTR==RPTR before starting the poll so 282d5a114a6SFelix Kuehling * the CP starts fetching new commands from the right 283d5a114a6SFelix Kuehling * place. 284d5a114a6SFelix Kuehling * 285d5a114a6SFelix Kuehling * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit 286d5a114a6SFelix Kuehling * tricky. Assume that the queue didn't overflow. The 287d5a114a6SFelix Kuehling * number of valid bits in the 32-bit RPTR depends on 288d5a114a6SFelix Kuehling * the queue size. The remaining bits are taken from 289d5a114a6SFelix Kuehling * the saved 64-bit WPTR. If the WPTR wrapped, add the 290d5a114a6SFelix Kuehling * queue size. 
291d5a114a6SFelix Kuehling */ 292d5a114a6SFelix Kuehling uint32_t queue_size = 293d5a114a6SFelix Kuehling 2 << REG_GET_FIELD(m->cp_hqd_pq_control, 294d5a114a6SFelix Kuehling CP_HQD_PQ_CONTROL, QUEUE_SIZE); 295d5a114a6SFelix Kuehling uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); 296d5a114a6SFelix Kuehling 297d5a114a6SFelix Kuehling if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) 298d5a114a6SFelix Kuehling guessed_wptr += queue_size; 299d5a114a6SFelix Kuehling guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); 300d5a114a6SFelix Kuehling guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; 301d5a114a6SFelix Kuehling 3021bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), 303d5a114a6SFelix Kuehling lower_32_bits(guessed_wptr)); 3041bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), 305d5a114a6SFelix Kuehling upper_32_bits(guessed_wptr)); 3061bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 307ebe1d22bSArnd Bergmann lower_32_bits((uintptr_t)wptr)); 3081bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 309ebe1d22bSArnd Bergmann upper_32_bits((uintptr_t)wptr)); 310d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), 31135cd89d5SAaron Liu (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); 312d5a114a6SFelix Kuehling } 313d5a114a6SFelix Kuehling 314d5a114a6SFelix Kuehling /* Start the EOP fetcher */ 3151bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), 316d5a114a6SFelix Kuehling REG_SET_FIELD(m->cp_hqd_eop_rptr, 317d5a114a6SFelix Kuehling CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); 318d5a114a6SFelix Kuehling 319d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); 3201bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); 321d5a114a6SFelix Kuehling 322d5a114a6SFelix Kuehling release_queue(kgd); 
323d5a114a6SFelix Kuehling 324d5a114a6SFelix Kuehling return 0; 325d5a114a6SFelix Kuehling } 326d5a114a6SFelix Kuehling 32735cd89d5SAaron Liu int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, 32835cd89d5SAaron Liu uint32_t pipe_id, uint32_t queue_id, 32935cd89d5SAaron Liu uint32_t doorbell_off) 33035cd89d5SAaron Liu { 33135cd89d5SAaron Liu struct amdgpu_device *adev = get_amdgpu_device(kgd); 33235cd89d5SAaron Liu struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 33335cd89d5SAaron Liu struct v9_mqd *m; 33435cd89d5SAaron Liu uint32_t mec, pipe; 33535cd89d5SAaron Liu int r; 33635cd89d5SAaron Liu 33735cd89d5SAaron Liu m = get_mqd(mqd); 33835cd89d5SAaron Liu 33935cd89d5SAaron Liu acquire_queue(kgd, pipe_id, queue_id); 34035cd89d5SAaron Liu 34135cd89d5SAaron Liu mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 34235cd89d5SAaron Liu pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 34335cd89d5SAaron Liu 34435cd89d5SAaron Liu pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 34535cd89d5SAaron Liu mec, pipe, queue_id); 34635cd89d5SAaron Liu 34735cd89d5SAaron Liu spin_lock(&adev->gfx.kiq.ring_lock); 34835cd89d5SAaron Liu r = amdgpu_ring_alloc(kiq_ring, 7); 34935cd89d5SAaron Liu if (r) { 35035cd89d5SAaron Liu pr_err("Failed to alloc KIQ (%d).\n", r); 35135cd89d5SAaron Liu goto out_unlock; 35235cd89d5SAaron Liu } 35335cd89d5SAaron Liu 35435cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 35535cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, 35635cd89d5SAaron Liu PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 35735cd89d5SAaron Liu PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ 35835cd89d5SAaron Liu PACKET3_MAP_QUEUES_QUEUE(queue_id) | 35935cd89d5SAaron Liu PACKET3_MAP_QUEUES_PIPE(pipe) | 36035cd89d5SAaron Liu PACKET3_MAP_QUEUES_ME((mec - 1)) | 36135cd89d5SAaron Liu PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 36235cd89d5SAaron Liu PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: 
all_on_one_pipe */ 36335cd89d5SAaron Liu PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ 36435cd89d5SAaron Liu PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 36535cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, 36635cd89d5SAaron Liu PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); 36735cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); 36835cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); 36935cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); 37035cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); 37135cd89d5SAaron Liu amdgpu_ring_commit(kiq_ring); 37235cd89d5SAaron Liu 37335cd89d5SAaron Liu out_unlock: 37435cd89d5SAaron Liu spin_unlock(&adev->gfx.kiq.ring_lock); 37535cd89d5SAaron Liu release_queue(kgd); 37635cd89d5SAaron Liu 37735cd89d5SAaron Liu return r; 37835cd89d5SAaron Liu } 37935cd89d5SAaron Liu 3803e205a08SOak Zeng int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, 381d5a114a6SFelix Kuehling uint32_t pipe_id, uint32_t queue_id, 382d5a114a6SFelix Kuehling uint32_t (**dump)[2], uint32_t *n_regs) 383d5a114a6SFelix Kuehling { 384d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 385d5a114a6SFelix Kuehling uint32_t i = 0, reg; 386d5a114a6SFelix Kuehling #define HQD_N_REGS 56 387d5a114a6SFelix Kuehling #define DUMP_REG(addr) do { \ 388d5a114a6SFelix Kuehling if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ 389d5a114a6SFelix Kuehling break; \ 390d5a114a6SFelix Kuehling (*dump)[i][0] = (addr) << 2; \ 391d5a114a6SFelix Kuehling (*dump)[i++][1] = RREG32(addr); \ 392d5a114a6SFelix Kuehling } while (0) 393d5a114a6SFelix Kuehling 3946da2ec56SKees Cook *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); 395d5a114a6SFelix Kuehling if (*dump == NULL) 396d5a114a6SFelix Kuehling return -ENOMEM; 397d5a114a6SFelix Kuehling 398d5a114a6SFelix Kuehling acquire_queue(kgd, pipe_id, queue_id); 399d5a114a6SFelix Kuehling 400d5a114a6SFelix 
Kuehling for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); 401d5a114a6SFelix Kuehling reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) 402d5a114a6SFelix Kuehling DUMP_REG(reg); 403d5a114a6SFelix Kuehling 404d5a114a6SFelix Kuehling release_queue(kgd); 405d5a114a6SFelix Kuehling 406d5a114a6SFelix Kuehling WARN_ON_ONCE(i != HQD_N_REGS); 407d5a114a6SFelix Kuehling *n_regs = i; 408d5a114a6SFelix Kuehling 409d5a114a6SFelix Kuehling return 0; 410d5a114a6SFelix Kuehling } 411d5a114a6SFelix Kuehling 412d5a114a6SFelix Kuehling static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, 413d5a114a6SFelix Kuehling uint32_t __user *wptr, struct mm_struct *mm) 414d5a114a6SFelix Kuehling { 415d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 416d5a114a6SFelix Kuehling struct v9_sdma_mqd *m; 417b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset; 418d5a114a6SFelix Kuehling unsigned long end_jiffies; 419d5a114a6SFelix Kuehling uint32_t data; 420d5a114a6SFelix Kuehling uint64_t data64; 421d5a114a6SFelix Kuehling uint64_t __user *wptr64 = (uint64_t __user *)wptr; 422d5a114a6SFelix Kuehling 423d5a114a6SFelix Kuehling m = get_sdma_mqd(mqd); 424b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 425d5a114a6SFelix Kuehling m->sdma_queue_id); 426d5a114a6SFelix Kuehling 427b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 428d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); 429d5a114a6SFelix Kuehling 430d5a114a6SFelix Kuehling end_jiffies = msecs_to_jiffies(2000) + jiffies; 431d5a114a6SFelix Kuehling while (true) { 432b55a8b8bSYong Zhao data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 433d5a114a6SFelix Kuehling if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 434d5a114a6SFelix Kuehling break; 435812330ebSYong Zhao if (time_after(jiffies, end_jiffies)) { 436812330ebSYong Zhao pr_err("SDMA RLC not idle in %s\n", __func__); 437d5a114a6SFelix 
Kuehling return -ETIME; 438812330ebSYong Zhao } 439d5a114a6SFelix Kuehling usleep_range(500, 1000); 440d5a114a6SFelix Kuehling } 441d5a114a6SFelix Kuehling 442b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, 443d5a114a6SFelix Kuehling m->sdmax_rlcx_doorbell_offset); 444d5a114a6SFelix Kuehling 445d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, 446d5a114a6SFelix Kuehling ENABLE, 1); 447b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); 448b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, 449b55a8b8bSYong Zhao m->sdmax_rlcx_rb_rptr); 450b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, 451d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_hi); 452d5a114a6SFelix Kuehling 453b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); 454d5a114a6SFelix Kuehling if (read_user_wptr(mm, wptr64, data64)) { 455b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 456d5a114a6SFelix Kuehling lower_32_bits(data64)); 457b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 458d5a114a6SFelix Kuehling upper_32_bits(data64)); 459d5a114a6SFelix Kuehling } else { 460b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 461d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr); 462b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 463d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_hi); 464d5a114a6SFelix Kuehling } 465b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); 466d5a114a6SFelix Kuehling 467b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); 468b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, 469d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_base_hi); 470b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 471d5a114a6SFelix Kuehling 
m->sdmax_rlcx_rb_rptr_addr_lo); 472b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, 473d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_addr_hi); 474d5a114a6SFelix Kuehling 475d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, 476d5a114a6SFelix Kuehling RB_ENABLE, 1); 477b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); 478d5a114a6SFelix Kuehling 479d5a114a6SFelix Kuehling return 0; 480d5a114a6SFelix Kuehling } 481d5a114a6SFelix Kuehling 482d5a114a6SFelix Kuehling static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, 483d5a114a6SFelix Kuehling uint32_t engine_id, uint32_t queue_id, 484d5a114a6SFelix Kuehling uint32_t (**dump)[2], uint32_t *n_regs) 485d5a114a6SFelix Kuehling { 486d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 487b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, 488b55a8b8bSYong Zhao engine_id, queue_id); 489d5a114a6SFelix Kuehling uint32_t i = 0, reg; 490d5a114a6SFelix Kuehling #undef HQD_N_REGS 491d5a114a6SFelix Kuehling #define HQD_N_REGS (19+6+7+10) 492d5a114a6SFelix Kuehling 4936da2ec56SKees Cook *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); 494d5a114a6SFelix Kuehling if (*dump == NULL) 495d5a114a6SFelix Kuehling return -ENOMEM; 496d5a114a6SFelix Kuehling 497d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) 498b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 499d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) 500b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 501d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; 502d5a114a6SFelix Kuehling reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) 503b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 504d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; 505d5a114a6SFelix Kuehling reg <= mmSDMA0_RLC0_MIDCMD_CNTL; 
reg++) 506b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 507d5a114a6SFelix Kuehling 508d5a114a6SFelix Kuehling WARN_ON_ONCE(i != HQD_N_REGS); 509d5a114a6SFelix Kuehling *n_regs = i; 510d5a114a6SFelix Kuehling 511d5a114a6SFelix Kuehling return 0; 512d5a114a6SFelix Kuehling } 513d5a114a6SFelix Kuehling 5143e205a08SOak Zeng bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 515d5a114a6SFelix Kuehling uint32_t pipe_id, uint32_t queue_id) 516d5a114a6SFelix Kuehling { 517d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 518d5a114a6SFelix Kuehling uint32_t act; 519d5a114a6SFelix Kuehling bool retval = false; 520d5a114a6SFelix Kuehling uint32_t low, high; 521d5a114a6SFelix Kuehling 522d5a114a6SFelix Kuehling acquire_queue(kgd, pipe_id, queue_id); 523d5a114a6SFelix Kuehling act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); 524d5a114a6SFelix Kuehling if (act) { 525d5a114a6SFelix Kuehling low = lower_32_bits(queue_address >> 8); 526d5a114a6SFelix Kuehling high = upper_32_bits(queue_address >> 8); 527d5a114a6SFelix Kuehling 528d5a114a6SFelix Kuehling if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && 529d5a114a6SFelix Kuehling high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) 530d5a114a6SFelix Kuehling retval = true; 531d5a114a6SFelix Kuehling } 532d5a114a6SFelix Kuehling release_queue(kgd); 533d5a114a6SFelix Kuehling return retval; 534d5a114a6SFelix Kuehling } 535d5a114a6SFelix Kuehling 536d5a114a6SFelix Kuehling static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) 537d5a114a6SFelix Kuehling { 538d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 539d5a114a6SFelix Kuehling struct v9_sdma_mqd *m; 540b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset; 541d5a114a6SFelix Kuehling uint32_t sdma_rlc_rb_cntl; 542d5a114a6SFelix Kuehling 543d5a114a6SFelix Kuehling m = get_sdma_mqd(mqd); 544b55a8b8bSYong Zhao sdma_rlc_reg_offset = 
		get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	/* The SDMA RLC queue is considered occupied while its ring buffer
	 * is enabled in RB_CNTL.
	 */
	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

/*
 * kgd_gfx_v9_hqd_destroy - Preempt/destroy a compute queue on the CP.
 *
 * Issues a dequeue request of the requested type to the HQD selected by
 * (pipe_id, queue_id), then polls CP_HQD_ACTIVE until the queue goes
 * inactive or @utimeout (in ms) expires.
 *
 * Returns 0 on success, -EIO if a GPU reset is in progress (MMIO access
 * would be unsafe/meaningless), -ETIME if the queue did not drain in time.
 */
int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v9_mqd *m = get_mqd(mqd);

	/* Don't touch the hardware while a GPU reset is in flight. */
	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	/* NOTE(review): vmid 0 queues appear to be detached from the RLC
	 * scheduler before the dequeue request — confirm intent against the
	 * RLC_CP_SCHEDULERS programming guide.
	 */
	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	/* Map the KFD preemption type onto the CP dequeue request type;
	 * anything unrecognized falls back to a pipe drain.
	 */
	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	/* Poll for the HQD to report inactive, sleeping between reads. */
	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

/*
 * kgd_hqd_sdma_destroy - Stop an SDMA RLC queue and save its read pointer.
 *
 * Disables the queue's ring buffer, waits (up to @utimeout ms) for the
 * context to report idle, clears its doorbell, and writes the final RB
 * read pointer back into the MQD so the queue can be restored later.
 *
 * Returns 0 on success, -ETIME if the engine never went idle.
 */
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	/* Disable the ring buffer to stop fetching new commands. */
	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	/* Wait for the context to drain in-flight work and go idle. */
	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	/* Detach the doorbell so userspace writes no longer ring the queue. */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	/* NOTE(review): RB_ENABLE is set again here after idle — presumably
	 * required by the v9 SDMA save/restore sequence; confirm against the
	 * SDMA programming guide.
	 */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	/* Preserve the final read pointer in the MQD for later restore. */
	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

/*
 * kgd_gfx_v9_get_atc_vmid_pasid_mapping_info - Query the ATC VMID->PASID map.
 *
 * Reads the ATC_VMID<n>_PASID_MAPPING register for @vmid, stores the PASID
 * field through @p_pasid, and returns whether the mapping's VALID bit is set.
 */
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	/* The per-VMID mapping registers are laid out consecutively, so the
	 * vmid is used directly as a register offset from VMID0.
	 */
	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

/* No-op on GFX v9: address-watch disable needs no programming here. */
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

/* No-op on GFX v9: address-watch setup needs no programming here. */
int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

/*
 * kgd_gfx_v9_wave_control_execute - Send an SQ command to selected waves.
 *
 * Under grbm_idx_mutex (GRBM_GFX_INDEX is shared global state), narrows
 * GRBM_GFX_INDEX to @gfx_index_val, writes @sq_cmd to SQ_CMD, then restores
 * GRBM_GFX_INDEX to broadcast across all SEs/SHs/instances.
 *
 * Always returns 0.
 */
int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	/* Rebuild a full-broadcast GRBM_GFX_INDEX value to restore the
	 * default targeting before dropping the lock.
	 */
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

/* Address-watch registers are not used on GFX v9; offset is always 0. */
uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

/*
 * kgd_gfx_v9_set_vm_context_page_table_base - Program a VMID's page table.
 *
 * Validates that @vmid belongs to KFD, then programs @page_table_base into
 * both the MMHUB and GFXHUB VM context registers (GFX v9 has two hubs that
 * must stay in sync for the same VMID).
 */
static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
			uint32_t vmid, uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/* Refuse to touch VMIDs owned by graphics/amdgpu itself. */
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);

	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}

/* KFD->KGD interface table for GFX v9 ASICs; consumed by amdkfd. */
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
	.get_unique_id = amdgpu_amdkfd_get_unique_id,
};