1d5a114a6SFelix Kuehling /* 2d5a114a6SFelix Kuehling * Copyright 2014-2018 Advanced Micro Devices, Inc. 3d5a114a6SFelix Kuehling * 4d5a114a6SFelix Kuehling * Permission is hereby granted, free of charge, to any person obtaining a 5d5a114a6SFelix Kuehling * copy of this software and associated documentation files (the "Software"), 6d5a114a6SFelix Kuehling * to deal in the Software without restriction, including without limitation 7d5a114a6SFelix Kuehling * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d5a114a6SFelix Kuehling * and/or sell copies of the Software, and to permit persons to whom the 9d5a114a6SFelix Kuehling * Software is furnished to do so, subject to the following conditions: 10d5a114a6SFelix Kuehling * 11d5a114a6SFelix Kuehling * The above copyright notice and this permission notice shall be included in 12d5a114a6SFelix Kuehling * all copies or substantial portions of the Software. 13d5a114a6SFelix Kuehling * 14d5a114a6SFelix Kuehling * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d5a114a6SFelix Kuehling * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d5a114a6SFelix Kuehling * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17d5a114a6SFelix Kuehling * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18d5a114a6SFelix Kuehling * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19d5a114a6SFelix Kuehling * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20d5a114a6SFelix Kuehling * OTHER DEALINGS IN THE SOFTWARE. 21d5a114a6SFelix Kuehling */ 22d5a114a6SFelix Kuehling #include "amdgpu.h" 23d5a114a6SFelix Kuehling #include "amdgpu_amdkfd.h" 24d5a114a6SFelix Kuehling #include "gc/gc_9_0_offset.h" 25d5a114a6SFelix Kuehling #include "gc/gc_9_0_sh_mask.h" 26d5a114a6SFelix Kuehling #include "vega10_enum.h" 27d5a114a6SFelix Kuehling #include "sdma0/sdma0_4_0_offset.h" 28d5a114a6SFelix Kuehling #include "sdma0/sdma0_4_0_sh_mask.h" 29d5a114a6SFelix Kuehling #include "sdma1/sdma1_4_0_offset.h" 30d5a114a6SFelix Kuehling #include "sdma1/sdma1_4_0_sh_mask.h" 31d5a114a6SFelix Kuehling #include "athub/athub_1_0_offset.h" 32d5a114a6SFelix Kuehling #include "athub/athub_1_0_sh_mask.h" 33d5a114a6SFelix Kuehling #include "oss/osssys_4_0_offset.h" 34d5a114a6SFelix Kuehling #include "oss/osssys_4_0_sh_mask.h" 35d5a114a6SFelix Kuehling #include "soc15_common.h" 36d5a114a6SFelix Kuehling #include "v9_structs.h" 37d5a114a6SFelix Kuehling #include "soc15.h" 38d5a114a6SFelix Kuehling #include "soc15d.h" 39e4312d45SAlex Deucher #include "mmhub_v1_0.h" 40e4312d45SAlex Deucher #include "gfxhub_v1_0.h" 41d5a114a6SFelix Kuehling 42d5a114a6SFelix Kuehling 43d5a114a6SFelix Kuehling enum hqd_dequeue_request_type { 44d5a114a6SFelix Kuehling NO_ACTION = 0, 45d5a114a6SFelix Kuehling DRAIN_PIPE, 46d5a114a6SFelix Kuehling RESET_WAVES 47d5a114a6SFelix Kuehling }; 48d5a114a6SFelix Kuehling 49d5a114a6SFelix Kuehling static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) 50d5a114a6SFelix Kuehling { 51d5a114a6SFelix Kuehling return (struct amdgpu_device *)kgd; 52d5a114a6SFelix Kuehling } 53d5a114a6SFelix Kuehling 54d5a114a6SFelix Kuehling static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, 55d5a114a6SFelix Kuehling uint32_t queue, uint32_t vmid) 56d5a114a6SFelix Kuehling { 57d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 58d5a114a6SFelix Kuehling 59d5a114a6SFelix Kuehling mutex_lock(&adev->srbm_mutex); 60d5a114a6SFelix Kuehling soc15_grbm_select(adev, mec, pipe, queue, vmid); 61d5a114a6SFelix Kuehling } 62d5a114a6SFelix Kuehling 63d5a114a6SFelix Kuehling static void unlock_srbm(struct kgd_dev *kgd) 64d5a114a6SFelix Kuehling { 65d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 66d5a114a6SFelix Kuehling 67d5a114a6SFelix Kuehling soc15_grbm_select(adev, 0, 0, 0, 0); 68d5a114a6SFelix Kuehling mutex_unlock(&adev->srbm_mutex); 69d5a114a6SFelix Kuehling } 70d5a114a6SFelix Kuehling 71d5a114a6SFelix Kuehling static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, 72d5a114a6SFelix Kuehling uint32_t queue_id) 73d5a114a6SFelix Kuehling { 74d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 75d5a114a6SFelix Kuehling 76d5a114a6SFelix Kuehling uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 77d5a114a6SFelix Kuehling uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 78d5a114a6SFelix Kuehling 79d5a114a6SFelix Kuehling lock_srbm(kgd, mec, pipe, queue_id, 0); 80d5a114a6SFelix Kuehling } 81d5a114a6SFelix Kuehling 8235cd89d5SAaron Liu static uint64_t get_queue_mask(struct amdgpu_device *adev, 83d5a114a6SFelix Kuehling uint32_t pipe_id, uint32_t queue_id) 84d5a114a6SFelix Kuehling { 8535cd89d5SAaron Liu unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + 8635cd89d5SAaron Liu queue_id; 87d5a114a6SFelix Kuehling 8835cd89d5SAaron Liu return 1ull << bit; 89d5a114a6SFelix Kuehling } 90d5a114a6SFelix Kuehling 91d5a114a6SFelix Kuehling static void release_queue(struct kgd_dev *kgd) 92d5a114a6SFelix Kuehling { 93d5a114a6SFelix Kuehling unlock_srbm(kgd); 94d5a114a6SFelix Kuehling } 95d5a114a6SFelix Kuehling 963e205a08SOak Zeng void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, 97d5a114a6SFelix Kuehling uint32_t sh_mem_config, 98d5a114a6SFelix Kuehling uint32_t sh_mem_ape1_base, 99d5a114a6SFelix Kuehling uint32_t sh_mem_ape1_limit, 100d5a114a6SFelix Kuehling uint32_t sh_mem_bases) 101d5a114a6SFelix Kuehling { 102d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 103d5a114a6SFelix Kuehling 104d5a114a6SFelix Kuehling lock_srbm(kgd, 0, 0, 0, vmid); 105d5a114a6SFelix Kuehling 1061bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); 1071bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); 108d5a114a6SFelix Kuehling /* APE1 no longer exists on GFX9 */ 109d5a114a6SFelix Kuehling 110d5a114a6SFelix Kuehling unlock_srbm(kgd); 111d5a114a6SFelix Kuehling } 112d5a114a6SFelix Kuehling 113c7b6bac9SFenghua Yu int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, 114d5a114a6SFelix Kuehling unsigned int vmid) 115d5a114a6SFelix Kuehling { 116d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 117d5a114a6SFelix Kuehling 118d5a114a6SFelix Kuehling /* 119d5a114a6SFelix Kuehling * We have to assume that there is no outstanding mapping. 120d5a114a6SFelix Kuehling * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because 121d5a114a6SFelix Kuehling * a mapping is in progress or because a mapping finished 122d5a114a6SFelix Kuehling * and the SW cleared it. 123d5a114a6SFelix Kuehling * So the protocol is to always wait & clear. 124d5a114a6SFelix Kuehling */ 125d5a114a6SFelix Kuehling uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | 126d5a114a6SFelix Kuehling ATC_VMID0_PASID_MAPPING__VALID_MASK; 127d5a114a6SFelix Kuehling 128d5a114a6SFelix Kuehling /* 129d5a114a6SFelix Kuehling * need to do this twice, once for gfx and once for mmhub 130d5a114a6SFelix Kuehling * for ATC add 16 to VMID for mmhub, for IH different registers. 131d5a114a6SFelix Kuehling * ATC_VMID0..15 registers are separate from ATC_VMID16..31. 132d5a114a6SFelix Kuehling */ 133d5a114a6SFelix Kuehling 134d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, 135d5a114a6SFelix Kuehling pasid_mapping); 136d5a114a6SFelix Kuehling 137d5a114a6SFelix Kuehling while (!(RREG32(SOC15_REG_OFFSET( 138d5a114a6SFelix Kuehling ATHUB, 0, 139d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & 140d5a114a6SFelix Kuehling (1U << vmid))) 141d5a114a6SFelix Kuehling cpu_relax(); 142d5a114a6SFelix Kuehling 143d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, 144d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), 145d5a114a6SFelix Kuehling 1U << vmid); 146d5a114a6SFelix Kuehling 147d5a114a6SFelix Kuehling /* Mapping vmid to pasid also for IH block */ 148d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, 149d5a114a6SFelix Kuehling pasid_mapping); 150d5a114a6SFelix Kuehling 151d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, 152d5a114a6SFelix Kuehling pasid_mapping); 153d5a114a6SFelix Kuehling 154d5a114a6SFelix Kuehling while (!(RREG32(SOC15_REG_OFFSET( 155d5a114a6SFelix Kuehling ATHUB, 0, 156d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & 157d5a114a6SFelix Kuehling (1U << (vmid + 16)))) 158d5a114a6SFelix Kuehling cpu_relax(); 159d5a114a6SFelix Kuehling 160d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(ATHUB, 0, 161d5a114a6SFelix Kuehling mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), 162d5a114a6SFelix Kuehling 1U << (vmid + 16)); 163d5a114a6SFelix Kuehling 164d5a114a6SFelix Kuehling /* Mapping vmid to pasid also for IH block */ 165d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, 166d5a114a6SFelix Kuehling pasid_mapping); 167d5a114a6SFelix Kuehling return 0; 168d5a114a6SFelix Kuehling } 169d5a114a6SFelix Kuehling 170d5a114a6SFelix Kuehling /* TODO - RING0 form of field is obsolete, seems to date back to SI 171d5a114a6SFelix Kuehling * but still works 172d5a114a6SFelix Kuehling */ 173d5a114a6SFelix Kuehling 1743e205a08SOak Zeng int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) 175d5a114a6SFelix Kuehling { 176d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 177d5a114a6SFelix Kuehling uint32_t mec; 178d5a114a6SFelix Kuehling uint32_t pipe; 179d5a114a6SFelix Kuehling 180d5a114a6SFelix Kuehling mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 181d5a114a6SFelix Kuehling pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 182d5a114a6SFelix Kuehling 183d5a114a6SFelix Kuehling lock_srbm(kgd, mec, pipe, 0, 0); 184d5a114a6SFelix Kuehling 185d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), 186d5a114a6SFelix Kuehling CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | 187d5a114a6SFelix Kuehling CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); 188d5a114a6SFelix Kuehling 189d5a114a6SFelix Kuehling unlock_srbm(kgd); 190d5a114a6SFelix Kuehling 191d5a114a6SFelix Kuehling return 0; 192d5a114a6SFelix Kuehling } 193d5a114a6SFelix Kuehling 194b55a8b8bSYong Zhao static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, 195d5a114a6SFelix Kuehling unsigned int engine_id, 196d5a114a6SFelix Kuehling unsigned int queue_id) 197d5a114a6SFelix Kuehling { 19834174b89SHuang Rui uint32_t sdma_engine_reg_base = 0; 19934174b89SHuang Rui uint32_t sdma_rlc_reg_offset; 20034174b89SHuang Rui 20134174b89SHuang Rui switch (engine_id) { 20234174b89SHuang Rui default: 20334174b89SHuang Rui dev_warn(adev->dev, 20434174b89SHuang Rui "Invalid sdma engine id (%d), using engine id 0\n", 20534174b89SHuang Rui engine_id); 20634174b89SHuang Rui fallthrough; 20734174b89SHuang Rui case 0: 20834174b89SHuang Rui sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, 20934174b89SHuang Rui mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; 21034174b89SHuang Rui break; 21134174b89SHuang Rui case 1: 21234174b89SHuang Rui sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, 21334174b89SHuang Rui mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; 21434174b89SHuang Rui break; 21534174b89SHuang Rui } 21634174b89SHuang Rui 21734174b89SHuang Rui sdma_rlc_reg_offset = sdma_engine_reg_base 218b55a8b8bSYong Zhao + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); 219d5a114a6SFelix Kuehling 220b55a8b8bSYong Zhao pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, 22134174b89SHuang Rui queue_id, sdma_rlc_reg_offset); 222d5a114a6SFelix Kuehling 22334174b89SHuang Rui return sdma_rlc_reg_offset; 224d5a114a6SFelix Kuehling } 225d5a114a6SFelix Kuehling 226d5a114a6SFelix Kuehling static inline struct v9_mqd *get_mqd(void *mqd) 227d5a114a6SFelix Kuehling { 228d5a114a6SFelix Kuehling return (struct v9_mqd *)mqd; 229d5a114a6SFelix Kuehling } 230d5a114a6SFelix Kuehling 231d5a114a6SFelix Kuehling static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) 232d5a114a6SFelix Kuehling { 233d5a114a6SFelix Kuehling return (struct v9_sdma_mqd *)mqd; 234d5a114a6SFelix Kuehling } 235d5a114a6SFelix Kuehling 2363e205a08SOak Zeng int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 237d5a114a6SFelix Kuehling uint32_t queue_id, uint32_t __user *wptr, 238d5a114a6SFelix Kuehling uint32_t wptr_shift, uint32_t wptr_mask, 239d5a114a6SFelix Kuehling struct mm_struct *mm) 240d5a114a6SFelix Kuehling { 241d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 242d5a114a6SFelix Kuehling struct v9_mqd *m; 243d5a114a6SFelix Kuehling uint32_t *mqd_hqd; 244d5a114a6SFelix Kuehling uint32_t reg, hqd_base, data; 245d5a114a6SFelix Kuehling 246d5a114a6SFelix Kuehling m = get_mqd(mqd); 247d5a114a6SFelix Kuehling 248d5a114a6SFelix Kuehling acquire_queue(kgd, pipe_id, queue_id); 249d5a114a6SFelix Kuehling 250d5a114a6SFelix Kuehling /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ 251d5a114a6SFelix Kuehling mqd_hqd = &m->cp_mqd_base_addr_lo; 252d5a114a6SFelix Kuehling hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); 253d5a114a6SFelix Kuehling 254d5a114a6SFelix Kuehling for (reg = hqd_base; 255d5a114a6SFelix Kuehling reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) 2561bff7f6cSTrigger Huang WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); 257d5a114a6SFelix Kuehling 258d5a114a6SFelix Kuehling 259d5a114a6SFelix Kuehling /* Activate doorbell logic before triggering WPTR poll. */ 260d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, 261d5a114a6SFelix Kuehling CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 2621bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); 263d5a114a6SFelix Kuehling 264d5a114a6SFelix Kuehling if (wptr) { 265d5a114a6SFelix Kuehling /* Don't read wptr with get_user because the user 266d5a114a6SFelix Kuehling * context may not be accessible (if this function 267d5a114a6SFelix Kuehling * runs in a work queue). Instead trigger a one-shot 268d5a114a6SFelix Kuehling * polling read from memory in the CP. This assumes 269d5a114a6SFelix Kuehling * that wptr is GPU-accessible in the queue's VMID via 270d5a114a6SFelix Kuehling * ATC or SVM. WPTR==RPTR before starting the poll so 271d5a114a6SFelix Kuehling * the CP starts fetching new commands from the right 272d5a114a6SFelix Kuehling * place. 273d5a114a6SFelix Kuehling * 274d5a114a6SFelix Kuehling * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit 275d5a114a6SFelix Kuehling * tricky. Assume that the queue didn't overflow. The 276d5a114a6SFelix Kuehling * number of valid bits in the 32-bit RPTR depends on 277d5a114a6SFelix Kuehling * the queue size. The remaining bits are taken from 278d5a114a6SFelix Kuehling * the saved 64-bit WPTR. If the WPTR wrapped, add the 279d5a114a6SFelix Kuehling * queue size. 280d5a114a6SFelix Kuehling */ 281d5a114a6SFelix Kuehling uint32_t queue_size = 282d5a114a6SFelix Kuehling 2 << REG_GET_FIELD(m->cp_hqd_pq_control, 283d5a114a6SFelix Kuehling CP_HQD_PQ_CONTROL, QUEUE_SIZE); 284d5a114a6SFelix Kuehling uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); 285d5a114a6SFelix Kuehling 286d5a114a6SFelix Kuehling if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) 287d5a114a6SFelix Kuehling guessed_wptr += queue_size; 288d5a114a6SFelix Kuehling guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); 289d5a114a6SFelix Kuehling guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; 290d5a114a6SFelix Kuehling 2911bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), 292d5a114a6SFelix Kuehling lower_32_bits(guessed_wptr)); 2931bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), 294d5a114a6SFelix Kuehling upper_32_bits(guessed_wptr)); 2951bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 296ebe1d22bSArnd Bergmann lower_32_bits((uintptr_t)wptr)); 2971bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 298ebe1d22bSArnd Bergmann upper_32_bits((uintptr_t)wptr)); 299d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), 30035cd89d5SAaron Liu (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); 301d5a114a6SFelix Kuehling } 302d5a114a6SFelix Kuehling 303d5a114a6SFelix Kuehling /* Start the EOP fetcher */ 3041bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), 305d5a114a6SFelix Kuehling REG_SET_FIELD(m->cp_hqd_eop_rptr, 306d5a114a6SFelix Kuehling CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); 307d5a114a6SFelix Kuehling 308d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); 3091bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); 310d5a114a6SFelix Kuehling 311d5a114a6SFelix Kuehling release_queue(kgd); 312d5a114a6SFelix Kuehling 313d5a114a6SFelix Kuehling return 0; 314d5a114a6SFelix Kuehling } 315d5a114a6SFelix Kuehling 31635cd89d5SAaron Liu int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, 31735cd89d5SAaron Liu uint32_t pipe_id, uint32_t queue_id, 31835cd89d5SAaron Liu uint32_t doorbell_off) 31935cd89d5SAaron Liu { 32035cd89d5SAaron Liu struct amdgpu_device *adev = get_amdgpu_device(kgd); 32135cd89d5SAaron Liu struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 32235cd89d5SAaron Liu struct v9_mqd *m; 32335cd89d5SAaron Liu uint32_t mec, pipe; 32435cd89d5SAaron Liu int r; 32535cd89d5SAaron Liu 32635cd89d5SAaron Liu m = get_mqd(mqd); 32735cd89d5SAaron Liu 32835cd89d5SAaron Liu acquire_queue(kgd, pipe_id, queue_id); 32935cd89d5SAaron Liu 33035cd89d5SAaron Liu mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 33135cd89d5SAaron Liu pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 33235cd89d5SAaron Liu 33335cd89d5SAaron Liu pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 33435cd89d5SAaron Liu mec, pipe, queue_id); 33535cd89d5SAaron Liu 33635cd89d5SAaron Liu spin_lock(&adev->gfx.kiq.ring_lock); 33735cd89d5SAaron Liu r = amdgpu_ring_alloc(kiq_ring, 7); 33835cd89d5SAaron Liu if (r) { 33935cd89d5SAaron Liu pr_err("Failed to alloc KIQ (%d).\n", r); 34035cd89d5SAaron Liu goto out_unlock; 34135cd89d5SAaron Liu } 34235cd89d5SAaron Liu 34335cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 34435cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, 34535cd89d5SAaron Liu PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 34635cd89d5SAaron Liu PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ 34735cd89d5SAaron Liu PACKET3_MAP_QUEUES_QUEUE(queue_id) | 34835cd89d5SAaron Liu PACKET3_MAP_QUEUES_PIPE(pipe) | 34935cd89d5SAaron Liu PACKET3_MAP_QUEUES_ME((mec - 1)) | 35035cd89d5SAaron Liu PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 35135cd89d5SAaron Liu PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 35235cd89d5SAaron Liu PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ 35335cd89d5SAaron Liu PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 35435cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, 35535cd89d5SAaron Liu PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); 35635cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); 35735cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); 35835cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); 35935cd89d5SAaron Liu amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); 36035cd89d5SAaron Liu amdgpu_ring_commit(kiq_ring); 36135cd89d5SAaron Liu 36235cd89d5SAaron Liu out_unlock: 36335cd89d5SAaron Liu spin_unlock(&adev->gfx.kiq.ring_lock); 36435cd89d5SAaron Liu release_queue(kgd); 36535cd89d5SAaron Liu 36635cd89d5SAaron Liu return r; 36735cd89d5SAaron Liu } 36835cd89d5SAaron Liu 3693e205a08SOak Zeng int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, 370d5a114a6SFelix Kuehling uint32_t pipe_id, uint32_t queue_id, 371d5a114a6SFelix Kuehling uint32_t (**dump)[2], uint32_t *n_regs) 372d5a114a6SFelix Kuehling { 373d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 374d5a114a6SFelix Kuehling uint32_t i = 0, reg; 375d5a114a6SFelix Kuehling #define HQD_N_REGS 56 376d5a114a6SFelix Kuehling #define DUMP_REG(addr) do { \ 377d5a114a6SFelix Kuehling if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ 378d5a114a6SFelix Kuehling break; \ 379d5a114a6SFelix Kuehling (*dump)[i][0] = (addr) << 2; \ 380d5a114a6SFelix Kuehling (*dump)[i++][1] = RREG32(addr); \ 381d5a114a6SFelix Kuehling } while (0) 382d5a114a6SFelix Kuehling 3836da2ec56SKees Cook *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); 384d5a114a6SFelix Kuehling if (*dump == NULL) 385d5a114a6SFelix Kuehling return -ENOMEM; 386d5a114a6SFelix Kuehling 387d5a114a6SFelix Kuehling acquire_queue(kgd, pipe_id, queue_id); 388d5a114a6SFelix Kuehling 389d5a114a6SFelix Kuehling for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); 390d5a114a6SFelix Kuehling reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) 391d5a114a6SFelix Kuehling DUMP_REG(reg); 392d5a114a6SFelix Kuehling 393d5a114a6SFelix Kuehling release_queue(kgd); 394d5a114a6SFelix Kuehling 395d5a114a6SFelix Kuehling WARN_ON_ONCE(i != HQD_N_REGS); 396d5a114a6SFelix Kuehling *n_regs = i; 397d5a114a6SFelix Kuehling 398d5a114a6SFelix Kuehling return 0; 399d5a114a6SFelix Kuehling } 400d5a114a6SFelix Kuehling 401d5a114a6SFelix Kuehling static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, 402d5a114a6SFelix Kuehling uint32_t __user *wptr, struct mm_struct *mm) 403d5a114a6SFelix Kuehling { 404d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 405d5a114a6SFelix Kuehling struct v9_sdma_mqd *m; 406b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset; 407d5a114a6SFelix Kuehling unsigned long end_jiffies; 408d5a114a6SFelix Kuehling uint32_t data; 409d5a114a6SFelix Kuehling uint64_t data64; 410d5a114a6SFelix Kuehling uint64_t __user *wptr64 = (uint64_t __user *)wptr; 411d5a114a6SFelix Kuehling 412d5a114a6SFelix Kuehling m = get_sdma_mqd(mqd); 413b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 414d5a114a6SFelix Kuehling m->sdma_queue_id); 415d5a114a6SFelix Kuehling 416b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 417d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); 418d5a114a6SFelix Kuehling 419d5a114a6SFelix Kuehling end_jiffies = msecs_to_jiffies(2000) + jiffies; 420d5a114a6SFelix Kuehling while (true) { 421b55a8b8bSYong Zhao data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 422d5a114a6SFelix Kuehling if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 423d5a114a6SFelix Kuehling break; 424812330ebSYong Zhao if (time_after(jiffies, end_jiffies)) { 425812330ebSYong Zhao pr_err("SDMA RLC not idle in %s\n", __func__); 426d5a114a6SFelix Kuehling return -ETIME; 427812330ebSYong Zhao } 428d5a114a6SFelix Kuehling usleep_range(500, 1000); 429d5a114a6SFelix Kuehling } 430d5a114a6SFelix Kuehling 431b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, 432d5a114a6SFelix Kuehling m->sdmax_rlcx_doorbell_offset); 433d5a114a6SFelix Kuehling 434d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, 435d5a114a6SFelix Kuehling ENABLE, 1); 436b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); 437b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, 438b55a8b8bSYong Zhao m->sdmax_rlcx_rb_rptr); 439b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, 440d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_hi); 441d5a114a6SFelix Kuehling 442b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); 443d5a114a6SFelix Kuehling if (read_user_wptr(mm, wptr64, data64)) { 444b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 445d5a114a6SFelix Kuehling lower_32_bits(data64)); 446b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 447d5a114a6SFelix Kuehling upper_32_bits(data64)); 448d5a114a6SFelix Kuehling } else { 449b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 450d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr); 451b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 452d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_hi); 453d5a114a6SFelix Kuehling } 454b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); 455d5a114a6SFelix Kuehling 456b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); 457b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, 458d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_base_hi); 459b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 460d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_addr_lo); 461b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, 462d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_addr_hi); 463d5a114a6SFelix Kuehling 464d5a114a6SFelix Kuehling data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, 465d5a114a6SFelix Kuehling RB_ENABLE, 1); 466b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); 467d5a114a6SFelix Kuehling 468d5a114a6SFelix Kuehling return 0; 469d5a114a6SFelix Kuehling } 470d5a114a6SFelix Kuehling 471d5a114a6SFelix Kuehling static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, 472d5a114a6SFelix Kuehling uint32_t engine_id, uint32_t queue_id, 473d5a114a6SFelix Kuehling uint32_t (**dump)[2], uint32_t *n_regs) 474d5a114a6SFelix Kuehling { 475d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 476b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, 477b55a8b8bSYong Zhao engine_id, queue_id); 478d5a114a6SFelix Kuehling uint32_t i = 0, reg; 479d5a114a6SFelix Kuehling #undef HQD_N_REGS 480d5a114a6SFelix Kuehling #define HQD_N_REGS (19+6+7+10) 481d5a114a6SFelix Kuehling 4826da2ec56SKees Cook *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); 483d5a114a6SFelix Kuehling if (*dump == NULL) 484d5a114a6SFelix Kuehling return -ENOMEM; 485d5a114a6SFelix Kuehling 486d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) 487b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 488d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) 489b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 490d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; 491d5a114a6SFelix Kuehling reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) 492b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 493d5a114a6SFelix Kuehling for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; 494d5a114a6SFelix Kuehling reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) 495b55a8b8bSYong Zhao DUMP_REG(sdma_rlc_reg_offset + reg); 496d5a114a6SFelix Kuehling 497d5a114a6SFelix Kuehling WARN_ON_ONCE(i != HQD_N_REGS); 498d5a114a6SFelix Kuehling *n_regs = i; 499d5a114a6SFelix Kuehling 500d5a114a6SFelix Kuehling return 0; 501d5a114a6SFelix Kuehling } 502d5a114a6SFelix Kuehling 5033e205a08SOak Zeng bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 504d5a114a6SFelix Kuehling uint32_t pipe_id, uint32_t queue_id) 505d5a114a6SFelix Kuehling { 506d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 507d5a114a6SFelix Kuehling uint32_t act; 508d5a114a6SFelix Kuehling bool retval = false; 509d5a114a6SFelix Kuehling uint32_t low, high; 510d5a114a6SFelix Kuehling 511d5a114a6SFelix Kuehling acquire_queue(kgd, pipe_id, queue_id); 512d5a114a6SFelix Kuehling act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); 513d5a114a6SFelix Kuehling if (act) { 514d5a114a6SFelix Kuehling low = lower_32_bits(queue_address >> 8); 515d5a114a6SFelix Kuehling high = upper_32_bits(queue_address >> 8); 516d5a114a6SFelix Kuehling 517d5a114a6SFelix Kuehling if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && 518d5a114a6SFelix Kuehling high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) 519d5a114a6SFelix Kuehling retval = true; 520d5a114a6SFelix Kuehling } 521d5a114a6SFelix Kuehling release_queue(kgd); 522d5a114a6SFelix Kuehling return retval; 523d5a114a6SFelix Kuehling } 524d5a114a6SFelix Kuehling 525d5a114a6SFelix Kuehling static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) 526d5a114a6SFelix Kuehling { 527d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 528d5a114a6SFelix Kuehling struct v9_sdma_mqd *m; 529b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset; 530d5a114a6SFelix Kuehling uint32_t sdma_rlc_rb_cntl; 531d5a114a6SFelix Kuehling 532d5a114a6SFelix Kuehling m = get_sdma_mqd(mqd); 533b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 534d5a114a6SFelix Kuehling m->sdma_queue_id); 535d5a114a6SFelix Kuehling 536b55a8b8bSYong Zhao sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); 537d5a114a6SFelix Kuehling 538d5a114a6SFelix Kuehling if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) 539d5a114a6SFelix Kuehling return true; 540d5a114a6SFelix Kuehling 541d5a114a6SFelix Kuehling return false; 542d5a114a6SFelix Kuehling } 543d5a114a6SFelix Kuehling 5443e205a08SOak Zeng int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, 545d5a114a6SFelix Kuehling enum kfd_preempt_type reset_type, 546d5a114a6SFelix Kuehling unsigned int utimeout, uint32_t pipe_id, 547d5a114a6SFelix Kuehling uint32_t queue_id) 548d5a114a6SFelix Kuehling { 549d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 550d5a114a6SFelix Kuehling enum hqd_dequeue_request_type type; 551d5a114a6SFelix Kuehling unsigned long end_jiffies; 552d5a114a6SFelix Kuehling uint32_t temp; 553d5a114a6SFelix Kuehling struct v9_mqd *m = get_mqd(mqd); 554d5a114a6SFelix Kuehling 5551b0bfcffSShaoyun Liu if (adev->in_gpu_reset) 5561b0bfcffSShaoyun Liu return -EIO; 5571b0bfcffSShaoyun Liu 558d5a114a6SFelix Kuehling acquire_queue(kgd, pipe_id, queue_id); 559d5a114a6SFelix Kuehling 560d5a114a6SFelix Kuehling if (m->cp_hqd_vmid == 0) 5611bff7f6cSTrigger Huang WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); 562d5a114a6SFelix Kuehling 563d5a114a6SFelix Kuehling switch (reset_type) { 564d5a114a6SFelix Kuehling case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: 565d5a114a6SFelix Kuehling type = DRAIN_PIPE; 566d5a114a6SFelix Kuehling break; 567d5a114a6SFelix Kuehling case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: 568d5a114a6SFelix Kuehling type = RESET_WAVES; 569d5a114a6SFelix Kuehling break; 570d5a114a6SFelix Kuehling default: 571d5a114a6SFelix Kuehling type = DRAIN_PIPE; 572d5a114a6SFelix Kuehling break; 573d5a114a6SFelix Kuehling } 574d5a114a6SFelix Kuehling 5751bff7f6cSTrigger Huang WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); 576d5a114a6SFelix Kuehling 577d5a114a6SFelix Kuehling end_jiffies = (utimeout * HZ / 1000) + jiffies; 578d5a114a6SFelix Kuehling while (true) { 579d5a114a6SFelix Kuehling temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); 580d5a114a6SFelix Kuehling if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) 581d5a114a6SFelix Kuehling break; 582d5a114a6SFelix Kuehling if (time_after(jiffies, end_jiffies)) { 583d5a114a6SFelix Kuehling pr_err("cp queue preemption time out.\n"); 584d5a114a6SFelix Kuehling release_queue(kgd); 585d5a114a6SFelix Kuehling return -ETIME; 586d5a114a6SFelix Kuehling } 587d5a114a6SFelix Kuehling usleep_range(500, 1000); 588d5a114a6SFelix Kuehling } 589d5a114a6SFelix Kuehling 590d5a114a6SFelix Kuehling release_queue(kgd); 591d5a114a6SFelix Kuehling return 0; 592d5a114a6SFelix Kuehling } 593d5a114a6SFelix Kuehling 594d5a114a6SFelix Kuehling static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 595d5a114a6SFelix Kuehling unsigned int utimeout) 596d5a114a6SFelix Kuehling { 597d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 598d5a114a6SFelix Kuehling struct v9_sdma_mqd *m; 599b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset; 600d5a114a6SFelix Kuehling uint32_t temp; 601d5a114a6SFelix Kuehling unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; 602d5a114a6SFelix Kuehling 603d5a114a6SFelix Kuehling m = get_sdma_mqd(mqd); 604b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 605d5a114a6SFelix Kuehling m->sdma_queue_id); 606d5a114a6SFelix Kuehling 607b55a8b8bSYong Zhao temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); 608d5a114a6SFelix Kuehling temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; 609b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); 610d5a114a6SFelix Kuehling 611d5a114a6SFelix Kuehling while (true) { 612b55a8b8bSYong Zhao temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 613d5a114a6SFelix Kuehling if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 614d5a114a6SFelix Kuehling break; 615812330ebSYong Zhao if (time_after(jiffies, end_jiffies)) { 616812330ebSYong Zhao pr_err("SDMA RLC not idle in %s\n", __func__); 617d5a114a6SFelix Kuehling return -ETIME; 618812330ebSYong Zhao } 619d5a114a6SFelix Kuehling usleep_range(500, 1000); 620d5a114a6SFelix Kuehling } 621d5a114a6SFelix Kuehling 622b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); 623b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 624b55a8b8bSYong Zhao RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | 625d5a114a6SFelix Kuehling SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); 626d5a114a6SFelix Kuehling 627b55a8b8bSYong Zhao m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); 628d5a114a6SFelix Kuehling m->sdmax_rlcx_rb_rptr_hi = 629b55a8b8bSYong Zhao RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); 630d5a114a6SFelix Kuehling 631d5a114a6SFelix Kuehling return 0; 632d5a114a6SFelix Kuehling } 633d5a114a6SFelix Kuehling 63456fc40abSYong Zhao bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 63556fc40abSYong Zhao uint8_t vmid, uint16_t *p_pasid) 636d5a114a6SFelix Kuehling { 63756fc40abSYong Zhao uint32_t value; 638d5a114a6SFelix Kuehling struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 639d5a114a6SFelix Kuehling 64056fc40abSYong Zhao value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 641d5a114a6SFelix Kuehling + vmid); 64256fc40abSYong Zhao *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; 643d5a114a6SFelix Kuehling 64456fc40abSYong Zhao return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); 645d5a114a6SFelix Kuehling } 646d5a114a6SFelix Kuehling 6473e205a08SOak Zeng int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) 648d5a114a6SFelix Kuehling { 649d5a114a6SFelix Kuehling return 0; 650d5a114a6SFelix Kuehling } 651d5a114a6SFelix Kuehling 6523e205a08SOak Zeng int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, 653d5a114a6SFelix Kuehling unsigned int watch_point_id, 654d5a114a6SFelix Kuehling uint32_t cntl_val, 655d5a114a6SFelix Kuehling uint32_t addr_hi, 656d5a114a6SFelix Kuehling uint32_t addr_lo) 657d5a114a6SFelix Kuehling { 658d5a114a6SFelix Kuehling return 0; 659d5a114a6SFelix Kuehling } 660d5a114a6SFelix Kuehling 6613e205a08SOak Zeng int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, 662d5a114a6SFelix Kuehling uint32_t gfx_index_val, 663d5a114a6SFelix Kuehling uint32_t sq_cmd) 664d5a114a6SFelix Kuehling { 665d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 666d5a114a6SFelix Kuehling uint32_t data = 0; 667d5a114a6SFelix Kuehling 668d5a114a6SFelix Kuehling mutex_lock(&adev->grbm_idx_mutex); 669d5a114a6SFelix Kuehling 6701bff7f6cSTrigger Huang WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); 671d5a114a6SFelix Kuehling WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); 672d5a114a6SFelix Kuehling 673d5a114a6SFelix Kuehling data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 674d5a114a6SFelix Kuehling INSTANCE_BROADCAST_WRITES, 1); 675d5a114a6SFelix Kuehling data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 676d5a114a6SFelix Kuehling SH_BROADCAST_WRITES, 1); 677d5a114a6SFelix Kuehling data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 678d5a114a6SFelix Kuehling SE_BROADCAST_WRITES, 1); 679d5a114a6SFelix Kuehling 6801bff7f6cSTrigger Huang WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 681d5a114a6SFelix Kuehling mutex_unlock(&adev->grbm_idx_mutex); 682d5a114a6SFelix Kuehling 683d5a114a6SFelix Kuehling return 0; 684d5a114a6SFelix Kuehling } 685d5a114a6SFelix Kuehling 6863e205a08SOak Zeng uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, 687d5a114a6SFelix Kuehling unsigned int watch_point_id, 688d5a114a6SFelix Kuehling unsigned int reg_offset) 689d5a114a6SFelix Kuehling { 690d5a114a6SFelix Kuehling return 0; 691d5a114a6SFelix Kuehling } 692d5a114a6SFelix Kuehling 693ad5901dfSYong Zhao static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, 694ad5901dfSYong Zhao uint32_t vmid, uint64_t page_table_base) 695d5a114a6SFelix Kuehling { 696d5a114a6SFelix Kuehling struct amdgpu_device *adev = get_amdgpu_device(kgd); 697d5a114a6SFelix Kuehling 698d5a114a6SFelix Kuehling if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { 699d5a114a6SFelix Kuehling pr_err("trying to set page table base for wrong VMID %u\n", 700d5a114a6SFelix Kuehling vmid); 701d5a114a6SFelix Kuehling return; 702d5a114a6SFelix Kuehling } 703d5a114a6SFelix Kuehling 704435e2f97SYong Zhao mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); 705d5a114a6SFelix Kuehling 706435e2f97SYong Zhao gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); 707d5a114a6SFelix Kuehling } 7083e205a08SOak Zeng 709e392c887SYong Zhao const struct kfd2kgd_calls gfx_v9_kfd2kgd = { 7103e205a08SOak Zeng .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, 7113e205a08SOak Zeng .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, 7123e205a08SOak Zeng .init_interrupts = kgd_gfx_v9_init_interrupts, 7133e205a08SOak Zeng .hqd_load = kgd_gfx_v9_hqd_load, 71435cd89d5SAaron Liu .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, 7153e205a08SOak Zeng .hqd_sdma_load = kgd_hqd_sdma_load, 7163e205a08SOak Zeng .hqd_dump = kgd_gfx_v9_hqd_dump, 7173e205a08SOak Zeng .hqd_sdma_dump = kgd_hqd_sdma_dump, 7183e205a08SOak Zeng .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, 7193e205a08SOak Zeng .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, 7203e205a08SOak Zeng .hqd_destroy = kgd_gfx_v9_hqd_destroy, 7213e205a08SOak Zeng .hqd_sdma_destroy = kgd_hqd_sdma_destroy, 7223e205a08SOak Zeng .address_watch_disable = kgd_gfx_v9_address_watch_disable, 7233e205a08SOak Zeng .address_watch_execute = kgd_gfx_v9_address_watch_execute, 7243e205a08SOak Zeng .wave_control_execute = kgd_gfx_v9_wave_control_execute, 7253e205a08SOak Zeng .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, 72656fc40abSYong Zhao .get_atc_vmid_pasid_mapping_info = 72756fc40abSYong Zhao kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, 7283e205a08SOak Zeng .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, 7293e205a08SOak Zeng .get_hive_id = amdgpu_amdkfd_get_hive_id, 7300c663695SDivya Shikre .get_unique_id = amdgpu_amdkfd_get_unique_id, 7313e205a08SOak Zeng }; 732