16bdadb20SHawking Zhang /* 26bdadb20SHawking Zhang * Copyright 2019 Advanced Micro Devices, Inc. 36bdadb20SHawking Zhang * 46bdadb20SHawking Zhang * Permission is hereby granted, free of charge, to any person obtaining a 56bdadb20SHawking Zhang * copy of this software and associated documentation files (the "Software"), 66bdadb20SHawking Zhang * to deal in the Software without restriction, including without limitation 76bdadb20SHawking Zhang * the rights to use, copy, modify, merge, publish, distribute, sublicense, 86bdadb20SHawking Zhang * and/or sell copies of the Software, and to permit persons to whom the 96bdadb20SHawking Zhang * Software is furnished to do so, subject to the following conditions: 106bdadb20SHawking Zhang * 116bdadb20SHawking Zhang * The above copyright notice and this permission notice shall be included in 126bdadb20SHawking Zhang * all copies or substantial portions of the Software. 136bdadb20SHawking Zhang * 146bdadb20SHawking Zhang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 156bdadb20SHawking Zhang * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 166bdadb20SHawking Zhang * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 176bdadb20SHawking Zhang * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 186bdadb20SHawking Zhang * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 196bdadb20SHawking Zhang * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 206bdadb20SHawking Zhang * OTHER DEALINGS IN THE SOFTWARE. 
 */
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
#include "athub/athub_2_0_0_offset.h"
#include "athub/athub_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"

/*
 * Dequeue request types written to CP_HQD_DEQUEUE_REQUEST when stopping
 * a compute hardware queue (see kgd_hqd_destroy()).
 */
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

/* The KFD-facing kgd_dev handle is the amdgpu device itself. */
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

/*
 * Take the SRBM mutex and select the given mec/pipe/queue/vmid so that
 * subsequent per-queue register accesses target that queue. Must be
 * paired with unlock_srbm().
 */
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, mec, pipe, queue, vmid);
}

/* Restore the default (0/0/0/0) GRBM selection and drop the SRBM mutex. */
static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

/*
 * Select the queue identified by the driver-global pipe_id and queue_id
 * under the SRBM mutex. The derived mec index is 1-based, matching what
 * nv_grbm_select() expects.
 */
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
			uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

/*
 * Single-bit mask identifying (pipe_id, queue_id), used to program
 * CP_PQ_WPTR_POLL_CNTL1. Queues are numbered consecutively across pipes.
 */
static uint64_t get_queue_mask(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

/* Counterpart of acquire_queue(): deselect the queue and unlock. */
static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

/*
 * Program the per-VMID shader memory configuration (SH_MEM_CONFIG and
 * SH_MEM_BASES). The APE1 parameters are accepted for interface
 * compatibility but never written: APE1 no longer exists on GFX9+.
 */
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
	/* APE1 no longer exists on GFX9 */

	unlock_srbm(kgd);
}

/*
 * Map @pasid to @vmid in the ATHUB (ATC) and in the IH block's VMID LUT.
 * A pasid of 0 writes a cleared mapping (VALID bit not set). Always
 * returns 0; the hardware completion handshake is still disabled (#if 0).
 */
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);

	pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
	       pasid_mapping);

#if 0
	/* TODO: uncomment this code when the hardware support is ready. */
	while (!(RREG32(SOC15_REG_OFFSET(
			ATHUB, 0,
			mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
		 (1U << vmid)))
		cpu_relax();

	pr_debug("ATHUB mapping update finished\n");
	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
	       1U << vmid);
#endif

	/* Mapping vmid to pasid also for IH block */
	pr_debug("update mapping for IH block and mmhub");
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
	       pasid_mapping);

	return 0;
}

/* TODO - RING0 form of field is obsolete, seems to date back to SI
 * but still works
 */

/*
 * Enable time-stamp and opcode-error interrupts in CPC_INT_CNTL for the
 * MEC pipe that owns the driver-global @pipe_id.
 */
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

/*
 * Compute the register offset of the RLC (user-mode SDMA queue) register
 * block for the given engine and queue, relative to mmSDMA0_RLC0_RB_CNTL.
 */
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base[2] = {
		SOC15_REG_OFFSET(SDMA0, 0,
				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
		/* On gfx10, mmSDMA1_xxx registers are defined NOT based
		 * on SDMA1 base address (dw 0x1860) but based on SDMA0
		 * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL
		 * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc
		 * below
		 */
		SOC15_REG_OFFSET(SDMA1, 0,
				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
	};

	uint32_t retval = sdma_engine_reg_base[engine_id]
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, retval);

	return retval;
}

#if 0
static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
{
	uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
			mmTCP_WATCH0_ADDR_H;

	pr_debug("kfd: reg watch base address: 0x%x\n", retval);

	return retval;
}
#endif

/* Reinterpret an opaque MQD pointer as a GFX10 compute MQD. */
static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

/* Reinterpret an opaque MQD pointer as a GFX10 SDMA MQD. */
static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}

/*
 * Load a compute MQD into the HQD registers of (pipe_id, queue_id) and
 * activate the queue. If @wptr is non-NULL, a CP-side poll of the
 * user-mode write pointer is armed instead of dereferencing the pointer
 * here (the user context may not be accessible). wptr_shift/wptr_mask
 * are accepted for interface compatibility but unused on this ASIC.
 */
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	/* Copy the contiguous MQD image into the matching HQD registers. */
	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32(reg, mqd_hqd[reg - hqd_base]);


	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uint64_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	/* Finally mark the queue active; the CP starts fetching from it. */
	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);

	release_queue(kgd);

	return 0;
}

/*
 * Map the HIQ MQD by submitting a PACKET3_MAP_QUEUES packet through the
 * KIQ ring, rather than writing HQD registers directly. Returns the
 * amdgpu_ring_alloc() error code on failure, 0 on success.
 */
static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v10_compute_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(kgd);

	return r;
}

/*
 * Snapshot the HQD registers of (pipe_id, queue_id) into a freshly
 * kmalloc'ed array of (byte offset, value) pairs. On success the caller
 * owns *dump and must kfree() it; *n_regs receives the pair count.
 */
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

/*
 * Load an SDMA MQD into the RLC queue registers and (re)enable the ring
 * buffer. The ring is first disabled, then we wait up to 2s for the
 * context to report idle before reprogramming; -ETIME on timeout.
 */
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	/* Disable the ring buffer while the queue is reprogrammed. */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
	       m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
	       m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
	       m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		/* User wptr unreadable: fall back to WPTR == RPTR (empty). */
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
	       m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
	       m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
	       m->sdmax_rlcx_rb_rptr_addr_hi);

	/* Re-enable the ring buffer only after everything else is set up. */
	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

/*
 * Dump the SDMA RLC queue registers as (byte offset, value) pairs; same
 * ownership contract as kgd_hqd_dump(). Reuses DUMP_REG() from above
 * with HQD_N_REGS redefined for the four SDMA register ranges.
 */
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

/*
 * Return true if the HQD at (pipe_id, queue_id) is active and its PQ base
 * matches @queue_address (compared shifted right by 8 to match the
 * CP_HQD_PQ_BASE register encoding).
 */
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
		    high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

/* An SDMA queue counts as occupied while its ring buffer is enabled. */
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

/*
 * Stop the HQD at (pipe_id, queue_id): issue a dequeue request of the
 * type derived from @reset_type, then poll CP_HQD_ACTIVE until the queue
 * deactivates or @utimeout (in milliseconds) expires. Returns 0 on
 * success, -ETIME on preemption timeout, -EIO during a GPU reset.
 */
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
			   enum kfd_preempt_type reset_type,
			   unsigned int utimeout, uint32_t pipe_id,
			   uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v10_compute_mqd *m = get_mqd(mqd);

	/* Don't touch the hardware while a GPU reset is in flight. */
	if (amdgpu_in_reset(adev))
		return -EIO;

#if 0
	unsigned long flags;
	int retry;
#endif

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

#if 0 /* Is this still needed? */
	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
	 * cleared before continuing. Also, ensure wait times are set to at
	 * least 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();
#endif

	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev
*kgd, void *mqd, 6456bdadb20SHawking Zhang unsigned int utimeout) 6466bdadb20SHawking Zhang { 6476bdadb20SHawking Zhang struct amdgpu_device *adev = get_amdgpu_device(kgd); 6486bdadb20SHawking Zhang struct v10_sdma_mqd *m; 649b55a8b8bSYong Zhao uint32_t sdma_rlc_reg_offset; 6506bdadb20SHawking Zhang uint32_t temp; 6516bdadb20SHawking Zhang unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; 6526bdadb20SHawking Zhang 6536bdadb20SHawking Zhang m = get_sdma_mqd(mqd); 654b55a8b8bSYong Zhao sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 6556bdadb20SHawking Zhang m->sdma_queue_id); 6566bdadb20SHawking Zhang 657b55a8b8bSYong Zhao temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); 6586bdadb20SHawking Zhang temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; 659b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); 6606bdadb20SHawking Zhang 6616bdadb20SHawking Zhang while (true) { 662b55a8b8bSYong Zhao temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 6636bdadb20SHawking Zhang if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 6646bdadb20SHawking Zhang break; 665812330ebSYong Zhao if (time_after(jiffies, end_jiffies)) { 666812330ebSYong Zhao pr_err("SDMA RLC not idle in %s\n", __func__); 6676bdadb20SHawking Zhang return -ETIME; 668812330ebSYong Zhao } 6696bdadb20SHawking Zhang usleep_range(500, 1000); 6706bdadb20SHawking Zhang } 6716bdadb20SHawking Zhang 672b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); 673b55a8b8bSYong Zhao WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 674b55a8b8bSYong Zhao RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | 6756bdadb20SHawking Zhang SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); 6766bdadb20SHawking Zhang 677b55a8b8bSYong Zhao m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); 6786bdadb20SHawking Zhang m->sdmax_rlcx_rb_rptr_hi = 679b55a8b8bSYong Zhao RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); 
6806bdadb20SHawking Zhang 6816bdadb20SHawking Zhang return 0; 6826bdadb20SHawking Zhang } 6836bdadb20SHawking Zhang 68456fc40abSYong Zhao static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 68556fc40abSYong Zhao uint8_t vmid, uint16_t *p_pasid) 6866bdadb20SHawking Zhang { 68756fc40abSYong Zhao uint32_t value; 6886bdadb20SHawking Zhang struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 6896bdadb20SHawking Zhang 69056fc40abSYong Zhao value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 6916bdadb20SHawking Zhang + vmid); 69256fc40abSYong Zhao *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; 6936bdadb20SHawking Zhang 69456fc40abSYong Zhao return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); 6956bdadb20SHawking Zhang } 6966bdadb20SHawking Zhang 6976bdadb20SHawking Zhang static int kgd_address_watch_disable(struct kgd_dev *kgd) 6986bdadb20SHawking Zhang { 6996bdadb20SHawking Zhang return 0; 7006bdadb20SHawking Zhang } 7016bdadb20SHawking Zhang 7026bdadb20SHawking Zhang static int kgd_address_watch_execute(struct kgd_dev *kgd, 7036bdadb20SHawking Zhang unsigned int watch_point_id, 7046bdadb20SHawking Zhang uint32_t cntl_val, 7056bdadb20SHawking Zhang uint32_t addr_hi, 7066bdadb20SHawking Zhang uint32_t addr_lo) 7076bdadb20SHawking Zhang { 7086bdadb20SHawking Zhang return 0; 7096bdadb20SHawking Zhang } 7106bdadb20SHawking Zhang 7116bdadb20SHawking Zhang static int kgd_wave_control_execute(struct kgd_dev *kgd, 7126bdadb20SHawking Zhang uint32_t gfx_index_val, 7136bdadb20SHawking Zhang uint32_t sq_cmd) 7146bdadb20SHawking Zhang { 7156bdadb20SHawking Zhang struct amdgpu_device *adev = get_amdgpu_device(kgd); 7166bdadb20SHawking Zhang uint32_t data = 0; 7176bdadb20SHawking Zhang 7186bdadb20SHawking Zhang mutex_lock(&adev->grbm_idx_mutex); 7196bdadb20SHawking Zhang 7206bdadb20SHawking Zhang WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); 7216bdadb20SHawking Zhang WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); 
7226bdadb20SHawking Zhang 7236bdadb20SHawking Zhang data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 7246bdadb20SHawking Zhang INSTANCE_BROADCAST_WRITES, 1); 7256bdadb20SHawking Zhang data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 7266bdadb20SHawking Zhang SA_BROADCAST_WRITES, 1); 7276bdadb20SHawking Zhang data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 7286bdadb20SHawking Zhang SE_BROADCAST_WRITES, 1); 7296bdadb20SHawking Zhang 7306bdadb20SHawking Zhang WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); 7316bdadb20SHawking Zhang mutex_unlock(&adev->grbm_idx_mutex); 7326bdadb20SHawking Zhang 7336bdadb20SHawking Zhang return 0; 7346bdadb20SHawking Zhang } 7356bdadb20SHawking Zhang 7366bdadb20SHawking Zhang static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 7376bdadb20SHawking Zhang unsigned int watch_point_id, 7386bdadb20SHawking Zhang unsigned int reg_offset) 7396bdadb20SHawking Zhang { 7406bdadb20SHawking Zhang return 0; 7416bdadb20SHawking Zhang } 7426bdadb20SHawking Zhang 7436bdadb20SHawking Zhang static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, 7446bdadb20SHawking Zhang uint64_t page_table_base) 7456bdadb20SHawking Zhang { 7466bdadb20SHawking Zhang struct amdgpu_device *adev = get_amdgpu_device(kgd); 7476bdadb20SHawking Zhang 7486bdadb20SHawking Zhang if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { 7496bdadb20SHawking Zhang pr_err("trying to set page table base for wrong VMID %u\n", 7506bdadb20SHawking Zhang vmid); 7516bdadb20SHawking Zhang return; 7526bdadb20SHawking Zhang } 7536bdadb20SHawking Zhang 754b2100ce1SYong Zhao /* SDMA is on gfxhub as well for Navi1* series */ 7558ffff9b4SOak Zeng adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); 7566bdadb20SHawking Zhang } 75747c5ab6cSYong Zhao 758e392c887SYong Zhao const struct kfd2kgd_calls gfx_v10_kfd2kgd = { 75947c5ab6cSYong Zhao .program_sh_mem_settings = kgd_program_sh_mem_settings, 76047c5ab6cSYong Zhao .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 
76147c5ab6cSYong Zhao .init_interrupts = kgd_init_interrupts, 76247c5ab6cSYong Zhao .hqd_load = kgd_hqd_load, 7638eee00f6SHuang Rui .hiq_mqd_load = kgd_hiq_mqd_load, 76447c5ab6cSYong Zhao .hqd_sdma_load = kgd_hqd_sdma_load, 76547c5ab6cSYong Zhao .hqd_dump = kgd_hqd_dump, 76647c5ab6cSYong Zhao .hqd_sdma_dump = kgd_hqd_sdma_dump, 76747c5ab6cSYong Zhao .hqd_is_occupied = kgd_hqd_is_occupied, 76847c5ab6cSYong Zhao .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, 76947c5ab6cSYong Zhao .hqd_destroy = kgd_hqd_destroy, 77047c5ab6cSYong Zhao .hqd_sdma_destroy = kgd_hqd_sdma_destroy, 77147c5ab6cSYong Zhao .address_watch_disable = kgd_address_watch_disable, 77247c5ab6cSYong Zhao .address_watch_execute = kgd_address_watch_execute, 77347c5ab6cSYong Zhao .wave_control_execute = kgd_wave_control_execute, 77447c5ab6cSYong Zhao .address_watch_get_offset = kgd_address_watch_get_offset, 77547c5ab6cSYong Zhao .get_atc_vmid_pasid_mapping_info = 77647c5ab6cSYong Zhao get_atc_vmid_pasid_mapping_info, 77747c5ab6cSYong Zhao .set_vm_context_page_table_base = set_vm_context_page_table_base, 77847c5ab6cSYong Zhao }; 779