1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #include <linux/module.h> 23 #include <linux/fdtable.h> 24 #include <linux/uaccess.h> 25 #include <linux/firmware.h> 26 #include "amdgpu.h" 27 #include "amdgpu_amdkfd.h" 28 #include "sdma0/sdma0_4_2_2_offset.h" 29 #include "sdma0/sdma0_4_2_2_sh_mask.h" 30 #include "sdma1/sdma1_4_2_2_offset.h" 31 #include "sdma1/sdma1_4_2_2_sh_mask.h" 32 #include "sdma2/sdma2_4_2_2_offset.h" 33 #include "sdma2/sdma2_4_2_2_sh_mask.h" 34 #include "sdma3/sdma3_4_2_2_offset.h" 35 #include "sdma3/sdma3_4_2_2_sh_mask.h" 36 #include "sdma4/sdma4_4_2_2_offset.h" 37 #include "sdma4/sdma4_4_2_2_sh_mask.h" 38 #include "sdma5/sdma5_4_2_2_offset.h" 39 #include "sdma5/sdma5_4_2_2_sh_mask.h" 40 #include "sdma6/sdma6_4_2_2_offset.h" 41 #include "sdma6/sdma6_4_2_2_sh_mask.h" 42 #include "sdma7/sdma7_4_2_2_offset.h" 43 #include "sdma7/sdma7_4_2_2_sh_mask.h" 44 #include "v9_structs.h" 45 #include "soc15.h" 46 #include "soc15d.h" 47 #include "amdgpu_amdkfd_gfx_v9.h" 48 #include "gfxhub_v1_0.h" 49 #include "mmhub_v9_4.h" 50 51 #define HQD_N_REGS 56 52 #define DUMP_REG(addr) do { \ 53 if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ 54 break; \ 55 (*dump)[i][0] = (addr) << 2; \ 56 (*dump)[i++][1] = RREG32(addr); \ 57 } while (0) 58 59 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) 60 { 61 return (struct amdgpu_device *)kgd; 62 } 63 64 static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) 65 { 66 return (struct v9_sdma_mqd *)mqd; 67 } 68 69 static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, 70 unsigned int engine_id, 71 unsigned int queue_id) 72 { 73 uint32_t sdma_engine_reg_base = 0; 74 uint32_t sdma_rlc_reg_offset; 75 76 switch (engine_id) { 77 default: 78 dev_warn(adev->dev, 79 "Invalid sdma engine id (%d), using engine id 0\n", 80 engine_id); 81 fallthrough; 82 case 0: 83 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, 84 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; 85 break; 86 case 1: 87 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, 88 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; 89 break; 90 case 2: 91 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, 92 mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; 93 break; 94 case 3: 95 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, 96 mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; 97 break; 98 case 4: 99 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, 100 mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; 101 break; 102 case 5: 103 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, 104 mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; 105 break; 106 case 6: 107 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, 108 mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; 109 break; 110 case 7: 111 sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, 112 mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; 113 break; 114 } 115 116 sdma_rlc_reg_offset = sdma_engine_reg_base 117 + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); 118 119 pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, 120 queue_id, sdma_rlc_reg_offset); 121 122 return sdma_rlc_reg_offset; 123 } 124 125 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, 126 uint32_t __user *wptr, struct mm_struct *mm) 127 { 128 struct amdgpu_device *adev = get_amdgpu_device(kgd); 129 struct v9_sdma_mqd *m; 130 uint32_t sdma_rlc_reg_offset; 131 unsigned long end_jiffies; 132 uint32_t data; 133 uint64_t data64; 134 uint64_t __user *wptr64 = (uint64_t __user *)wptr; 135 136 m = get_sdma_mqd(mqd); 137 sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 138 m->sdma_queue_id); 139 140 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 141 m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); 142 143 end_jiffies = msecs_to_jiffies(2000) + jiffies; 144 while (true) { 145 data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 146 if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 147 break; 148 if (time_after(jiffies, end_jiffies)) { 149 pr_err("SDMA RLC not idle in %s\n", __func__); 150 return -ETIME; 151 } 152 usleep_range(500, 1000); 153 } 154 155 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, 156 m->sdmax_rlcx_doorbell_offset); 157 158 data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, 159 ENABLE, 1); 160 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); 161 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, 162 m->sdmax_rlcx_rb_rptr); 163 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, 164 m->sdmax_rlcx_rb_rptr_hi); 165 166 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); 167 if (read_user_wptr(mm, wptr64, data64)) { 168 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 169 lower_32_bits(data64)); 170 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 171 upper_32_bits(data64)); 172 } else { 173 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 174 m->sdmax_rlcx_rb_rptr); 175 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 176 m->sdmax_rlcx_rb_rptr_hi); 177 } 178 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); 179 180 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); 181 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, 182 m->sdmax_rlcx_rb_base_hi); 183 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 184 m->sdmax_rlcx_rb_rptr_addr_lo); 185 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, 186 m->sdmax_rlcx_rb_rptr_addr_hi); 187 188 data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, 189 RB_ENABLE, 1); 190 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); 191 192 return 0; 193 } 194 195 static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, 196 uint32_t engine_id, uint32_t queue_id, 197 uint32_t (**dump)[2], uint32_t *n_regs) 198 { 199 struct amdgpu_device *adev = get_amdgpu_device(kgd); 200 uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, 201 engine_id, queue_id); 202 uint32_t i = 0, reg; 203 #undef HQD_N_REGS 204 #define HQD_N_REGS (19+6+7+10) 205 206 *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); 207 if (*dump == NULL) 208 return -ENOMEM; 209 210 for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) 211 DUMP_REG(sdma_rlc_reg_offset + reg); 212 for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) 213 DUMP_REG(sdma_rlc_reg_offset + reg); 214 for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; 215 reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) 216 DUMP_REG(sdma_rlc_reg_offset + reg); 217 for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; 218 reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) 219 DUMP_REG(sdma_rlc_reg_offset + reg); 220 221 WARN_ON_ONCE(i != HQD_N_REGS); 222 *n_regs = i; 223 224 return 0; 225 } 226 227 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) 228 { 229 struct amdgpu_device *adev = get_amdgpu_device(kgd); 230 struct v9_sdma_mqd *m; 231 uint32_t sdma_rlc_reg_offset; 232 uint32_t sdma_rlc_rb_cntl; 233 234 m = get_sdma_mqd(mqd); 235 sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 236 m->sdma_queue_id); 237 238 sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); 239 240 if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) 241 return true; 242 243 return false; 244 } 245 246 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 247 unsigned int utimeout) 248 { 249 struct amdgpu_device *adev = get_amdgpu_device(kgd); 250 struct v9_sdma_mqd *m; 251 uint32_t sdma_rlc_reg_offset; 252 uint32_t temp; 253 unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; 254 255 m = get_sdma_mqd(mqd); 256 sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 257 m->sdma_queue_id); 258 259 temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); 260 temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; 261 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); 262 263 while (true) { 264 temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 265 if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 266 break; 267 if (time_after(jiffies, end_jiffies)) { 268 pr_err("SDMA RLC not idle in %s\n", __func__); 269 return -ETIME; 270 } 271 usleep_range(500, 1000); 272 } 273 274 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); 275 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 276 RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | 277 SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); 278 279 m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); 280 m->sdmax_rlcx_rb_rptr_hi = 281 RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); 282 283 return 0; 284 } 285 286 static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, 287 uint64_t page_table_base) 288 { 289 struct amdgpu_device *adev = get_amdgpu_device(kgd); 290 291 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { 292 pr_err("trying to set page table base for wrong VMID %u\n", 293 vmid); 294 return; 295 } 296 297 mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); 298 299 gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); 300 } 301 302 const struct kfd2kgd_calls arcturus_kfd2kgd = { 303 .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, 304 .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, 305 .init_interrupts = kgd_gfx_v9_init_interrupts, 306 .hqd_load = kgd_gfx_v9_hqd_load, 307 .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, 308 .hqd_sdma_load = kgd_hqd_sdma_load, 309 .hqd_dump = kgd_gfx_v9_hqd_dump, 310 .hqd_sdma_dump = kgd_hqd_sdma_dump, 311 .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, 312 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, 313 .hqd_destroy = kgd_gfx_v9_hqd_destroy, 314 .hqd_sdma_destroy = kgd_hqd_sdma_destroy, 315 .address_watch_disable = kgd_gfx_v9_address_watch_disable, 316 .address_watch_execute = kgd_gfx_v9_address_watch_execute, 317 .wave_control_execute = kgd_gfx_v9_wave_control_execute, 318 .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, 319 .get_atc_vmid_pasid_mapping_info = 320 kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, 321 .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, 322 .get_hive_id = amdgpu_amdkfd_get_hive_id, 323 }; 324