/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ucode.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config,
		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
		uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
		unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
		uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
		uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;
	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_local_mem_info = get_local_mem_info,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.alloc_pasid = amdgpu_pasid_alloc,
	.free_pasid = amdgpu_pasid_free,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid =
			get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid =
			get_atc_vmid_pasid_mapping_valid,
	.get_fw_version = get_fw_version,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.get_cu_info = get_cu_info,
	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
	.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
	.invalidate_tlbs = invalidate_tlbs,
	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
	.submit_ib = amdgpu_amdkfd_submit_ib,
	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
	.gpu_recover = amdgpu_amdkfd_gpu_reset,
	.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

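/*
 * SRBM_GFX_CNTL selects which MEC/pipe/queue/VMID the banked compute
 * registers accessed below refer to. lock_srbm()/unlock_srbm() wrap that
 * selection in adev->srbm_mutex so banked register accesses from
 * different callers cannot interleave.
 */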
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}

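/*
 * Bind @pasid to @vmid in the ATC, then mirror the mapping into the IH
 * block's VMID-to-PASID LUT so interrupts carrying this VMID can be
 * attributed to the owning process.
 */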
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
					ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	pr_debug("kfd: sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct vi_mqd *get_mqd(void *mqd)
{
	return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct vi_sdma_mqd *)mqd;
}

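/*
 * Program a compute HQD directly via MMIO from the MQD image: route the
 * HIQ in RLC_CP_SCHEDULERS when vmid is 0, copy the MQD registers into
 * the HQD aperture, restore the user-space write pointer and finally set
 * CP_HQD_ACTIVE. Typically used when a queue is mapped without the HWS
 * firmware scheduler (e.g. the HIQ itself).
 */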
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, wptr_val, data;
	bool valid_wptr = false;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0 */
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
		value = RREG32(mmRLC_CP_SCHEDULERS);
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32(mmRLC_CP_SCHEDULERS, value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
	}

	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Copy userspace write pointer value to register.
	 * Activate doorbell logic to monitor subsequent changes.
	 */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* read_user_ptr may take the mm->mmap_sem.
	 * release srbm_mutex to avoid circular dependency between
	 * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
	 */
	release_queue(kgd);
	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
	acquire_queue(kgd, pipe_id, queue_id);
	if (valid_wptr)
		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(mmCP_HQD_ACTIVE, data);

	release_queue(kgd);

	return 0;
}

static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

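/*
 * Restore an SDMA RLC queue from its MQD: disable the ring buffer, wait
 * for the RLC context to go idle, clear RESUME_CTX on the owning SDMA
 * engine, then re-arm the doorbell, pointers and ring buffer from the
 * saved MQD state.
 */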
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	unsigned long end_jiffies;
	uint32_t sdma_base_addr;
	uint32_t data;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}
	if (m->sdma_engine_id) {
		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
	} else {
		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
	}

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
				m->sdmax_rlcx_virtual_addr);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

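/*
 * Preempt (destroy) a compute HQD: choose a dequeue request type from
 * the KFD preempt type, apply the IQ timer workaround described below,
 * issue the dequeue request and poll CP_HQD_ACTIVE until the queue is
 * gone or the caller's timeout (in ms) expires.
 */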
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;
	struct vi_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
	 * cleared before continuing. Also, ensure wait times are set to at
	 * least 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
								uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

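/*
 * Address watch support is not wired up for GFXv8 through this
 * interface; the watch hooks below are no-op stubs that report success
 * (and offset 0 from kgd_address_watch_get_offset()).
 */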
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
	return 0;
}

static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32(mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return 0;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	lock_srbm(kgd, 0, 0, 0, vmid);
	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
	unlock_srbm(kgd);
}

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	const union amdgpu_firmware_header *hdr;

	switch (type) {
	case KGD_ENGINE_PFP:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.pfp_fw->data;
		break;

	case KGD_ENGINE_ME:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.me_fw->data;
		break;

	case KGD_ENGINE_CE:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.ce_fw->data;
		break;

	case KGD_ENGINE_MEC1:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.mec_fw->data;
		break;

	case KGD_ENGINE_MEC2:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.mec2_fw->data;
		break;

	case KGD_ENGINE_RLC:
		hdr = (const union amdgpu_firmware_header *)
							adev->gfx.rlc_fw->data;
		break;

	case KGD_ENGINE_SDMA1:
		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[0].fw->data;
		break;

	case KGD_ENGINE_SDMA2:
		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[1].fw->data;
		break;

	default:
		return 0;
	}

	if (hdr == NULL)
		return 0;

	/* Only 12 bit in use */
	return hdr->common.ucode_version;
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
			lower_32_bits(page_table_base));
}

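/*
 * Flush the GPU TLB for whichever KFD VMID is currently mapped to
 * @pasid: scan the ATC VMID-to-PASID mappings and, on a match, write
 * VM_INVALIDATE_REQUEST for that VMID and read back
 * VM_INVALIDATE_RESPONSE before returning.
 */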
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	unsigned int tmp;

	if (adev->in_gpu_reset)
		return -EIO;

	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;

		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
			RREG32(mmVM_INVALIDATE_RESPONSE);
			break;
		}
	}

	return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("non kfd vmid %d\n", vmid);
		return -EINVAL;
	}

	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
	RREG32(mmVM_INVALIDATE_RESPONSE);
	return 0;
}