/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
#include "cik_sdma.h"
#include "amdgpu_ucode.h"
#include "gfx_v7_0.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

enum {
	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
	MAX_WATCH_ADDRESSES = 4
};

enum {
	ADDRESS_WATCH_REG_ADDR_HI = 0,
	ADDRESS_WATCH_REG_ADDR_LO,
	ADDRESS_WATCH_REG_CNTL,
	ADDRESS_WATCH_REG_MAX
};

/* not defined in the CI/KV reg file */
enum {
	ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
	ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
	ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
	/* extend the mask to 26 bits to match the low address field */
	ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
	ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};

static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
	mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
	mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
	mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
	mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
};

union TCP_WATCH_CNTL_BITS {
	struct {
		uint32_t mask:24;
		uint32_t vmid:4;
		uint32_t atc:1;
		uint32_t mode:2;
		uint32_t valid:1;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

/*
 * Register access functions
 */

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
		uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid);

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id);

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset);

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid);

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
 */
static int get_tile_config(struct kgd_dev *kgd,
		struct tile_config *config)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	config->gb_addr_config = adev->gfx.config.gb_addr_config;
	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
	config->num_tile_configs =
			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	config->macro_tile_config_ptr =
			adev->gfx.config.macrotile_mode_array;
	config->num_macro_tile_configs =
			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);

	return 0;
}

static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_local_mem_info = get_local_mem_info,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.alloc_pasid = amdgpu_pasid_alloc,
	.free_pasid = amdgpu_pasid_free,
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_pipeline = kgd_init_pipeline,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
	.get_fw_version = get_fw_version,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.get_cu_info = get_cu_info,
	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
	.invalidate_tlbs = invalidate_tlbs,
	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
	.submit_ib = amdgpu_amdkfd_submit_ib,
};
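
/*
 * Entry point used by amdgpu_amdkfd to fetch the GFX7 (CIK) interface
 * table defined above; the KFD driver calls back through these pointers
 * for ASIC-specific register programming.
 */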
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
{
	return (struct kfd2kgd_calls *)&kfd2kgd;
}

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, queue_id, 0);
}

static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}

static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	lock_srbm(kgd, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(kgd);
}

static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
					unsigned int vmid)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished and the
	 * SW cleared it. So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
					ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
				uint32_t hpd_size, uint64_t hpd_gpu_addr)
{
	/* amdgpu owns the per-pipe state */
	return 0;
}

static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(kgd, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(kgd);

	return 0;
}

static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

	pr_debug("kfd: sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
	return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
}

static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, wptr_val, data;
	bool valid_wptr = false;

	m = get_mqd(mqd);

	acquire_queue(kgd, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Copy userspace write pointer value to register.
	 * Activate doorbell logic to monitor subsequent changes.
	 */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* read_user_wptr may take the mm->mmap_sem.
	 * release srbm_mutex to avoid circular dependency between
	 * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
391 */ 392 release_queue(kgd); 393 valid_wptr = read_user_wptr(mm, wptr, wptr_val); 394 acquire_queue(kgd, pipe_id, queue_id); 395 if (valid_wptr) 396 WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); 397 398 data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); 399 WREG32(mmCP_HQD_ACTIVE, data); 400 401 release_queue(kgd); 402 403 return 0; 404 } 405 406 static int kgd_hqd_dump(struct kgd_dev *kgd, 407 uint32_t pipe_id, uint32_t queue_id, 408 uint32_t (**dump)[2], uint32_t *n_regs) 409 { 410 struct amdgpu_device *adev = get_amdgpu_device(kgd); 411 uint32_t i = 0, reg; 412 #define HQD_N_REGS (35+4) 413 #define DUMP_REG(addr) do { \ 414 if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ 415 break; \ 416 (*dump)[i][0] = (addr) << 2; \ 417 (*dump)[i++][1] = RREG32(addr); \ 418 } while (0) 419 420 *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); 421 if (*dump == NULL) 422 return -ENOMEM; 423 424 acquire_queue(kgd, pipe_id, queue_id); 425 426 DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); 427 DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); 428 DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2); 429 DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3); 430 431 for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) 432 DUMP_REG(reg); 433 434 release_queue(kgd); 435 436 WARN_ON_ONCE(i != HQD_N_REGS); 437 *n_regs = i; 438 439 return 0; 440 } 441 442 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, 443 uint32_t __user *wptr, struct mm_struct *mm) 444 { 445 struct amdgpu_device *adev = get_amdgpu_device(kgd); 446 struct cik_sdma_rlc_registers *m; 447 unsigned long end_jiffies; 448 uint32_t sdma_base_addr; 449 uint32_t data; 450 451 m = get_sdma_mqd(mqd); 452 sdma_base_addr = get_sdma_base_addr(m); 453 454 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, 455 m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); 456 457 end_jiffies = msecs_to_jiffies(2000) + jiffies; 458 while (true) { 459 data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); 460 if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 461 break; 462 if (time_after(jiffies, end_jiffies)) 463 return -ETIME; 464 usleep_range(500, 1000); 465 } 466 if (m->sdma_engine_id) { 467 data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); 468 data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, 469 RESUME_CTX, 0); 470 WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); 471 } else { 472 data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); 473 data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, 474 RESUME_CTX, 0); 475 WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); 476 } 477 478 data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL, 479 ENABLE, 1); 480 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); 481 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); 482 483 if (read_user_wptr(mm, wptr, data)) 484 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); 485 else 486 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 487 m->sdma_rlc_rb_rptr); 488 489 WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, 490 m->sdma_rlc_virtual_addr); 491 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); 492 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, 493 m->sdma_rlc_rb_base_hi); 494 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 495 m->sdma_rlc_rb_rptr_addr_lo); 496 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, 497 m->sdma_rlc_rb_rptr_addr_hi); 498 499 data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, 500 RB_ENABLE, 1); 501 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); 502 503 return 0; 504 } 505 506 static int 
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(kgd, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(kgd);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);

	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;

	acquire_queue(kgd, pipe_id, queue_id);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
	 * cleared before continuing. Also, ensure wait times are set to at
	 * least 0x3.
604 */ 605 local_irq_save(flags); 606 preempt_disable(); 607 retry = 5000; /* wait for 500 usecs at maximum */ 608 while (true) { 609 temp = RREG32(mmCP_HQD_IQ_TIMER); 610 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { 611 pr_debug("HW is processing IQ\n"); 612 goto loop; 613 } 614 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { 615 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) 616 == 3) /* SEM-rearm is safe */ 617 break; 618 /* Wait time 3 is safe for CP, but our MMIO read/write 619 * time is close to 1 microsecond, so check for 10 to 620 * leave more buffer room 621 */ 622 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) 623 >= 10) 624 break; 625 pr_debug("IQ timer is active\n"); 626 } else 627 break; 628 loop: 629 if (!retry) { 630 pr_err("CP HQD IQ timer status time out\n"); 631 break; 632 } 633 ndelay(100); 634 --retry; 635 } 636 retry = 1000; 637 while (true) { 638 temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); 639 if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) 640 break; 641 pr_debug("Dequeue request is pending\n"); 642 643 if (!retry) { 644 pr_err("CP HQD dequeue request time out\n"); 645 break; 646 } 647 ndelay(100); 648 --retry; 649 } 650 local_irq_restore(flags); 651 preempt_enable(); 652 653 WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); 654 655 end_jiffies = (utimeout * HZ / 1000) + jiffies; 656 while (true) { 657 temp = RREG32(mmCP_HQD_ACTIVE); 658 if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) 659 break; 660 if (time_after(jiffies, end_jiffies)) { 661 pr_err("cp queue preemption time out\n"); 662 release_queue(kgd); 663 return -ETIME; 664 } 665 usleep_range(500, 1000); 666 } 667 668 release_queue(kgd); 669 return 0; 670 } 671 672 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 673 unsigned int utimeout) 674 { 675 struct amdgpu_device *adev = get_amdgpu_device(kgd); 676 struct cik_sdma_rlc_registers *m; 677 uint32_t sdma_base_addr; 678 uint32_t temp; 679 unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; 680 681 m = get_sdma_mqd(mqd); 682 sdma_base_addr = get_sdma_base_addr(m); 683 684 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); 685 temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; 686 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); 687 688 while (true) { 689 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); 690 if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) 691 break; 692 if (time_after(jiffies, end_jiffies)) 693 return -ETIME; 694 usleep_range(500, 1000); 695 } 696 697 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); 698 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, 699 RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | 700 SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); 701 702 m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); 703 704 return 0; 705 } 706 707 static int kgd_address_watch_disable(struct kgd_dev *kgd) 708 { 709 struct amdgpu_device *adev = get_amdgpu_device(kgd); 710 union TCP_WATCH_CNTL_BITS cntl; 711 unsigned int i; 712 713 cntl.u32All = 0; 714 715 cntl.bitfields.valid = 0; 716 cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 717 cntl.bitfields.atc = 1; 718 719 /* Turning off this address until we set all the registers */ 720 for (i = 0; i < MAX_WATCH_ADDRESSES; i++) 721 WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + 722 ADDRESS_WATCH_REG_CNTL], cntl.u32All); 723 724 return 0; 725 } 726 727 static int kgd_address_watch_execute(struct kgd_dev *kgd, 728 unsigned int watch_point_id, 729 uint32_t cntl_val, 730 uint32_t addr_hi, 731 uint32_t addr_lo) 732 { 733 struct 
	union TCP_WATCH_CNTL_BITS cntl;

	cntl.u32All = cntl_val;

	/* Turning off this watch point until we set all the registers */
	cntl.bitfields.valid = 0;
	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_ADDR_HI], addr_hi);

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_ADDR_LO], addr_lo);

	/* Enable the watch point */
	cntl.bitfields.valid = 1;

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	return 0;
}

static int kgd_wave_control_execute(struct kgd_dev *kgd,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t data;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);

	/* Restore the GRBM_GFX_INDEX register */

	data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

	WREG32(mmGRBM_GFX_INDEX, data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}

static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
							uint8_t vmid)
{
	uint32_t reg;
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}

static void set_scratch_backing_va(struct kgd_dev *kgd,
					uint64_t va, uint32_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	lock_srbm(kgd, 0, 0, 0, vmid);
	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
	unlock_srbm(kgd);
}

static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	const union amdgpu_firmware_header *hdr;

	switch (type) {
	case KGD_ENGINE_PFP:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.pfp_fw->data;
		break;

	case KGD_ENGINE_ME:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.me_fw->data;
		break;

	case KGD_ENGINE_CE:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.ce_fw->data;
		break;

	case KGD_ENGINE_MEC1:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec_fw->data;
		break;

	case KGD_ENGINE_MEC2:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec2_fw->data;
		break;

	case KGD_ENGINE_RLC:
		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.rlc_fw->data;
		break;

	case KGD_ENGINE_SDMA1:
		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[0].fw->data;
		break;

	case KGD_ENGINE_SDMA2:
		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[1].fw->data;
		break;

	default:
		return 0;
	}

	if (hdr == NULL)
		return 0;

	/* Only 12 bit in use */
	return hdr->common.ucode_version;
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
			uint32_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
}

static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
	int vmid;
	unsigned int tmp;

	for (vmid = 0; vmid < 16; vmid++) {
		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
			continue;

		tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
		if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
			(tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
			WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
			RREG32(mmVM_INVALIDATE_RESPONSE);
			break;
		}
	}

	return 0;
}

static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("non kfd vmid\n");
		return 0;
	}

	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
	RREG32(mmVM_INVALIDATE_RESPONSE);
	return 0;
}