1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #include "amdgpu.h" 23 #include "amdgpu_amdkfd.h" 24 #include "gc/gc_10_1_0_offset.h" 25 #include "gc/gc_10_1_0_sh_mask.h" 26 #include "athub/athub_2_0_0_offset.h" 27 #include "athub/athub_2_0_0_sh_mask.h" 28 #include "oss/osssys_5_0_0_offset.h" 29 #include "oss/osssys_5_0_0_sh_mask.h" 30 #include "soc15_common.h" 31 #include "v10_structs.h" 32 #include "nv.h" 33 #include "nvd.h" 34 35 enum hqd_dequeue_request_type { 36 NO_ACTION = 0, 37 DRAIN_PIPE, 38 RESET_WAVES, 39 SAVE_WAVES 40 }; 41 42 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) 43 { 44 return (struct amdgpu_device *)kgd; 45 } 46 47 static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, 48 uint32_t queue, uint32_t vmid) 49 { 50 struct amdgpu_device *adev = get_amdgpu_device(kgd); 51 52 mutex_lock(&adev->srbm_mutex); 53 nv_grbm_select(adev, mec, pipe, queue, vmid); 54 } 55 56 static void unlock_srbm(struct kgd_dev *kgd) 57 { 58 struct amdgpu_device *adev = get_amdgpu_device(kgd); 59 60 nv_grbm_select(adev, 0, 0, 0, 0); 61 mutex_unlock(&adev->srbm_mutex); 62 } 63 64 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, 65 uint32_t queue_id) 66 { 67 struct amdgpu_device *adev = get_amdgpu_device(kgd); 68 69 uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 70 uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 71 72 lock_srbm(kgd, mec, pipe, queue_id, 0); 73 } 74 75 static uint64_t get_queue_mask(struct amdgpu_device *adev, 76 uint32_t pipe_id, uint32_t queue_id) 77 { 78 unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + 79 queue_id; 80 81 return 1ull << bit; 82 } 83 84 static void release_queue(struct kgd_dev *kgd) 85 { 86 unlock_srbm(kgd); 87 } 88 89 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, 90 uint32_t sh_mem_config, 91 uint32_t sh_mem_ape1_base, 92 uint32_t sh_mem_ape1_limit, 93 uint32_t sh_mem_bases) 94 { 95 struct amdgpu_device *adev = get_amdgpu_device(kgd); 96 97 lock_srbm(kgd, 0, 0, 0, vmid); 98 99 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 100 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 101 /* APE1 no longer exists on GFX9 */ 102 103 unlock_srbm(kgd); 104 } 105 106 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, 107 unsigned int vmid) 108 { 109 struct amdgpu_device *adev = get_amdgpu_device(kgd); 110 111 /* 112 * We have to assume that there is no outstanding mapping. 113 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because 114 * a mapping is in progress or because a mapping finished 115 * and the SW cleared it. 116 * So the protocol is to always wait & clear. 117 */ 118 uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | 119 ATC_VMID0_PASID_MAPPING__VALID_MASK; 120 121 pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping); 122 123 pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); 124 WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, 125 pasid_mapping); 126 127 #if 0 128 /* TODO: uncomment this code when the hardware support is ready. */ 129 while (!(RREG32(SOC15_REG_OFFSET( 130 ATHUB, 0, 131 mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & 132 (1U << vmid))) 133 cpu_relax(); 134 135 pr_debug("ATHUB mapping update finished\n"); 136 WREG32(SOC15_REG_OFFSET(ATHUB, 0, 137 mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), 138 1U << vmid); 139 #endif 140 141 /* Mapping vmid to pasid also for IH block */ 142 pr_debug("update mapping for IH block and mmhub"); 143 WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, 144 pasid_mapping); 145 146 return 0; 147 } 148 149 /* TODO - RING0 form of field is obsolete, seems to date back to SI 150 * but still works 151 */ 152 153 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) 154 { 155 struct amdgpu_device *adev = get_amdgpu_device(kgd); 156 uint32_t mec; 157 uint32_t pipe; 158 159 mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 160 pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 161 162 lock_srbm(kgd, mec, pipe, 0, 0); 163 164 WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, 165 CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | 166 CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); 167 168 unlock_srbm(kgd); 169 170 return 0; 171 } 172 173 static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, 174 unsigned int engine_id, 175 unsigned int queue_id) 176 { 177 uint32_t sdma_engine_reg_base[2] = { 178 SOC15_REG_OFFSET(SDMA0, 0, 179 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, 180 /* On gfx10, mmSDMA1_xxx registers are defined NOT based 181 * on SDMA1 base address (dw 0x1860) but based on SDMA0 182 * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL 183 * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc 184 * below 185 */ 186 SOC15_REG_OFFSET(SDMA1, 0, 187 mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL 188 }; 189 190 uint32_t retval = sdma_engine_reg_base[engine_id] 191 + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); 192 193 pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, 194 queue_id, retval); 195 196 return retval; 197 } 198 199 #if 0 200 static uint32_t get_watch_base_addr(struct amdgpu_device *adev) 201 { 202 uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) - 203 mmTCP_WATCH0_ADDR_H; 204 205 pr_debug("kfd: reg watch base address: 0x%x\n", retval); 206 207 return retval; 208 } 209 #endif 210 211 static inline struct v10_compute_mqd *get_mqd(void *mqd) 212 { 213 return (struct v10_compute_mqd *)mqd; 214 } 215 216 static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) 217 { 218 return (struct v10_sdma_mqd *)mqd; 219 } 220 221 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 222 uint32_t queue_id, uint32_t __user *wptr, 223 uint32_t wptr_shift, uint32_t wptr_mask, 224 struct mm_struct *mm) 225 { 226 struct amdgpu_device *adev = get_amdgpu_device(kgd); 227 struct v10_compute_mqd *m; 228 uint32_t *mqd_hqd; 229 uint32_t reg, hqd_base, data; 230 231 m = get_mqd(mqd); 232 233 pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); 234 acquire_queue(kgd, pipe_id, queue_id); 235 236 /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ 237 mqd_hqd = &m->cp_mqd_base_addr_lo; 238 hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); 239 240 for (reg = hqd_base; 241 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) 242 WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]); 243 244 245 /* Activate doorbell logic before triggering WPTR poll. */ 246 data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, 247 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 248 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data); 249 250 if (wptr) { 251 /* Don't read wptr with get_user because the user 252 * context may not be accessible (if this function 253 * runs in a work queue). Instead trigger a one-shot 254 * polling read from memory in the CP. This assumes 255 * that wptr is GPU-accessible in the queue's VMID via 256 * ATC or SVM. WPTR==RPTR before starting the poll so 257 * the CP starts fetching new commands from the right 258 * place. 259 * 260 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit 261 * tricky. Assume that the queue didn't overflow. The 262 * number of valid bits in the 32-bit RPTR depends on 263 * the queue size. The remaining bits are taken from 264 * the saved 64-bit WPTR. If the WPTR wrapped, add the 265 * queue size. 266 */ 267 uint32_t queue_size = 268 2 << REG_GET_FIELD(m->cp_hqd_pq_control, 269 CP_HQD_PQ_CONTROL, QUEUE_SIZE); 270 uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); 271 272 if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) 273 guessed_wptr += queue_size; 274 guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); 275 guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; 276 277 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 278 lower_32_bits(guessed_wptr)); 279 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 280 upper_32_bits(guessed_wptr)); 281 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 282 lower_32_bits((uint64_t)wptr)); 283 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 284 upper_32_bits((uint64_t)wptr)); 285 pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, 286 (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); 287 WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1, 288 (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); 289 } 290 291 /* Start the EOP fetcher */ 292 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR, 293 REG_SET_FIELD(m->cp_hqd_eop_rptr, 294 CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); 295 296 data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); 297 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data); 298 299 release_queue(kgd); 300 301 return 0; 302 } 303 304 static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, 305 uint32_t pipe_id, uint32_t queue_id, 306 uint32_t doorbell_off) 307 { 308 struct amdgpu_device *adev = get_amdgpu_device(kgd); 309 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 310 struct v10_compute_mqd *m; 311 uint32_t mec, pipe; 312 int r; 313 314 m = get_mqd(mqd); 315 316 acquire_queue(kgd, pipe_id, queue_id); 317 318 mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; 319 pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); 320 321 pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", 322 mec, pipe, queue_id); 323 324 spin_lock(&adev->gfx.kiq.ring_lock); 325 r = amdgpu_ring_alloc(kiq_ring, 7); 326 if (r) { 327 pr_err("Failed to alloc KIQ (%d).\n", r); 328 goto out_unlock; 329 } 330 331 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 332 amdgpu_ring_write(kiq_ring, 333 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 334 PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */ 335 PACKET3_MAP_QUEUES_QUEUE(queue_id) | 336 PACKET3_MAP_QUEUES_PIPE(pipe) | 337 PACKET3_MAP_QUEUES_ME((mec - 1)) | 338 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 339 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 340 PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */ 341 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 342 amdgpu_ring_write(kiq_ring, 343 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off)); 344 amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo); 345 amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi); 346 amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo); 347 amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi); 348 amdgpu_ring_commit(kiq_ring); 349 350 out_unlock: 351 spin_unlock(&adev->gfx.kiq.ring_lock); 352 release_queue(kgd); 353 354 return r; 355 } 356 357 static int kgd_hqd_dump(struct kgd_dev *kgd, 358 uint32_t pipe_id, uint32_t queue_id, 359 uint32_t (**dump)[2], uint32_t *n_regs) 360 { 361 struct amdgpu_device *adev = get_amdgpu_device(kgd); 362 uint32_t i = 0, reg; 363 #define HQD_N_REGS 56 364 #define DUMP_REG(addr) do { \ 365 if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ 366 break; \ 367 (*dump)[i][0] = (addr) << 2; \ 368 (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ 369 } while (0) 370 371 *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); 372 if (*dump == NULL) 373 return -ENOMEM; 374 375 acquire_queue(kgd, pipe_id, queue_id); 376 377 for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); 378 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) 379 DUMP_REG(reg); 380 381 release_queue(kgd); 382 383 WARN_ON_ONCE(i != HQD_N_REGS); 384 *n_regs = i; 385 386 return 0; 387 } 388 389 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, 390 uint32_t __user *wptr, struct mm_struct *mm) 391 { 392 struct amdgpu_device *adev = get_amdgpu_device(kgd); 393 struct v10_sdma_mqd *m; 394 uint32_t sdma_rlc_reg_offset; 395 unsigned long end_jiffies; 396 uint32_t data; 397 uint64_t data64; 398 uint64_t __user *wptr64 = (uint64_t __user *)wptr; 399 400 m = get_sdma_mqd(mqd); 401 sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 402 m->sdma_queue_id); 403 404 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 405 m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); 406 407 end_jiffies = msecs_to_jiffies(2000) + jiffies; 408 while (true) { 409 data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 410 if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 411 break; 412 if (time_after(jiffies, end_jiffies)) { 413 pr_err("SDMA RLC not idle in %s\n", __func__); 414 return -ETIME; 415 } 416 usleep_range(500, 1000); 417 } 418 419 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET, 420 m->sdmax_rlcx_doorbell_offset); 421 422 data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, 423 ENABLE, 1); 424 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data); 425 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR, 426 m->sdmax_rlcx_rb_rptr); 427 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI, 428 m->sdmax_rlcx_rb_rptr_hi); 429 430 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); 431 if (read_user_wptr(mm, wptr64, data64)) { 432 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 433 lower_32_bits(data64)); 434 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 435 upper_32_bits(data64)); 436 } else { 437 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, 438 m->sdmax_rlcx_rb_rptr); 439 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI, 440 m->sdmax_rlcx_rb_rptr_hi); 441 } 442 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); 443 444 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); 445 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI, 446 m->sdmax_rlcx_rb_base_hi); 447 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 448 m->sdmax_rlcx_rb_rptr_addr_lo); 449 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, 450 m->sdmax_rlcx_rb_rptr_addr_hi); 451 452 data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, 453 RB_ENABLE, 1); 454 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data); 455 456 return 0; 457 } 458 459 static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, 460 uint32_t engine_id, uint32_t queue_id, 461 uint32_t (**dump)[2], uint32_t *n_regs) 462 { 463 struct amdgpu_device *adev = get_amdgpu_device(kgd); 464 uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, 465 engine_id, queue_id); 466 uint32_t i = 0, reg; 467 #undef HQD_N_REGS 468 #define HQD_N_REGS (19+6+7+10) 469 470 *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); 471 if (*dump == NULL) 472 return -ENOMEM; 473 474 for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) 475 DUMP_REG(sdma_rlc_reg_offset + reg); 476 for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) 477 DUMP_REG(sdma_rlc_reg_offset + reg); 478 for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; 479 reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) 480 DUMP_REG(sdma_rlc_reg_offset + reg); 481 for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; 482 reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) 483 DUMP_REG(sdma_rlc_reg_offset + reg); 484 485 WARN_ON_ONCE(i != HQD_N_REGS); 486 *n_regs = i; 487 488 return 0; 489 } 490 491 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, 492 uint32_t pipe_id, uint32_t queue_id) 493 { 494 struct amdgpu_device *adev = get_amdgpu_device(kgd); 495 uint32_t act; 496 bool retval = false; 497 uint32_t low, high; 498 499 acquire_queue(kgd, pipe_id, queue_id); 500 act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); 501 if (act) { 502 low = lower_32_bits(queue_address >> 8); 503 high = upper_32_bits(queue_address >> 8); 504 505 if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) && 506 high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) 507 retval = true; 508 } 509 release_queue(kgd); 510 return retval; 511 } 512 513 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) 514 { 515 struct amdgpu_device *adev = get_amdgpu_device(kgd); 516 struct v10_sdma_mqd *m; 517 uint32_t sdma_rlc_reg_offset; 518 uint32_t sdma_rlc_rb_cntl; 519 520 m = get_sdma_mqd(mqd); 521 sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 522 m->sdma_queue_id); 523 524 sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); 525 526 if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) 527 return true; 528 529 return false; 530 } 531 532 static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, 533 enum kfd_preempt_type reset_type, 534 unsigned int utimeout, uint32_t pipe_id, 535 uint32_t queue_id) 536 { 537 struct amdgpu_device *adev = get_amdgpu_device(kgd); 538 enum hqd_dequeue_request_type type; 539 unsigned long end_jiffies; 540 uint32_t temp; 541 struct v10_compute_mqd *m = get_mqd(mqd); 542 543 if (amdgpu_in_reset(adev)) 544 return -EIO; 545 546 #if 0 547 unsigned long flags; 548 int retry; 549 #endif 550 551 acquire_queue(kgd, pipe_id, queue_id); 552 553 if (m->cp_hqd_vmid == 0) 554 WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); 555 556 switch (reset_type) { 557 case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: 558 type = DRAIN_PIPE; 559 break; 560 case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: 561 type = RESET_WAVES; 562 break; 563 case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 564 type = SAVE_WAVES; 565 break; 566 default: 567 type = DRAIN_PIPE; 568 break; 569 } 570 571 #if 0 /* Is this still needed? */ 572 /* Workaround: If IQ timer is active and the wait time is close to or 573 * equal to 0, dequeueing is not safe. Wait until either the wait time 574 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is 575 * cleared before continuing. Also, ensure wait times are set to at 576 * least 0x3. 577 */ 578 local_irq_save(flags); 579 preempt_disable(); 580 retry = 5000; /* wait for 500 usecs at maximum */ 581 while (true) { 582 temp = RREG32(mmCP_HQD_IQ_TIMER); 583 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { 584 pr_debug("HW is processing IQ\n"); 585 goto loop; 586 } 587 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { 588 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) 589 == 3) /* SEM-rearm is safe */ 590 break; 591 /* Wait time 3 is safe for CP, but our MMIO read/write 592 * time is close to 1 microsecond, so check for 10 to 593 * leave more buffer room 594 */ 595 if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) 596 >= 10) 597 break; 598 pr_debug("IQ timer is active\n"); 599 } else 600 break; 601 loop: 602 if (!retry) { 603 pr_err("CP HQD IQ timer status time out\n"); 604 break; 605 } 606 ndelay(100); 607 --retry; 608 } 609 retry = 1000; 610 while (true) { 611 temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); 612 if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) 613 break; 614 pr_debug("Dequeue request is pending\n"); 615 616 if (!retry) { 617 pr_err("CP HQD dequeue request time out\n"); 618 break; 619 } 620 ndelay(100); 621 --retry; 622 } 623 local_irq_restore(flags); 624 preempt_enable(); 625 #endif 626 627 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type); 628 629 end_jiffies = (utimeout * HZ / 1000) + jiffies; 630 while (true) { 631 temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); 632 if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) 633 break; 634 if (time_after(jiffies, end_jiffies)) { 635 pr_err("cp queue preemption time out.\n"); 636 release_queue(kgd); 637 return -ETIME; 638 } 639 usleep_range(500, 1000); 640 } 641 642 release_queue(kgd); 643 return 0; 644 } 645 646 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 647 unsigned int utimeout) 648 { 649 struct amdgpu_device *adev = get_amdgpu_device(kgd); 650 struct v10_sdma_mqd *m; 651 uint32_t sdma_rlc_reg_offset; 652 uint32_t temp; 653 unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; 654 655 m = get_sdma_mqd(mqd); 656 sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, 657 m->sdma_queue_id); 658 659 temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL); 660 temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; 661 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp); 662 663 while (true) { 664 temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS); 665 if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) 666 break; 667 if (time_after(jiffies, end_jiffies)) { 668 pr_err("SDMA RLC not idle in %s\n", __func__); 669 return -ETIME; 670 } 671 usleep_range(500, 1000); 672 } 673 674 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0); 675 WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, 676 RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) | 677 SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); 678 679 m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR); 680 m->sdmax_rlcx_rb_rptr_hi = 681 RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI); 682 683 return 0; 684 } 685 686 static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, 687 uint8_t vmid, uint16_t *p_pasid) 688 { 689 uint32_t value; 690 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 691 692 value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) 693 + vmid); 694 *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; 695 696 return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); 697 } 698 699 static int kgd_address_watch_disable(struct kgd_dev *kgd) 700 { 701 return 0; 702 } 703 704 static int kgd_address_watch_execute(struct kgd_dev *kgd, 705 unsigned int watch_point_id, 706 uint32_t cntl_val, 707 uint32_t addr_hi, 708 uint32_t addr_lo) 709 { 710 return 0; 711 } 712 713 static int kgd_wave_control_execute(struct kgd_dev *kgd, 714 uint32_t gfx_index_val, 715 uint32_t sq_cmd) 716 { 717 struct amdgpu_device *adev = get_amdgpu_device(kgd); 718 uint32_t data = 0; 719 720 mutex_lock(&adev->grbm_idx_mutex); 721 722 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); 723 WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd); 724 725 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 726 INSTANCE_BROADCAST_WRITES, 1); 727 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 728 SA_BROADCAST_WRITES, 1); 729 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 730 SE_BROADCAST_WRITES, 1); 731 732 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); 733 mutex_unlock(&adev->grbm_idx_mutex); 734 735 return 0; 736 } 737 738 static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, 739 unsigned int watch_point_id, 740 unsigned int reg_offset) 741 { 742 return 0; 743 } 744 745 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, 746 uint64_t page_table_base) 747 { 748 struct amdgpu_device *adev = get_amdgpu_device(kgd); 749 750 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { 751 pr_err("trying to set page table base for wrong VMID %u\n", 752 vmid); 753 return; 754 } 755 756 /* SDMA is on gfxhub as well for Navi1* series */ 757 adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); 758 } 759 760 static void program_trap_handler_settings(struct kgd_dev *kgd, 761 uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) 762 { 763 struct amdgpu_device *adev = get_amdgpu_device(kgd); 764 765 lock_srbm(kgd, 0, 0, 0, vmid); 766 767 /* 768 * Program TBA registers 769 */ 770 WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), 771 lower_32_bits(tba_addr >> 8)); 772 WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), 773 upper_32_bits(tba_addr >> 8) | 774 (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); 775 776 /* 777 * Program TMA registers 778 */ 779 WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), 780 lower_32_bits(tma_addr >> 8)); 781 WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), 782 upper_32_bits(tma_addr >> 8)); 783 784 unlock_srbm(kgd); 785 } 786 787 const struct kfd2kgd_calls gfx_v10_kfd2kgd = { 788 .program_sh_mem_settings = kgd_program_sh_mem_settings, 789 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 790 .init_interrupts = kgd_init_interrupts, 791 .hqd_load = kgd_hqd_load, 792 .hiq_mqd_load = kgd_hiq_mqd_load, 793 .hqd_sdma_load = kgd_hqd_sdma_load, 794 .hqd_dump = kgd_hqd_dump, 795 .hqd_sdma_dump = kgd_hqd_sdma_dump, 796 .hqd_is_occupied = kgd_hqd_is_occupied, 797 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, 798 .hqd_destroy = kgd_hqd_destroy, 799 .hqd_sdma_destroy = kgd_hqd_sdma_destroy, 800 .address_watch_disable = kgd_address_watch_disable, 801 .address_watch_execute = kgd_address_watch_execute, 802 .wave_control_execute = kgd_wave_control_execute, 803 .address_watch_get_offset = kgd_address_watch_get_offset, 804 .get_atc_vmid_pasid_mapping_info = 805 get_atc_vmid_pasid_mapping_info, 806 .set_vm_context_page_table_base = set_vm_context_page_table_base, 807 .program_trap_handler_settings = program_trap_handler_settings, 808 }; 809