/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_10_3_0_offset.h"
#include "gc/gc_10_3_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
		      uint32_t queue, uint32_t vmid)
{
	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
			  uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid,
					  uint32_t sh_mem_config,
					  uint32_t sh_mem_ape1_base,
					  uint32_t sh_mem_ape1_limit,
					  uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
	WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	/* APE1 no longer exists on GFX9 and later ASICs */

	unlock_srbm(adev);
}

/* ATC is defeatured on Sienna_Cichlid */
static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
					unsigned int vmid)
{
	uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;

	/* Mapping vmid to pasid also for IH block */
	pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
		 vmid, pasid);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, value);

	return 0;
}
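
/* Enable time-stamp and opcode-error interrupts on the given compute
 * pipe so the CP can report queue events back to the driver.
 */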
static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
		     CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		     CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
					unsigned int engine_id,
					unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 2:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA2_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 3:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA3_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
	return (struct v10_compute_mqd *)mqd;
}

static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v10_sdma_mqd *)mqd;
}
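
/* Load a user compute queue's MQD into the HQD registers and activate it.
 * The HQD register block can be programmed with a simple streaming copy
 * because the v10 MQD mirrors the register layout starting at
 * CP_MQD_BASE_ADDR.
 */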
static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
			  uint32_t pipe_id, uint32_t queue_id,
			  uint32_t __user *wptr, uint32_t wptr_shift,
			  uint32_t wptr_mask, struct mm_struct *mm)
{
	struct v10_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(adev, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0 */
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			 mec, pipe, queue_id);
		value = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);

	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
			     lower_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
			     upper_32_bits(guessed_wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
			     lower_32_bits((uint64_t)wptr));
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
			     upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
			     (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);

	release_queue(adev);

	return 0;
}
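
/* The HIQ is mapped with a MAP_QUEUES packet (engine_sel = 1) submitted
 * on the KIQ ring rather than by writing HQD registers directly, so the
 * request goes through the KIQ firmware scheduler.
 */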
static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
			      uint32_t pipe_id, uint32_t queue_id,
			      uint32_t doorbell_off)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v10_compute_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(adev);

	return r;
}

static int hqd_dump_v10_3(struct amdgpu_device *adev,
			  uint32_t pipe_id, uint32_t queue_id,
			  uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {					\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))		\
			break;					\
		(*dump)[i][0] = (addr) << 2;			\
		(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr);	\
	} while (0)

	*dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
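
/* Restore an SDMA user queue from its MQD: disable the ring buffer, wait
 * up to 2 seconds for the RLC context to report idle, reprogram doorbell,
 * pointers and base, then re-enable the ring. The saved wptr is read from
 * user memory if the process address space is still accessible; otherwise
 * the queue restarts at its saved rptr.
 */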
static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd,
			       uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
	       m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
	       m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
	       m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
	       m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
	       m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
	       m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
			       uint32_t engine_id, uint32_t queue_id,
			       uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+12)

	*dump = kmalloc(HQD_N_REGS * 2 * sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
				  uint64_t queue_address, uint32_t pipe_id,
				  uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
		    high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
				       void *mqd)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}
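
/* Preempt or reset an active compute queue. The KFD preemption type is
 * translated into a CP dequeue request, then CP_HQD_ACTIVE is polled
 * until the queue drains or the caller's timeout (utimeout, in ms)
 * expires.
 */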
static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
			     enum kfd_preempt_type reset_type,
			     unsigned int utimeout, uint32_t pipe_id,
			     uint32_t queue_id)
{
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v10_compute_mqd *m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
		type = SAVE_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue pipe %d queue %d preemption failed\n",
			       pipe_id, queue_id);
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
				  unsigned int utimeout)
{
	struct v10_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
						      m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
	       RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
	       SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

static int address_watch_disable_v10_3(struct amdgpu_device *adev)
{
	return 0;
}

static int address_watch_execute_v10_3(struct amdgpu_device *adev,
				       unsigned int watch_point_id,
				       uint32_t cntl_val,
				       uint32_t addr_hi,
				       uint32_t addr_lo)
{
	return 0;
}

static int wave_control_execute_v10_3(struct amdgpu_device *adev,
				      uint32_t gfx_index_val,
				      uint32_t sq_cmd)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
			     INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
			     SA_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
			     SE_BROADCAST_WRITES, 1);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t address_watch_get_offset_v10_3(struct amdgpu_device *adev,
					       unsigned int watch_point_id,
					       unsigned int reg_offset)
{
	return 0;
}

static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t page_table_base)
{
	/* SDMA is on gfxhub as well for Navi1* series */
	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	/*
	 * Program TBA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
	       lower_32_bits(tba_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
	       upper_32_bits(tba_addr >> 8) |
	       (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));

	/*
	 * Program TMA registers
	 */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
	       lower_32_bits(tma_addr >> 8));
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
	       upper_32_bits(tma_addr >> 8));

	unlock_srbm(adev);
}
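
/* The helpers below are compiled out, together with the matching kfd2kgd
 * entries at the end of the file. They are debug-trap and wave-control
 * routines that appear to be staged for future KFD debugger enablement
 * on GFX10.3.
 */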
#if 0
uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev,
				 uint32_t trap_debug_wave_launch_mode,
				 uint32_t vmid)
{
	uint32_t data = 0;
	uint32_t orig_wave_cntl_value;
	uint32_t orig_stall_vmid;

	mutex_lock(&adev->grbm_idx_mutex);

	orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
				0,
				mmSPI_GDBG_WAVE_CNTL));
	orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
				SPI_GDBG_WAVE_CNTL,
				STALL_VMID);

	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	data = 0;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev)
{
	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev,
					     uint32_t trap_override,
					     uint32_t trap_mask)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	data = 0;
	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
			     EXCP_EN, trap_mask);
	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
			     REPLACE, trap_override);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
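
/* Wave launch mode 0 restores the default launch behaviour, mode 4
 * stalls wave launch for the VMID via SPI_GDBG_WAVE_CNTL, and the
 * remaining modes are programmed per-VMID through SPI_GDBG_WAVE_CNTL2.
 */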
uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev,
				    uint8_t wave_launch_mode,
				    uint32_t vmid)
{
	uint32_t data = 0;
	bool is_stall_mode;
	bool is_mode_set;

	is_stall_mode = (wave_launch_mode == 4);
	is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);

	mutex_lock(&adev->grbm_idx_mutex);

	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
			     VMID_MASK, is_mode_set ? 1 << vmid : 0);
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
			     MODE, is_mode_set ? wave_launch_mode : 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
			     STALL_VMID, is_stall_mode ? 1 << vmid : 0);
	data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
			     STALL_RA, is_stall_mode ? 1 : 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

/* get_iq_wait_times_v10_3: Returns the mmCP_IQ_WAIT_TIME1/2 values
 * The values read are:
 *	ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
 *	atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
 *	wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
 *	gws_wait_time            -- Wait Count for Global Wave Syncs.
 *	que_sleep_wait_time      -- Wait Count for Queue Sleep.
 *	sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
 *	sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
 *	deq_retry_wait_time      -- Wait Count for Dequeue Retry.
 */
void get_iq_wait_times_v10_3(struct amdgpu_device *adev,
			     uint32_t *wait_times)
{
	*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}

void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev,
					  uint32_t wait_times,
					  uint32_t grace_period,
					  uint32_t *reg_offset,
					  uint32_t *reg_data)
{
	*reg_data = wait_times;

	*reg_data = REG_SET_FIELD(*reg_data,
				  CP_IQ_WAIT_TIME2,
				  SCH_WAVE,
				  grace_period);

	*reg_offset = mmCP_IQ_WAIT_TIME2;
}
#endif

const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
	.program_sh_mem_settings = program_sh_mem_settings_v10_3,
	.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
	.init_interrupts = init_interrupts_v10_3,
	.hqd_load = hqd_load_v10_3,
	.hiq_mqd_load = hiq_mqd_load_v10_3,
	.hqd_sdma_load = hqd_sdma_load_v10_3,
	.hqd_dump = hqd_dump_v10_3,
	.hqd_sdma_dump = hqd_sdma_dump_v10_3,
	.hqd_is_occupied = hqd_is_occupied_v10_3,
	.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
	.hqd_destroy = hqd_destroy_v10_3,
	.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
	.address_watch_disable = address_watch_disable_v10_3,
	.address_watch_execute = address_watch_execute_v10_3,
	.wave_control_execute = wave_control_execute_v10_3,
	.address_watch_get_offset = address_watch_get_offset_v10_3,
	.get_atc_vmid_pasid_mapping_info = NULL,
	.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
	.program_trap_handler_settings = program_trap_handler_settings_v10_3,
#if 0
	.enable_debug_trap = enable_debug_trap_v10_3,
	.disable_debug_trap = disable_debug_trap_v10_3,
	.set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
	.set_wave_launch_mode = set_wave_launch_mode_v10_3,
	.get_iq_wait_times = get_iq_wait_times_v10_3,
	.build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
#endif
};