/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
#include "cik_sdma.h"
#include "gfx_v7_0.h"
#include "gca/gfx_7_2_d.h"
#include "gca/gfx_7_2_enum.h"
#include "gca/gfx_7_2_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
#include "cik_structs.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

enum {
	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
	MAX_WATCH_ADDRESSES = 4
};

enum {
	ADDRESS_WATCH_REG_ADDR_HI = 0,
	ADDRESS_WATCH_REG_ADDR_LO,
	ADDRESS_WATCH_REG_CNTL,
	ADDRESS_WATCH_REG_MAX
};

/* not defined in the CI/KV reg file */
enum {
	ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
	ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
	ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
	/* extend the mask to 26 bits to match the low address field */
	ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
	ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};

static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
	mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
	mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
	mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
	mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
};

union TCP_WATCH_CNTL_BITS {
	struct {
		uint32_t mask:24;
		uint32_t vmid:4;
		uint32_t atc:1;
		uint32_t mode:2;
		uint32_t valid:1;
	} bitfields, bits;
	uint32_t u32All;
	signed int i32All;
	float f32All;
};

static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}
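
/*
 * acquire_queue()/release_queue() bracket every HQD register access below:
 * SRBM_GFX_CNTL selects which MEC/pipe/queue (and VMID) the banked CP and
 * compute registers decode to, so the selection must stay stable, with
 * srbm_mutex held, for the whole read/modify window.
 */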

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(adev);
}

static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
					unsigned int vmid)
{
	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished and the
	 * SW cleared it. So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
					ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
			m->sdma_engine_id, m->sdma_queue_id, retval);

	return retval;
}

static inline struct cik_mqd *get_mqd(void *mqd)
{
	return (struct cik_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
}
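
/*
 * kgd_hqd_load() programs a compute queue: the MQD image is copied into the
 * HQD registers of the pipe/queue selected through acquire_queue(), the
 * doorbell is enabled so later ring updates come from user space, the saved
 * user write pointer (if readable) is restored via the caller-supplied
 * wptr_shift/wptr_mask adjustment, and finally CP_HQD_ACTIVE is set.
 */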

static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm)
{
	struct cik_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, wptr_val, data;
	bool valid_wptr = false;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Copy userspace write pointer value to register.
	 * Activate doorbell logic to monitor subsequent changes.
	 */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* read_user_wptr may take the mm->mmap_lock.
	 * release srbm_mutex to avoid circular dependency between
	 * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
	 */
	release_queue(adev);
	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
	acquire_queue(adev, pipe_id, queue_id);
	if (valid_wptr)
		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(mmCP_HQD_ACTIVE, data);

	release_queue(adev);

	return 0;
}

static int kgd_hqd_dump(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			uint32_t __user *wptr, struct mm_struct *mm)
{
	struct cik_sdma_rlc_registers *m;
	unsigned long end_jiffies;
	uint32_t sdma_rlc_reg_offset;
	uint32_t data;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
			ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
		m->sdma_rlc_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
			m->sdma_rlc_rb_rptr);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
		m->sdma_rlc_virtual_addr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
		m->sdma_rlc_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
		m->sdma_rlc_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
		m->sdma_rlc_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
			RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}
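
/*
 * kgd_hqd_sdma_dump() reuses the DUMP_REG() helper defined in kgd_hqd_dump()
 * above; HQD_N_REGS is redefined for the smaller SDMA RLC register window,
 * and every offset is biased by sdma_offset to hit the right engine/queue.
 */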

static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
			uint32_t engine_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
				uint64_t queue_address, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
		    high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}
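
/*
 * kgd_hqd_destroy() preempts the queue currently resident in the selected
 * HQD: the doorbell is cut off first, the KFD preempt type is translated to
 * a CP dequeue request (DRAIN_PIPE or RESET_WAVES), and CP_HQD_ACTIVE is
 * then polled until the queue goes inactive or utimeout (in ms) expires.
 */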

static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;

	if (amdgpu_in_reset(adev))
		return -EIO;

	acquire_queue(adev, pipe_id, queue_id);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or the timer is cleared. Also ensure that IQ_REQ_PEND is
	 * cleared before continuing and that wait times are set to at least
	 * 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out\n");
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}

static int kgd_address_watch_disable(struct amdgpu_device *adev)
{
	union TCP_WATCH_CNTL_BITS cntl;
	unsigned int i;

	cntl.u32All = 0;

	cntl.bitfields.valid = 0;
	cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
	cntl.bitfields.atc = 1;

	/* Turning off this address until we set all the registers */
	for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
		WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
			ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	return 0;
}
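
/*
 * kgd_address_watch_execute() (re)programs one TCP watch point. The order
 * matters: the VALID bit of the CNTL value is cleared while the high/low
 * address registers are written, and only then is CNTL written again with
 * VALID set to arm the watch point.
 */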

static int kgd_address_watch_execute(struct amdgpu_device *adev,
					unsigned int watch_point_id,
					uint32_t cntl_val,
					uint32_t addr_hi,
					uint32_t addr_lo)
{
	union TCP_WATCH_CNTL_BITS cntl;

	cntl.u32All = cntl_val;

	/* Turning off this watch point until we set all the registers */
	cntl.bitfields.valid = 0;
	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_ADDR_HI], addr_hi);

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_ADDR_LO], addr_lo);

	/* Enable the watch point */
	cntl.bitfields.valid = 1;

	WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
		ADDRESS_WATCH_REG_CNTL], cntl.u32All);

	return 0;
}

static int kgd_wave_control_execute(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	uint32_t data;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);

	/* Restore the GRBM_GFX_INDEX register */

	data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
		GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;

	WREG32(mmGRBM_GFX_INDEX, data);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev,
					unsigned int watch_point_id,
					unsigned int reg_offset)
{
	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}

static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static void set_scratch_backing_va(struct amdgpu_device *adev,
					uint64_t va, uint32_t vmid)
{
	lock_srbm(adev, 0, 0, 0, vmid);
	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
	unlock_srbm(adev);
}
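
/*
 * On CIK the upper VMIDs (8..15) are the KFD-owned range, which is what
 * amdgpu_amdkfd_is_kfd_vmid() checks below; their page-directory bases live
 * in VM_CONTEXT8..15_PAGE_TABLE_BASE_ADDR, hence the "vmid - 8" register
 * offset.
 */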

static void set_vm_context_page_table_base(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t page_table_base)
{
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
		lower_32_bits(page_table_base));
}

/**
 * read_vmid_from_vmfault_reg - read vmid from the VM fault status register
 *
 * @adev: amdgpu_device pointer
 *
 * Read the vmid from the VM_CONTEXT1_PROTECTION_FAULT_STATUS register (CIK).
 */
static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
{
	uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);

	return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}

const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_address_watch_disable,
	.address_watch_execute = kgd_address_watch_execute,
	.wave_control_execute = kgd_wave_control_execute,
	.address_watch_get_offset = kgd_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
	.set_scratch_backing_va = set_scratch_backing_va,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
};