/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "vega10_enum.h"

#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "gc/gc_9_4_3_offset.h"
#include "gc/gc_9_4_3_sh_mask.h"

#include "gfx_v9_4_3.h"

MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");

#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L

static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
				  struct amdgpu_cu_info *cu_info);

static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
					 uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

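	/* MAP_QUEUES is emitted as 7 DWs in total: the PACKET3 header plus
	 * six payload DWs, matching .map_queues_size in the kiq_pm4_funcs
	 * table below.
	 */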
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
					struct amdgpu_ring *ring,
					enum amdgpu_unmap_queues_action action,
					u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring,
					struct amdgpu_ring *ring,
					u64 addr,
					u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

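	/* QUERY_STATUS likewise takes 7 DWs (header + 6 payload DWs), see
	 * .query_status_size below.
	 */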
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
	.kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_4_3_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_xcd; i++)
		adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs;
}

static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
{

}

static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					 bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
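	/* register offset as the low destination address, unused high
	 * address DW, then the value to write
	 */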
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				    int mem_space, int opt, uint32_t addr0,
				    uint32_t addr1, uint32_t ref, uint32_t mask,
				    uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32_SOC15(GC, 0, regSCRATCH_REG0, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0) -
			  PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regSCRATCH_REG0);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/* This value might differ per partition */
static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	amdgpu_gfx_off_ctrl(adev, true);

	return clock;
}

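/* Release the firmware handles acquired during init along with the RLC
 * register list format buffer allocated when the RLC header was parsed.
 */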
static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
					 const char *chip_name)
{
	char fw_name[30];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);

	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
out:
	if (err)
		amdgpu_ucode_release(&adev->gfx.rlc_fw);

	return err;
}

static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev)
{
	return true;
}

static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
}

static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
						const char *chip_name)
{
	char fw_name[30];
	int err;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
	if (err)
		goto out;
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);

	adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
	adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;

	gfx_v9_4_3_check_if_need_gfxoff(adev);

out:
	if (err)
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	return err;
}

static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	int r;

	chip_name = "gc_9_4_3";

	r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name);
	if (r)
		return r;

	r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name);
	if (r)
		return r;

	return r;
}

static u32 gfx_v9_4_3_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_4_3_get_csb_buffer(struct amdgpu_device *adev,
				      volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
{
	int r, i;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	for (i = 0; i < adev->gfx.num_xcd; i++)
		bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
			    AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v9_4_3_mec_fini(adev);
			return r;
		}

		if (amdgpu_emu_mode == 1) {
			for (i = 0; i < mec_hpd_size / 4; i++) {
				memset((void *)(hpd + i), 0, 4);
				if (i % 50 == 0)
					msleep(1);
			}
		} else {
			memset(hpd, 0, mec_hpd_size);
		}

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_4_3_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
				    u32 se_num,
				    u32 sh_num,
				    u32 instance,
				    int xcc_id)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_INDEX, instance);

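	/* As with the instance field, 0xffffffff requests broadcast writes
	 * in GRBM_GFX_INDEX; anything else targets a specific SE/SH.
	 */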
	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
				     SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
				     SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32_SOC15_RLC_SHADOW_EX(reg, GC, xcc_id, regGRBM_GFX_INDEX, data);
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
				      uint32_t simd, uint32_t wave,
				      uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}

static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				       uint32_t wave, uint32_t start,
				       uint32_t size, uint32_t *dst)
{
	wave_read_regs(adev, simd, wave, 0,
		       start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				       uint32_t wave, uint32_t thread,
				       uint32_t start, uint32_t size,
				       uint32_t *dst)
{
	wave_read_regs(adev, simd, wave, thread,
		       start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
					u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm, 0);
}

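/* Per-IP callback table consumed by the common amdgpu_gfx code, e.g. for
 * GPU clock queries and wave/SGPR/VGPR dumps.
 */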
static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_4_3_select_se_sh,
	.read_wave_data = &gfx_v9_4_3_read_wave_data,
	.read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
};

static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 4, 3):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}

static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
					int xcc_id, int mec, int pipe, int queue)
{
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->xcc_id = xcc_id;
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	if (xcc_id >= 1)
		ring->doorbell_index =
				(adev->doorbell_index.xcc1_mec_ring0_start +
				 ring_id - adev->gfx.num_compute_rings) << 1;
	else
		ring->doorbell_index =
				(adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			+ (ring_id * GFX9_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB_0;
	sprintf(ring->name, "comp_%d.%d.%d.%d",
			ring->xcc_id, ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}

static int gfx_v9_4_3_sw_init(void *handle)
{
	int i, j, k, r, ring_id, xcc_id;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.mec.num_mec = 2;
	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v9_4_3_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (xcc_id = 0; xcc_id < adev->gfx.num_xcd; xcc_id++) {

		for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
			for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
				for (k = 0; k < adev->gfx.mec.num_pipe_per_mec;
				     k++) {
					if (!amdgpu_gfx_is_mec_queue_enabled(
							adev, xcc_id, i, k, j))
						continue;

					r = gfx_v9_4_3_compute_ring_init(adev,
								       ring_id,
								       xcc_id,
								       i, k, j);
					if (r)
						return r;

					ring_id++;
				}
			}
		}

		r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq[xcc_id];
		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = amdgpu_gfx_mqd_sw_init(adev,
				sizeof(struct v9_mqd_allocation), xcc_id);
		if (r)
			return r;
	}

	r = gfx_v9_4_3_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v9_4_3_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->gfx.num_compute_rings *
		adev->gfx.num_xcd; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	for (i = 0; i < adev->gfx.num_xcd; i++) {
		amdgpu_gfx_mqd_sw_fini(adev, i);
		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
		amdgpu_gfx_kiq_fini(adev, i);
	}

	gfx_v9_4_3_mec_fini(adev);
	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
	gfx_v9_4_3_free_microcode(adev);

	return 0;
}

static u32 gfx_v9_4_3_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

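	/* The DISABLE registers carry a bit per disabled RB, so the active
	 * bitmap is the inverse, masked to the RBs one SH can have.
	 */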
	return (~data) & mask;
}

static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev, int xcc_id)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
			data = gfx_v9_4_3_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
static void gfx_v9_4_3_init_compute_vmid(struct amdgpu_device *adev, int xcc_id)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i, xcc_id);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, xcc_id, regSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_BASE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_VMID0_SIZE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_GWS_VMID0, vmid, 0);
		WREG32_SOC15_OFFSET(GC, xcc_id, regGDS_OA_VMID0, vmid, 0);
	}
}

static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i, j;

	for (i = 0; i < adev->gfx.num_xcd; i++) {
		WREG32_FIELD15_PREREG(GC, i, GRBM_CNTL, READ_TIMEOUT, 0xff);
		gfx_v9_4_3_setup_rb(adev, i);
	}

	gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info);
	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, regDB_DEBUG2);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
		for (j = 0; j < adev->gfx.num_xcd; j++) {
			soc15_grbm_select(adev, 0, 0, 0, i, j);
			/* CP and shaders */
			if (i == 0) {
				tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
						    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
				tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
						    !!adev->gmc.noretry);
				WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp);
				WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, 0);
			} else {
				tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
						    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
				tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
						    !!adev->gmc.noretry);
				WREG32_SOC15_RLC(GC, j, regSH_MEM_CONFIG, tmp);
				tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
						    (adev->gmc.private_aperture_start >> 48));
				tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
						    (adev->gmc.shared_aperture_start >> 48));
				WREG32_SOC15_RLC(GC, j, regSH_MEM_BASES, tmp);
			}
		}
	}
	soc15_grbm_select(adev, 0, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	for (i = 0; i < adev->gfx.num_xcd; i++) {
		gfx_v9_4_3_init_compute_vmid(adev, i);
		gfx_v9_4_3_init_gds_vmid(adev, i);
	}
}

static void gfx_v9_4_3_enable_save_restore_machine(struct amdgpu_device *adev,
						   int xcc_id)
{
	WREG32_FIELD15_PREREG(GC, xcc_id, RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v9_4_3_init_csb(struct amdgpu_device *adev, int xcc_id)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
	/* csib */
	WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_HI),
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_ADDR_LO),
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_RLC(SOC15_REG_OFFSET(GC, xcc_id, regRLC_CSIB_LENGTH),
			adev->gfx.rlc.clear_state_size);
}

static void gfx_v9_4_3_init_pg(struct amdgpu_device *adev, int xcc_id)
{
	gfx_v9_4_3_init_csb(adev, xcc_id);

	/*
	 * Rlc save restore list is workable since v2_1.
	 * And it's needed by gfxoff feature.
	 */
	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_4_3_enable_save_restore_machine(adev, xcc_id);

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		WREG32_SOC15(GC, 0, regRLC_JUMP_TABLE_RESTORE,
			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
	}
}

static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
{
	uint32_t data;

	data = RREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG);
	data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK;
	WREG32_SOC15(GC, xcc_id, regCPC_PSP_DEBUG, data);
}

static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_setting;

	/* if RLC is not enabled, do nothing */
	rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return false;

	return true;
}

static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
	uint32_t data;
	unsigned i;

	data = RLC_SAFE_MODE__CMD_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32_SOC15(GC, xcc_id, regRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
	uint32_t data;

	data = RLC_SAFE_MODE__CMD_MASK;
	WREG32_SOC15(GC, xcc_id, regRLC_SAFE_MODE, data);
}

static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}

static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev,
					   int xcc_id)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v9_4_3_select_se_sh(adev, 0xffffffff,
							0xffffffff, 0xffffffff,
							xcc_id);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						 bool enable, int xcc_id)
{
	u32 tmp;

	/* These interrupts should be enabled to drive DS clock */

	tmp = RREG32_SOC15(GC, xcc_id, regCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, xcc_id, regCP_INT_CNTL_RING0, tmp);
}

static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_xcd; i++) {
		WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0);
		gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i);
		gfx_v9_4_3_wait_for_rlc_serdes(adev, i);
	}
}

static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_xcd; i++) {
		WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
		udelay(50);
		WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
		udelay(50);
	}
}

static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif
	int i;

	for (i = 0; i < adev->gfx.num_xcd; i++) {
		WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 1);
		udelay(50);

		/* carrizo does enable cp interrupt after cp inited */
		if (!(adev->flags & AMD_IS_APU)) {
			gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i);
			udelay(50);
		}

#ifdef AMDGPU_RLC_DEBUG_RETRY
		/* RLC_GPM_GENERAL_6 : RLC Ucode version */
		rlc_ucode_ver = RREG32_SOC15(GC, i, regRLC_GPM_GENERAL_6);
		if (rlc_ucode_ver == 0x108) {
			dev_info(adev->dev,
				 "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
				 rlc_ucode_ver, adev->gfx.rlc_fw_version);
			/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
			 * default is 0x9C4 to create a 100us interval */
			WREG32_SOC15(GC, i, regRLC_GPM_TIMER_INT_3, 0x9C4);
			/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
			 * to disable the page fault retry interrupts, default is
			 * 0x100 (256) */
			WREG32_SOC15(GC, i, regRLC_GPM_GENERAL_12, 0x100);
		}
#endif
	}
}

static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR,
			RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++) {
		if (amdgpu_emu_mode == 1 && i % 100 == 0) {
			dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i);
			msleep(1);
		}
		WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	}
	WREG32_SOC15(GC, xcc_id, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
{
	int r, i;

	adev->gfx.rlc.funcs->stop(adev);

	for (i = 0; i < adev->gfx.num_xcd; i++) {
		/* disable CG */
		WREG32_SOC15(GC, i, regRLC_CGCG_CGLS_CTRL, 0);

		gfx_v9_4_3_init_pg(adev, i);

		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
			/* legacy rlc firmware loading */
			r = gfx_v9_4_3_rlc_load_microcode(adev, i);
			if (r)
				return r;
		}
	}

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}

static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev,
				       unsigned vmid)
{
	u32 reg, data;

	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
	if (amdgpu_sriov_is_pp_one_vf(adev))
		data = RREG32_NO_KIQ(reg);
	else
		data = RREG32(reg);

	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;

	if (amdgpu_sriov_is_pp_one_vf(adev))
		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
	else
		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
}

static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
	{SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)},
};

static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
					uint32_t offset,
					struct soc15_reg_rlcg *entries, int arr_size)
{
	int i;
	uint32_t reg;

	if (!entries)
		return false;

	for (i = 0; i < arr_size; i++) {
		const struct soc15_reg_rlcg *entry;

		entry = &entries[i];
		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
		if (offset == reg)
			return true;
	}

	return false;
}

static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
{
	return gfx_v9_4_3_check_rlcg_range(adev, offset,
					(void *)rlcg_access_gc_9_4_3,
					ARRAY_SIZE(rlcg_access_gc_9_4_3));
}

static void gfx_v9_4_3_cp_compute_enable(struct amdgpu_device *adev,
					 bool enable, int xcc_id)
{
	if (enable) {
		WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL, 0);
	} else {
		WREG32_SOC15_RLC(GC, xcc_id, regCP_MEC_CNTL,
			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		adev->gfx.kiq[xcc_id].ring.sched.ready = false;
	}
	udelay(50);
}

static int gfx_v9_4_3_cp_compute_load_microcode(struct amdgpu_device *adev,
						int xcc_id)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i;
	u32 tmp;
	u32 mec_ucode_addr_offset;
	u32 mec_ucode_data_offset;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v9_4_3_cp_compute_enable(adev, false, xcc_id);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	tmp = 0;
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_CNTL, tmp);

	WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_LO,
		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
	WREG32_SOC15(GC, xcc_id, regCP_CPC_IC_BASE_HI,
		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));

	mec_ucode_addr_offset =
		SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_ADDR);
	mec_ucode_data_offset =
		SOC15_REG_OFFSET(GC, xcc_id, regCP_MEC_ME1_UCODE_DATA);

	/* MEC1 */
	WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset);
	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32(mec_ucode_data_offset,
		       le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32(mec_ucode_addr_offset, adev->gfx.mec_fw_version);
	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run
	 * different microcode than MEC1.
	 */
	return 0;
}

/* KIQ functions */
static void gfx_v9_4_3_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, xcc_id, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15_RLC(GC, xcc_id, regRLC_CP_SCHEDULERS, tmp);
}

static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
			mqd->cp_hqd_queue_priority =
				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
		}
	}
}

static int gfx_v9_4_3_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	mqd->dynamic_cu_mask_addr_lo =
		lower_32_bits(ring->mqd_gpu_addr
			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi =
		upper_32_bits(ring->mqd_gpu_addr
			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* set static priority for a queue/ring */
	gfx_v9_4_3_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(regCP_HQD_QUANTUM);

	/* the map_queues packet doesn't need to activate the queue,
	 * so only the KIQ needs to set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}

static int gfx_v9_4_3_kiq_init_register(struct amdgpu_ring *ring, int xcc_id)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR,
	       mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_BASE_ADDR_HI,
	       mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_EOP_CONTROL,
	       mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST,
		       mqd->cp_hqd_dequeue_request);
		WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR,
		       mqd->cp_hqd_pq_rptr);
		WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO,
		       mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI,
		       mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR,
	       mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_BASE_ADDR_HI,
	       mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_MQD_CONTROL,
	       mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE,
	       mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_BASE_HI,
	       mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_CONTROL,
	       mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR,
		mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR,
	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
	       mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_LOWER,
					(adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, xcc_id, regCP_MEC_DOORBELL_RANGE_UPPER,
					(adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO,
	       mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI,
	       mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE,
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE,
	       mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15_PREREG(GC, xcc_id, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v9_4_3_kiq_fini_register(struct amdgpu_ring *ring, int xcc_id)
{
	struct amdgpu_device *adev = ring->adev;
	int j;

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1) {

		WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST, 1);

		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, xcc_id, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
			DRM_DEBUG("KIQ dequeue request failed.\n");

			/* Manual disable if dequeue request times out */
			WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_ACTIVE, 0);
		}

		WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_DEQUEUE_REQUEST,
		      0);
	}

	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IQ_TIMER, 0);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_IB_CONTROL, 0);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PERSISTENT_STATE, 0);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_RPTR, 0);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15_RLC(GC, xcc_id, regCP_HQD_PQ_WPTR_LO, 0);

	return 0;
}

static int gfx_v9_4_3_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	struct v9_mqd *tmp_mqd;

	gfx_v9_4_3_kiq_setting(ring, xcc_id);

	/* The GPU could be in a bad state during probe if the driver triggers
	 * a reset after loading the SMU; in that case the MQD has not been
	 * initialized and the driver needs to re-init it. Check
	 * mqd->cp_hqd_pq_control since this value should not be 0.
	 */
* Check mqd->cp_hqd_pq_control, since this value should not be 0. 1740 */ 1741 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup; 1742 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) { 1743 /* for the GPU reset case, restore the MQD to a clean state */ 1744 if (adev->gfx.kiq[xcc_id].mqd_backup) 1745 memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(struct v9_mqd_allocation)); 1746 1747 /* reset ring buffer */ 1748 ring->wptr = 0; 1749 amdgpu_ring_clear_ring(ring); 1750 mutex_lock(&adev->srbm_mutex); 1751 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); 1752 gfx_v9_4_3_kiq_init_register(ring, xcc_id); 1753 soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); 1754 mutex_unlock(&adev->srbm_mutex); 1755 } else { 1756 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 1757 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 1758 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 1759 mutex_lock(&adev->srbm_mutex); 1760 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); 1761 gfx_v9_4_3_mqd_init(ring); 1762 gfx_v9_4_3_kiq_init_register(ring, xcc_id); 1763 soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); 1764 mutex_unlock(&adev->srbm_mutex); 1765 1766 if (adev->gfx.kiq[xcc_id].mqd_backup) 1767 memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 1768 } 1769 1770 return 0; 1771 } 1772 1773 static int gfx_v9_4_3_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) 1774 { 1775 struct amdgpu_device *adev = ring->adev; 1776 struct v9_mqd *mqd = ring->mqd_ptr; 1777 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 1778 struct v9_mqd *tmp_mqd; 1779 1780 /* Same as the KIQ init above: the driver needs to re-init the MQD if 1781 * mqd->cp_hqd_pq_control has not been initialized before. 1782 */ 1783 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 1784 1785 if (!tmp_mqd->cp_hqd_pq_control || 1786 (!amdgpu_in_reset(adev) && !adev->in_suspend)) { 1787 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 1788 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 1789 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 1790 mutex_lock(&adev->srbm_mutex); 1791 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, xcc_id); 1792 gfx_v9_4_3_mqd_init(ring); 1793 soc15_grbm_select(adev, 0, 0, 0, 0, xcc_id); 1794 mutex_unlock(&adev->srbm_mutex); 1795 1796 if (adev->gfx.mec.mqd_backup[mqd_idx]) 1797 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 1798 } else { 1799 /* restore MQD to a clean status */ 1800 if (adev->gfx.mec.mqd_backup[mqd_idx]) 1801 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 1802 /* reset ring buffer */ 1803 ring->wptr = 0; 1804 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); 1805 amdgpu_ring_clear_ring(ring); 1806 } 1807 1808 return 0; 1809 } 1810 1811 static int gfx_v9_4_3_kiq_resume(struct amdgpu_device *adev, int xcc_id) 1812 { 1813 struct amdgpu_ring *ring; 1814 int r; 1815 1816 ring = &adev->gfx.kiq[xcc_id].ring; 1817 1818 r = amdgpu_bo_reserve(ring->mqd_obj, false); 1819 if (unlikely(r != 0)) 1820 return r; 1821 1822 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 1823 if (unlikely(r != 0)) { 1824 amdgpu_bo_unreserve(ring->mqd_obj); 1825 return r; 1826 } 1827 1828 gfx_v9_4_3_kiq_init_queue(ring, xcc_id); 1829 amdgpu_bo_kunmap(ring->mqd_obj); 1830 ring->mqd_ptr = NULL; 1831 amdgpu_bo_unreserve(ring->mqd_obj); 1832 ring->sched.ready = true; 1833 return 0;
1834 } 1835 1836 static int gfx_v9_4_3_kcq_resume(struct amdgpu_device *adev, int xcc_id) 1837 { 1838 struct amdgpu_ring *ring = NULL; 1839 int r = 0, i; 1840 1841 gfx_v9_4_3_cp_compute_enable(adev, true, xcc_id); 1842 1843 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 1844 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; 1845 1846 r = amdgpu_bo_reserve(ring->mqd_obj, false); 1847 if (unlikely(r != 0)) 1848 goto done; 1849 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 1850 if (!r) { 1851 r = gfx_v9_4_3_kcq_init_queue(ring, xcc_id); 1852 amdgpu_bo_kunmap(ring->mqd_obj); 1853 ring->mqd_ptr = NULL; 1854 } 1855 amdgpu_bo_unreserve(ring->mqd_obj); 1856 if (r) 1857 goto done; 1858 } 1859 1860 r = amdgpu_gfx_enable_kcq(adev, xcc_id); 1861 done: 1862 return r; 1863 } 1864 1865 static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) 1866 { 1867 int r, i, j; 1868 struct amdgpu_ring *ring; 1869 1870 for (i = 0; i < adev->gfx.num_xcd; i++) { 1871 gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); 1872 1873 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 1874 gfx_v9_4_3_disable_gpa_mode(adev, i); 1875 1876 r = gfx_v9_4_3_cp_compute_load_microcode(adev, i); 1877 if (r) 1878 return r; 1879 } 1880 1881 r = gfx_v9_4_3_kiq_resume(adev, i); 1882 if (r) 1883 return r; 1884 1885 r = gfx_v9_4_3_kcq_resume(adev, i); 1886 if (r) 1887 return r; 1888 1889 /* skip ring test on slave kcq */ 1890 if (amdgpu_gfx_is_master_xcc(adev, i)) { 1891 for (j = 0; j < adev->gfx.num_compute_rings; j++) { 1892 ring = &adev->gfx.compute_ring[j + 1893 i * adev->gfx.num_compute_rings]; 1894 amdgpu_ring_test_helper(ring); 1895 } 1896 } 1897 1898 gfx_v9_4_3_enable_gui_idle_interrupt(adev, true, i); 1899 } 1900 1901 return 0; 1902 } 1903 1904 static void gfx_v9_4_3_cp_enable(struct amdgpu_device *adev, bool enable, 1905 int xcc_id) 1906 { 1907 gfx_v9_4_3_cp_compute_enable(adev, enable, xcc_id); 1908 } 1909 1910 static int gfx_v9_4_3_hw_init(void *handle) 1911 { 1912 int r; 1913 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1914 1915 gfx_v9_4_3_init_golden_registers(adev); 1916 1917 gfx_v9_4_3_constants_init(adev); 1918 1919 r = adev->gfx.rlc.funcs->resume(adev); 1920 if (r) 1921 return r; 1922 1923 r = gfx_v9_4_3_cp_resume(adev); 1924 if (r) 1925 return r; 1926 1927 return r; 1928 } 1929 1930 static int gfx_v9_4_3_hw_fini(void *handle) 1931 { 1932 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1933 int i; 1934 1935 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 1936 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 1937 1938 for (i = 0; i < adev->gfx.num_xcd; i++) { 1939 if (amdgpu_gfx_disable_kcq(adev, i)) 1940 DRM_ERROR("XCD %d KCQ disable failed\n", i); 1941 1942 /* Use deinitialize sequence from CAIL when unbinding device 1943 * from driver, otherwise KIQ is hanging when binding back 1944 */ 1945 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 1946 mutex_lock(&adev->srbm_mutex); 1947 soc15_grbm_select(adev, adev->gfx.kiq[i].ring.me, 1948 adev->gfx.kiq[i].ring.pipe, 1949 adev->gfx.kiq[i].ring.queue, 0, i); 1950 gfx_v9_4_3_kiq_fini_register(&adev->gfx.kiq[i].ring, i); 1951 soc15_grbm_select(adev, 0, 0, 0, 0, i); 1952 mutex_unlock(&adev->srbm_mutex); 1953 } 1954 1955 gfx_v9_4_3_cp_enable(adev, false, i); 1956 } 1957 1958 /* Skip suspend with A+A reset */ 1959 if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { 1960 dev_dbg(adev->dev, "Device in reset. 
Skipping RLC halt\n"); 1961 return 0; 1962 } 1963 1964 adev->gfx.rlc.funcs->stop(adev); 1965 return 0; 1966 } 1967 1968 static int gfx_v9_4_3_suspend(void *handle) 1969 { 1970 return gfx_v9_4_3_hw_fini(handle); 1971 } 1972 1973 static int gfx_v9_4_3_resume(void *handle) 1974 { 1975 return gfx_v9_4_3_hw_init(handle); 1976 } 1977 1978 static bool gfx_v9_4_3_is_idle(void *handle) 1979 { 1980 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1981 int i; 1982 1983 for (i = 0; i < adev->gfx.num_xcd; i++) { 1984 if (REG_GET_FIELD(RREG32_SOC15(GC, i, regGRBM_STATUS), 1985 GRBM_STATUS, GUI_ACTIVE)) 1986 return false; 1987 } 1988 return true; 1989 } 1990 1991 static int gfx_v9_4_3_wait_for_idle(void *handle) 1992 { 1993 unsigned i; 1994 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1995 1996 for (i = 0; i < adev->usec_timeout; i++) { 1997 if (gfx_v9_4_3_is_idle(handle)) 1998 return 0; 1999 udelay(1); 2000 } 2001 return -ETIMEDOUT; 2002 } 2003 2004 static int gfx_v9_4_3_soft_reset(void *handle) 2005 { 2006 u32 grbm_soft_reset = 0; 2007 u32 tmp; 2008 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2009 2010 /* GRBM_STATUS */ 2011 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS); 2012 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 2013 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 2014 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 2015 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 2016 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 2017 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 2018 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2019 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 2020 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2021 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 2022 } 2023 2024 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 2025 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2026 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 2027 } 2028 2029 /* GRBM_STATUS2 */ 2030 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2); 2031 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 2032 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2033 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2034 2035 2036 if (grbm_soft_reset) { 2037 /* stop the rlc */ 2038 adev->gfx.rlc.funcs->stop(adev); 2039 2040 /* Disable MEC parsing/prefetching */ 2041 gfx_v9_4_3_cp_compute_enable(adev, false, 0); 2042 2043 if (grbm_soft_reset) { 2044 tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 2045 tmp |= grbm_soft_reset; 2046 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 2047 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); 2048 tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 2049 2050 udelay(50); 2051 2052 tmp &= ~grbm_soft_reset; 2053 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); 2054 tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 2055 } 2056 2057 /* Wait a little for things to settle down */ 2058 udelay(50); 2059 } 2060 return 0; 2061 } 2062 2063 static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, 2064 uint32_t vmid, 2065 uint32_t gds_base, uint32_t gds_size, 2066 uint32_t gws_base, uint32_t gws_size, 2067 uint32_t oa_base, uint32_t oa_size) 2068 { 2069 struct amdgpu_device *adev = ring->adev; 2070 2071 /* GDS Base */ 2072 gfx_v9_4_3_write_data_to_reg(ring, 0, false, 2073 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 2074 gds_base); 2075 2076 /* GDS Size */ 2077 gfx_v9_4_3_write_data_to_reg(ring, 0, false, 2078 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 2079 gds_size); 2080 
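/* The GWS and OA writes below are packed bitfields: GDS_GWS_VMID0 takes the size in its SIZE field and the base in the low bits, while the OA mask is a contiguous run of oa_size bits starting at bit oa_base, i.e. (1 << (oa_size + oa_base)) - (1 << oa_base). */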
2081 /* GWS */ 2082 gfx_v9_4_3_write_data_to_reg(ring, 0, false, 2083 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 2084 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 2085 2086 /* OA */ 2087 gfx_v9_4_3_write_data_to_reg(ring, 0, false, 2088 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 2089 (1 << (oa_size + oa_base)) - (1 << oa_base)); 2090 } 2091 2092 static int gfx_v9_4_3_early_init(void *handle) 2093 { 2094 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2095 2096 /* hardcode in emulation phase */ 2097 adev->gfx.num_xcd = 1; 2098 adev->gfx.num_xcc_per_xcp = 1; 2099 adev->gfx.partition_mode = AMDGPU_SPX_PARTITION_MODE; 2100 2101 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 2102 AMDGPU_MAX_COMPUTE_RINGS); 2103 gfx_v9_4_3_set_kiq_pm4_funcs(adev); 2104 gfx_v9_4_3_set_ring_funcs(adev); 2105 gfx_v9_4_3_set_irq_funcs(adev); 2106 gfx_v9_4_3_set_gds_init(adev); 2107 gfx_v9_4_3_set_rlc_funcs(adev); 2108 2109 return gfx_v9_4_3_init_microcode(adev); 2110 } 2111 2112 static int gfx_v9_4_3_late_init(void *handle) 2113 { 2114 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2115 int r; 2116 2117 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 2118 if (r) 2119 return r; 2120 2121 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 2122 if (r) 2123 return r; 2124 2125 return 0; 2126 } 2127 2128 static void gfx_v9_4_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, 2129 bool enable, int xcc_id) 2130 { 2131 uint32_t data, def; 2132 2133 amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); 2134 2135 /* It is disabled by HW by default */ 2136 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 2137 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 2138 def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); 2139 2140 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 2141 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 2142 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 2143 2144 /* only for Vega10 & Raven1 */ 2145 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 2146 2147 if (def != data) 2148 WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); 2149 2150 /* MGLS is a global flag to control all MGLS in GFX */ 2151 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 2152 /* 2 - RLC memory Light sleep */ 2153 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 2154 def = data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); 2155 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2156 if (def != data) 2157 WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); 2158 } 2159 /* 3 - CP memory Light sleep */ 2160 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 2161 def = data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); 2162 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2163 if (def != data) 2164 WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); 2165 } 2166 } 2167 } else { 2168 /* 1 - MGCG_OVERRIDE */ 2169 def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); 2170 2171 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 2172 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 2173 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 2174 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 2175 2176 if (def != data) 2177 WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); 2178 2179 /* 2 - disable MGLS in RLC */ 2180 data = RREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL); 2181 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 2182 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2183 
WREG32_SOC15(GC, xcc_id, regRLC_MEM_SLP_CNTL, data); 2184 } 2185 2186 /* 3 - disable MGLS in CP */ 2187 data = RREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL); 2188 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 2189 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2190 WREG32_SOC15(GC, xcc_id, regCP_MEM_SLP_CNTL, data); 2191 } 2192 } 2193 2194 amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); 2195 } 2196 2197 static void gfx_v9_4_3_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 2198 bool enable, int xcc_id) 2199 { 2200 uint32_t def, data; 2201 2202 amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); 2203 2204 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 2205 def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE); 2206 /* unset CGCG override */ 2207 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 2208 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 2209 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 2210 else 2211 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 2212 /* update CGCG and CGLS override bits */ 2213 if (def != data) 2214 WREG32_SOC15(GC, xcc_id, regRLC_CGTT_MGCG_OVERRIDE, data); 2215 2216 /* enable cgcg FSM(0x0000363F) */ 2217 def = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); 2218 2219 if (adev->asic_type == CHIP_ARCTURUS) 2220 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 2221 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 2222 else 2223 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 2224 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 2225 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 2226 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 2227 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 2228 if (def != data) 2229 WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); 2230 2231 /* set IDLE_POLL_COUNT(0x00900100) */ 2232 def = RREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL); 2233 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 2234 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2235 if (def != data) 2236 WREG32_SOC15(GC, xcc_id, regCP_RB_WPTR_POLL_CNTL, data); 2237 } else { 2238 def = data = RREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL); 2239 /* reset CGCG/CGLS bits */ 2240 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 2241 /* disable cgcg and cgls in FSM */ 2242 if (def != data) 2243 WREG32_SOC15(GC, xcc_id, regRLC_CGCG_CGLS_CTRL, data); 2244 } 2245 2246 amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); 2247 } 2248 2249 static int gfx_v9_4_3_update_gfx_clock_gating(struct amdgpu_device *adev, 2250 bool enable, int xcc_id) 2251 { 2252 if (enable) { 2253 /* CGCG/CGLS should be enabled after MGCG/MGLS 2254 * === MGCG + MGLS === 2255 */ 2256 gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); 2257 /* === CGCG + CGLS === */ 2258 gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); 2259 } else { 2260 /* CGCG/CGLS should be disabled before MGCG/MGLS 2261 * === CGCG + CGLS === 2262 */ 2263 gfx_v9_4_3_update_coarse_grain_clock_gating(adev, enable, xcc_id); 2264 /* === MGCG + MGLS === */ 2265 gfx_v9_4_3_update_medium_grain_clock_gating(adev, enable, xcc_id); 2266 } 2267 return 0; 2268 } 2269 2270 static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { 2271 .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, 2272 .set_safe_mode = gfx_v9_4_3_set_safe_mode, 2273 .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, 2274 .init = gfx_v9_4_3_rlc_init, 2275 .get_csb_size = gfx_v9_4_3_get_csb_size, 2276 .get_csb_buffer = 
gfx_v9_4_3_get_csb_buffer, 2277 .resume = gfx_v9_4_3_rlc_resume, 2278 .stop = gfx_v9_4_3_rlc_stop, 2279 .reset = gfx_v9_4_3_rlc_reset, 2280 .start = gfx_v9_4_3_rlc_start, 2281 .update_spm_vmid = gfx_v9_4_3_update_spm_vmid, 2282 .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, 2283 }; 2284 2285 static int gfx_v9_4_3_set_powergating_state(void *handle, 2286 enum amd_powergating_state state) 2287 { 2288 return 0; 2289 } 2290 2291 static int gfx_v9_4_3_set_clockgating_state(void *handle, 2292 enum amd_clockgating_state state) 2293 { 2294 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2295 int i; 2296 2297 if (amdgpu_sriov_vf(adev)) 2298 return 0; 2299 2300 switch (adev->ip_versions[GC_HWIP][0]) { 2301 case IP_VERSION(9, 4, 3): 2302 for (i = 0; i < adev->gfx.num_xcd; i++) 2303 gfx_v9_4_3_update_gfx_clock_gating(adev, 2304 state == AMD_CG_STATE_GATE, i); 2305 break; 2306 default: 2307 break; 2308 } 2309 return 0; 2310 } 2311 2312 static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags) 2313 { 2314 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2315 int data; 2316 2317 if (amdgpu_sriov_vf(adev)) 2318 *flags = 0; 2319 2320 /* AMD_CG_SUPPORT_GFX_MGCG */ 2321 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_CGTT_MGCG_OVERRIDE)); 2322 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 2323 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 2324 2325 /* AMD_CG_SUPPORT_GFX_CGCG */ 2326 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_CGCG_CGLS_CTRL)); 2327 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 2328 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 2329 2330 /* AMD_CG_SUPPORT_GFX_CGLS */ 2331 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 2332 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 2333 2334 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 2335 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regRLC_MEM_SLP_CNTL)); 2336 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 2337 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 2338 2339 /* AMD_CG_SUPPORT_GFX_CP_LS */ 2340 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, regCP_MEM_SLP_CNTL)); 2341 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 2342 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 2343 } 2344 2345 static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) 2346 { 2347 struct amdgpu_device *adev = ring->adev; 2348 u32 ref_and_mask, reg_mem_engine; 2349 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 2350 2351 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 2352 switch (ring->me) { 2353 case 1: 2354 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 2355 break; 2356 case 2: 2357 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 2358 break; 2359 default: 2360 return; 2361 } 2362 reg_mem_engine = 0; 2363 } else { 2364 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 2365 reg_mem_engine = 1; /* pfp */ 2366 } 2367 2368 gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1, 2369 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 2370 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 2371 ref_and_mask, ref_and_mask, 0x20); 2372 } 2373 2374 static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring, 2375 struct amdgpu_job *job, 2376 struct amdgpu_ib *ib, 2377 uint32_t flags) 2378 { 2379 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 2380 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 2381 2382 /* Currently, there is a high possibility to get wave ID mismatch 2383 * between ME and GDS, leading to a hw deadlock, because ME generates 2384 * different 
wave IDs than the GDS expects. This situation happens 2385 * randomly when at least 5 compute pipes use GDS ordered append. 2386 * The wave IDs generated by ME are also wrong after suspend/resume. 2387 * Those are probably bugs somewhere else in the kernel driver. 2388 * 2389 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 2390 * GDS to 0 for this ring (me/pipe). 2391 */ 2392 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 2393 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 2394 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 2395 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 2396 } 2397 2398 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 2399 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 2400 amdgpu_ring_write(ring, 2401 #ifdef __BIG_ENDIAN 2402 (2 << 0) | 2403 #endif 2404 lower_32_bits(ib->gpu_addr)); 2405 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 2406 amdgpu_ring_write(ring, control); 2407 } 2408 2409 static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 2410 u64 seq, unsigned flags) 2411 { 2412 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 2413 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 2414 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 2415 2416 /* RELEASE_MEM - flush caches, send int */ 2417 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 2418 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 2419 EOP_TC_NC_ACTION_EN) : 2420 (EOP_TCL1_ACTION_EN | 2421 EOP_TC_ACTION_EN | 2422 EOP_TC_WB_ACTION_EN | 2423 EOP_TC_MD_ACTION_EN)) | 2424 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 2425 EVENT_INDEX(5))); 2426 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 2427 2428 /* 2429 * the address should be Qword aligned if 64bit write, Dword 2430 * aligned if only send 32bit data low (discard data high) 2431 */ 2432 if (write64bit) 2433 BUG_ON(addr & 0x7); 2434 else 2435 BUG_ON(addr & 0x3); 2436 amdgpu_ring_write(ring, lower_32_bits(addr)); 2437 amdgpu_ring_write(ring, upper_32_bits(addr)); 2438 amdgpu_ring_write(ring, lower_32_bits(seq)); 2439 amdgpu_ring_write(ring, upper_32_bits(seq)); 2440 amdgpu_ring_write(ring, 0); 2441 } 2442 2443 static void gfx_v9_4_3_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 2444 { 2445 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 2446 uint32_t seq = ring->fence_drv.sync_seq; 2447 uint64_t addr = ring->fence_drv.gpu_addr; 2448 2449 gfx_v9_4_3_wait_reg_mem(ring, usepfp, 1, 0, 2450 lower_32_bits(addr), upper_32_bits(addr), 2451 seq, 0xffffffff, 4); 2452 } 2453 2454 static void gfx_v9_4_3_ring_emit_vm_flush(struct amdgpu_ring *ring, 2455 unsigned vmid, uint64_t pd_addr) 2456 { 2457 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 2458 } 2459 2460 static u64 gfx_v9_4_3_ring_get_rptr_compute(struct amdgpu_ring *ring) 2461 { 2462 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 2463 } 2464 2465 static u64 gfx_v9_4_3_ring_get_wptr_compute(struct amdgpu_ring *ring) 2466 { 2467 u64 wptr; 2468 2469 /* XXX check if swapping is necessary on BE */ 2470 if (ring->use_doorbell) 2471 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 2472 else 2473 BUG(); 2474 return wptr; 2475 } 2476 2477 static void gfx_v9_4_3_ring_set_wptr_compute(struct amdgpu_ring *ring) 2478 { 2479 struct amdgpu_device *adev = ring->adev; 2480 2481 /* XXX check if swapping is necessary on BE */ 2482 if (ring->use_doorbell) { 2483 atomic64_set((atomic64_t 
*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 2484 WDOORBELL64(ring->doorbell_index, ring->wptr); 2485 } else { 2486 BUG(); /* only DOORBELL method supported on gfx9 now */ 2487 } 2488 } 2489 2490 static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 2491 u64 seq, unsigned int flags) 2492 { 2493 struct amdgpu_device *adev = ring->adev; 2494 2495 /* we only allocate 32bit for each seq wb address */ 2496 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 2497 2498 /* write fence seq to the "addr" */ 2499 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 2500 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 2501 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 2502 amdgpu_ring_write(ring, lower_32_bits(addr)); 2503 amdgpu_ring_write(ring, upper_32_bits(addr)); 2504 amdgpu_ring_write(ring, lower_32_bits(seq)); 2505 2506 if (flags & AMDGPU_FENCE_FLAG_INT) { 2507 /* set register to trigger INT */ 2508 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 2509 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 2510 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 2511 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 2512 amdgpu_ring_write(ring, 0); 2513 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 2514 } 2515 } 2516 2517 static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 2518 uint32_t reg_val_offs) 2519 { 2520 struct amdgpu_device *adev = ring->adev; 2521 2522 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 2523 amdgpu_ring_write(ring, 0 | /* src: register*/ 2524 (5 << 8) | /* dst: memory */ 2525 (1 << 20)); /* write confirm */ 2526 amdgpu_ring_write(ring, reg); 2527 amdgpu_ring_write(ring, 0); 2528 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 2529 reg_val_offs * 4)); 2530 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 2531 reg_val_offs * 4)); 2532 } 2533 2534 static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 2535 uint32_t val) 2536 { 2537 uint32_t cmd = 0; 2538 2539 switch (ring->funcs->type) { 2540 case AMDGPU_RING_TYPE_GFX: 2541 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 2542 break; 2543 case AMDGPU_RING_TYPE_KIQ: 2544 cmd = (1 << 16); /* no inc addr */ 2545 break; 2546 default: 2547 cmd = WR_CONFIRM; 2548 break; 2549 } 2550 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 2551 amdgpu_ring_write(ring, cmd); 2552 amdgpu_ring_write(ring, reg); 2553 amdgpu_ring_write(ring, 0); 2554 amdgpu_ring_write(ring, val); 2555 } 2556 2557 static void gfx_v9_4_3_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 2558 uint32_t val, uint32_t mask) 2559 { 2560 gfx_v9_4_3_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 2561 } 2562 2563 static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 2564 uint32_t reg0, uint32_t reg1, 2565 uint32_t ref, uint32_t mask) 2566 { 2567 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 2568 ref, mask); 2569 } 2570 2571 static void gfx_v9_4_3_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 2572 int me, int pipe, 2573 enum amdgpu_interrupt_state state, 2574 int xcc_id) 2575 { 2576 u32 mec_int_cntl, mec_int_cntl_reg; 2577 2578 /* 2579 * amdgpu controls only the first MEC. That's why this function only 2580 * handles the setting of interrupts for this specific MEC. All other 2581 * pipes' interrupts are set by amdkfd. 
2582 */ 2583 2584 if (me == 1) { 2585 switch (pipe) { 2586 case 0: 2587 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE0_INT_CNTL); 2588 break; 2589 case 1: 2590 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE1_INT_CNTL); 2591 break; 2592 case 2: 2593 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE2_INT_CNTL); 2594 break; 2595 case 3: 2596 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, xcc_id, regCP_ME1_PIPE3_INT_CNTL); 2597 break; 2598 default: 2599 DRM_DEBUG("invalid pipe %d\n", pipe); 2600 return; 2601 } 2602 } else { 2603 DRM_DEBUG("invalid me %d\n", me); 2604 return; 2605 } 2606 2607 switch (state) { 2608 case AMDGPU_IRQ_STATE_DISABLE: 2609 mec_int_cntl = RREG32(mec_int_cntl_reg); 2610 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 2611 TIME_STAMP_INT_ENABLE, 0); 2612 WREG32(mec_int_cntl_reg, mec_int_cntl); 2613 break; 2614 case AMDGPU_IRQ_STATE_ENABLE: 2615 mec_int_cntl = RREG32(mec_int_cntl_reg); 2616 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 2617 TIME_STAMP_INT_ENABLE, 1); 2618 WREG32(mec_int_cntl_reg, mec_int_cntl); 2619 break; 2620 default: 2621 break; 2622 } 2623 } 2624 2625 static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, 2626 struct amdgpu_irq_src *source, 2627 unsigned type, 2628 enum amdgpu_interrupt_state state) 2629 { 2630 int i; 2631 2632 switch (state) { 2633 case AMDGPU_IRQ_STATE_DISABLE: 2634 case AMDGPU_IRQ_STATE_ENABLE: 2635 for (i = 0; i < adev->gfx.num_xcd; i++) 2636 WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, 2637 PRIV_REG_INT_ENABLE, 2638 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 2639 break; 2640 default: 2641 break; 2642 } 2643 2644 return 0; 2645 } 2646 2647 static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, 2648 struct amdgpu_irq_src *source, 2649 unsigned type, 2650 enum amdgpu_interrupt_state state) 2651 { 2652 int i; 2653 2654 switch (state) { 2655 case AMDGPU_IRQ_STATE_DISABLE: 2656 case AMDGPU_IRQ_STATE_ENABLE: 2657 for (i = 0; i < adev->gfx.num_xcd; i++) 2658 WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, 2659 PRIV_INSTR_INT_ENABLE, 2660 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 2661 break; 2662 default: 2663 break; 2664 } 2665 2666 return 0; 2667 } 2668 2669 static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, 2670 struct amdgpu_irq_src *src, 2671 unsigned type, 2672 enum amdgpu_interrupt_state state) 2673 { 2674 int i; 2675 for (i = 0; i < adev->gfx.num_xcd; i++) { 2676 switch (type) { 2677 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 2678 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state, i); 2679 break; 2680 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 2681 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 1, state, i); 2682 break; 2683 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 2684 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 2, state, i); 2685 break; 2686 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 2687 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 3, state, i); 2688 break; 2689 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 2690 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 0, state, i); 2691 break; 2692 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 2693 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 1, state, i); 2694 break; 2695 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 2696 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 2, state, i); 2697 break; 2698 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 2699 gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 2, 3, state, i); 2700 break; 2701 default: 2702 break; 2703 } 2704 } 2705 2706 return 0; 2707 } 2708 2709 static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, 2710 struct amdgpu_irq_src *source, 2711 struct amdgpu_iv_entry *entry) 2712 { 2713 int i; 2714 u8 me_id, pipe_id, queue_id; 2715 struct amdgpu_ring *ring; 2716 2717 DRM_DEBUG("IH: CP EOP\n"); 2718 me_id = (entry->ring_id & 0x0c) >> 2; 2719 pipe_id = (entry->ring_id & 0x03) >> 0; 2720 queue_id = (entry->ring_id & 0x70) >> 4; 2721 2722 switch (me_id) { 2723 case 0: 2724 case 1: 2725 case 2: 2726 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2727 ring = &adev->gfx.compute_ring[i]; 2728 /* Per-queue interrupt is supported for MEC starting from VI. 2729 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
2730 */ 2731 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 2732 amdgpu_fence_process(ring); 2733 } 2734 break; 2735 } 2736 return 0; 2737 } 2738 2739 static void gfx_v9_4_3_fault(struct amdgpu_device *adev, 2740 struct amdgpu_iv_entry *entry) 2741 { 2742 u8 me_id, pipe_id, queue_id; 2743 struct amdgpu_ring *ring; 2744 int i; 2745 2746 me_id = (entry->ring_id & 0x0c) >> 2; 2747 pipe_id = (entry->ring_id & 0x03) >> 0; 2748 queue_id = (entry->ring_id & 0x70) >> 4; 2749 2750 switch (me_id) { 2751 case 0: 2752 case 1: 2753 case 2: 2754 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2755 ring = &adev->gfx.compute_ring[i]; 2756 if (ring->me == me_id && ring->pipe == pipe_id && 2757 ring->queue == queue_id) 2758 drm_sched_fault(&ring->sched); 2759 } 2760 break; 2761 } 2762 } 2763 2764 static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, 2765 struct amdgpu_irq_src *source, 2766 struct amdgpu_iv_entry *entry) 2767 { 2768 DRM_ERROR("Illegal register access in command stream\n"); 2769 gfx_v9_4_3_fault(adev, entry); 2770 return 0; 2771 } 2772 2773 static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, 2774 struct amdgpu_irq_src *source, 2775 struct amdgpu_iv_entry *entry) 2776 { 2777 DRM_ERROR("Illegal instruction in command stream\n"); 2778 gfx_v9_4_3_fault(adev, entry); 2779 return 0; 2780 } 2781 2782 static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring *ring) 2783 { 2784 const unsigned int cp_coher_cntl = 2785 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 2786 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 2787 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 2788 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 2789 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 2790 2791 /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ 2792 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 2793 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 2794 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 2795 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 2796 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 2797 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 2798 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 2799 } 2800 2801 static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring, 2802 uint32_t pipe, bool enable) 2803 { 2804 struct amdgpu_device *adev = ring->adev; 2805 uint32_t val; 2806 uint32_t wcl_cs_reg; 2807 2808 /* regSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ 2809 val = enable ? 0x1 : 0x7f; 2810 2811 switch (pipe) { 2812 case 0: 2813 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS0); 2814 break; 2815 case 1: 2816 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS1); 2817 break; 2818 case 2: 2819 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS2); 2820 break; 2821 case 3: 2822 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_CS3); 2823 break; 2824 default: 2825 DRM_DEBUG("invalid pipe %d\n", pipe); 2826 return; 2827 } 2828 2829 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 2830 2831 } 2832 static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 2833 { 2834 struct amdgpu_device *adev = ring->adev; 2835 uint32_t val; 2836 int i; 2837 2838 /* regSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit 2839 * number of gfx waves. Setting 5 bit will make sure gfx only gets 2840 * around 25% of gpu resources. 
2841 */ 2842 val = enable ? 0x1f : 0x07ffffff; 2843 amdgpu_ring_emit_wreg(ring, 2844 SOC15_REG_OFFSET(GC, 0, regSPI_WCL_PIPE_PERCENT_GFX), 2845 val); 2846 2847 /* Restrict waves for normal/low priority compute queues as well 2848 * to get best QoS for high priority compute jobs. 2849 * 2850 * amdgpu controls only 1st ME(0-3 CS pipes). 2851 */ 2852 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2853 if (i != ring->pipe) 2854 gfx_v9_4_3_emit_wave_limit_cs(ring, i, enable); 2855 2856 } 2857 } 2858 2859 static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { 2860 .name = "gfx_v9_4_3", 2861 .early_init = gfx_v9_4_3_early_init, 2862 .late_init = gfx_v9_4_3_late_init, 2863 .sw_init = gfx_v9_4_3_sw_init, 2864 .sw_fini = gfx_v9_4_3_sw_fini, 2865 .hw_init = gfx_v9_4_3_hw_init, 2866 .hw_fini = gfx_v9_4_3_hw_fini, 2867 .suspend = gfx_v9_4_3_suspend, 2868 .resume = gfx_v9_4_3_resume, 2869 .is_idle = gfx_v9_4_3_is_idle, 2870 .wait_for_idle = gfx_v9_4_3_wait_for_idle, 2871 .soft_reset = gfx_v9_4_3_soft_reset, 2872 .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, 2873 .set_powergating_state = gfx_v9_4_3_set_powergating_state, 2874 .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, 2875 }; 2876 2877 static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { 2878 .type = AMDGPU_RING_TYPE_COMPUTE, 2879 .align_mask = 0xff, 2880 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 2881 .support_64bit_ptrs = true, 2882 .get_rptr = gfx_v9_4_3_ring_get_rptr_compute, 2883 .get_wptr = gfx_v9_4_3_ring_get_wptr_compute, 2884 .set_wptr = gfx_v9_4_3_ring_set_wptr_compute, 2885 .emit_frame_size = 2886 20 + /* gfx_v9_4_3_ring_emit_gds_switch */ 2887 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */ 2888 5 + /* hdp invalidate */ 2889 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */ 2890 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 2891 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2892 2 + /* gfx_v9_4_3_ring_emit_vm_flush */ 2893 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ 2894 7 + /* gfx_v9_4_3_emit_mem_sync */ 2895 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ 2896 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ 2897 .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ 2898 .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, 2899 .emit_fence = gfx_v9_4_3_ring_emit_fence, 2900 .emit_pipeline_sync = gfx_v9_4_3_ring_emit_pipeline_sync, 2901 .emit_vm_flush = gfx_v9_4_3_ring_emit_vm_flush, 2902 .emit_gds_switch = gfx_v9_4_3_ring_emit_gds_switch, 2903 .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, 2904 .test_ring = gfx_v9_4_3_ring_test_ring, 2905 .test_ib = gfx_v9_4_3_ring_test_ib, 2906 .insert_nop = amdgpu_ring_insert_nop, 2907 .pad_ib = amdgpu_ring_generic_pad_ib, 2908 .emit_wreg = gfx_v9_4_3_ring_emit_wreg, 2909 .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, 2910 .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, 2911 .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, 2912 .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, 2913 }; 2914 2915 static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { 2916 .type = AMDGPU_RING_TYPE_KIQ, 2917 .align_mask = 0xff, 2918 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 2919 .support_64bit_ptrs = true, 2920 .get_rptr = gfx_v9_4_3_ring_get_rptr_compute, 2921 .get_wptr = gfx_v9_4_3_ring_get_wptr_compute, 2922 .set_wptr = gfx_v9_4_3_ring_set_wptr_compute, 2923 .emit_frame_size = 2924 20 + /* gfx_v9_4_3_ring_emit_gds_switch */ 2925 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */ 2926 5 + /* hdp invalidate */ 2927 7 
+ /* gfx_v9_4_3_ring_emit_pipeline_sync */ 2928 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 2929 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2930 2 + /* gfx_v9_4_3_ring_emit_vm_flush */ 2931 8 + 8 + 8, /* gfx_v9_4_3_ring_emit_fence_kiq x3 for user fence, vm fence */ 2932 .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ 2933 .emit_fence = gfx_v9_4_3_ring_emit_fence_kiq, 2934 .test_ring = gfx_v9_4_3_ring_test_ring, 2935 .insert_nop = amdgpu_ring_insert_nop, 2936 .pad_ib = amdgpu_ring_generic_pad_ib, 2937 .emit_rreg = gfx_v9_4_3_ring_emit_rreg, 2938 .emit_wreg = gfx_v9_4_3_ring_emit_wreg, 2939 .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, 2940 .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, 2941 }; 2942 2943 static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) 2944 { 2945 int i, j; 2946 2947 for (i = 0; i < adev->gfx.num_xcd; i++) { 2948 adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; 2949 2950 for (j = 0; j < adev->gfx.num_compute_rings; j++) 2951 adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs 2952 = &gfx_v9_4_3_ring_funcs_compute; 2953 } 2954 } 2955 2956 static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = { 2957 .set = gfx_v9_4_3_set_eop_interrupt_state, 2958 .process = gfx_v9_4_3_eop_irq, 2959 }; 2960 2961 static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { 2962 .set = gfx_v9_4_3_set_priv_reg_fault_state, 2963 .process = gfx_v9_4_3_priv_reg_irq, 2964 }; 2965 2966 static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { 2967 .set = gfx_v9_4_3_set_priv_inst_fault_state, 2968 .process = gfx_v9_4_3_priv_inst_irq, 2969 }; 2970 2971 static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) 2972 { 2973 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 2974 adev->gfx.eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs; 2975 2976 adev->gfx.priv_reg_irq.num_types = 1; 2977 adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; 2978 2979 adev->gfx.priv_inst_irq.num_types = 1; 2980 adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; 2981 } 2982 2983 static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev) 2984 { 2985 adev->gfx.rlc.funcs = &gfx_v9_4_3_rlc_funcs; 2986 } 2987 2988 2989 static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) 2990 { 2991 /* init ASIC GDS info */ 2992 switch (adev->ip_versions[GC_HWIP][0]) { 2993 case IP_VERSION(9, 4, 3): 2994 /* 9.4.3 removed all the GDS internal memory; 2995 * only GWS opcodes, e.g. barrier and 2996 * semaphore, are supported in the kernel. */ 2997 adev->gds.gds_size = 0; 2998 break; 2999 default: 3000 adev->gds.gds_size = 0x10000; 3001 break; 3002 } 3003 3004 switch (adev->ip_versions[GC_HWIP][0]) { 3005 case IP_VERSION(9, 4, 3): 3006 /* deprecated for 9.4.3, no usage at all */ 3007 adev->gds.gds_compute_max_wave_id = 0; 3008 break; 3009 default: 3010 /* this really depends on the chip */ 3011 adev->gds.gds_compute_max_wave_id = 0x7ff; 3012 break; 3013 } 3014 3015 adev->gds.gws_size = 64; 3016 adev->gds.oa_size = 16; 3017 } 3018 3019 static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 3020 u32 bitmap) 3021 { 3022 u32 data; 3023 3024 if (!bitmap) 3025 return; 3026 3027 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 3028 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 3029 3030 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 3031 } 3032 3033 static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) 3034 { 3035 u32 data, mask; 3036 3037 data =
RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); 3038 data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); 3039 3040 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 3041 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 3042 3043 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 3044 3045 return (~data) & mask; 3046 } 3047 3048 static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, 3049 struct amdgpu_cu_info *cu_info) 3050 { 3051 int i, j, k, counter, active_cu_number = 0; 3052 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 3053 unsigned disable_masks[4 * 4]; 3054 3055 if (!adev || !cu_info) 3056 return -EINVAL; 3057 3058 /* 3059 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 3060 */ 3061 if (adev->gfx.config.max_shader_engines * 3062 adev->gfx.config.max_sh_per_se > 16) 3063 return -EINVAL; 3064 3065 amdgpu_gfx_parse_disable_cu(disable_masks, 3066 adev->gfx.config.max_shader_engines, 3067 adev->gfx.config.max_sh_per_se); 3068 3069 mutex_lock(&adev->grbm_idx_mutex); 3070 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3071 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3072 mask = 1; 3073 ao_bitmap = 0; 3074 counter = 0; 3075 gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, 0); 3076 gfx_v9_4_3_set_user_cu_inactive_bitmap( 3077 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 3078 bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); 3079 3080 /* 3081 * The bitmap(and ao_cu_bitmap) in cu_info structure is 3082 * 4x4 size array, and it's usually suitable for Vega 3083 * ASICs which has 4*2 SE/SH layout. 3084 * But for Arcturus, SE/SH layout is changed to 8*1. 3085 * To mostly reduce the impact, we make it compatible 3086 * with current bitmap array as below: 3087 * SE4,SH0 --> bitmap[0][1] 3088 * SE5,SH0 --> bitmap[1][1] 3089 * SE6,SH0 --> bitmap[2][1] 3090 * SE7,SH0 --> bitmap[3][1] 3091 */ 3092 cu_info->bitmap[i % 4][j + i / 4] = bitmap; 3093 3094 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 3095 if (bitmap & mask) { 3096 if (counter < adev->gfx.config.max_cu_per_sh) 3097 ao_bitmap |= mask; 3098 counter++; 3099 } 3100 mask <<= 1; 3101 } 3102 active_cu_number += counter; 3103 if (i < 2 && j < 2) 3104 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 3105 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 3106 } 3107 } 3108 gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 3109 mutex_unlock(&adev->grbm_idx_mutex); 3110 3111 cu_info->number = active_cu_number; 3112 cu_info->ao_cu_mask = ao_cu_mask; 3113 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 3114 3115 return 0; 3116 } 3117 3118 const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { 3119 .type = AMD_IP_BLOCK_TYPE_GFX, 3120 .major = 9, 3121 .minor = 4, 3122 .rev = 0, 3123 .funcs = &gfx_v9_4_3_ip_funcs, 3124 }; 3125