1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/delay.h> 24 #include <linux/kernel.h> 25 #include <linux/firmware.h> 26 #include <linux/module.h> 27 #include <linux/pci.h> 28 #include "amdgpu.h" 29 #include "amdgpu_gfx.h" 30 #include "amdgpu_psp.h" 31 #include "amdgpu_smu.h" 32 #include "amdgpu_atomfirmware.h" 33 #include "imu_v11_0.h" 34 #include "soc21.h" 35 #include "nvd.h" 36 37 #include "gc/gc_11_0_0_offset.h" 38 #include "gc/gc_11_0_0_sh_mask.h" 39 #include "smuio/smuio_13_0_6_offset.h" 40 #include "smuio/smuio_13_0_6_sh_mask.h" 41 #include "navi10_enum.h" 42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" 43 44 #include "soc15.h" 45 #include "soc15d.h" 46 #include "clearstate_gfx11.h" 47 #include "v11_structs.h" 48 #include "gfx_v11_0.h" 49 #include "nbio_v4_3.h" 50 #include "mes_v11_0.h" 51 52 #define GFX11_NUM_GFX_RINGS 1 53 #define GFX11_MEC_HPD_SIZE 2048 54 55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 57 58 #define regCGTT_WD_CLK_CTRL 0x5086 59 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e 61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 62 63 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); 64 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); 65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); 66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); 67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); 68 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); 69 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); 70 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin"); 71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin"); 72 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); 73 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); 74 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); 75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); 76 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); 77 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 78 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 80 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 81 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 82 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 84 85 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = 86 { 87 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), 88 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010), 89 SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), 90 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988), 91 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007), 92 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008), 93 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100), 94 SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), 95 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) 96 }; 97 98 #define DEFAULT_SH_MEM_CONFIG \ 99 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ 100 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ 101 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) 102 103 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev); 104 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev); 105 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev); 106 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev); 107 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev); 108 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev); 109 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev); 110 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 111 struct amdgpu_cu_info *cu_info); 112 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); 113 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 114 u32 sh_num, u32 instance); 115 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); 116 117 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); 118 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); 119 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 120 uint32_t val); 121 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); 122 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 123 uint16_t pasid, uint32_t flush_type, 124 bool all_hub, uint8_t dst_sel); 125 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); 126 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); 127 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 128 bool enable); 129 130 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) 131 { 132 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 133 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 134 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 135 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 136 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 137 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 138 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 139 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 140 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 141 } 142 143 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring, 144 struct amdgpu_ring *ring) 145 { 146 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 147 uint64_t wptr_addr = ring->wptr_gpu_addr; 148 uint32_t me = 0, eng_sel = 0; 149 150 switch (ring->funcs->type) { 151 case AMDGPU_RING_TYPE_COMPUTE: 152 me = 1; 153 eng_sel = 0; 154 break; 155 case AMDGPU_RING_TYPE_GFX: 156 me = 0; 157 eng_sel = 4; 158 break; 159 case AMDGPU_RING_TYPE_MES: 160 me = 2; 161 eng_sel = 5; 162 break; 163 default: 164 WARN_ON(1); 165 } 166 167 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 168 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 169 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 170 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 171 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 172 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 173 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 174 PACKET3_MAP_QUEUES_ME((me)) | 175 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 176 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 177 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 178 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 179 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 180 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 181 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 182 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 183 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 184 } 185 186 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 187 struct amdgpu_ring *ring, 188 enum amdgpu_unmap_queues_action action, 189 u64 gpu_addr, u64 seq) 190 { 191 struct amdgpu_device *adev = kiq_ring->adev; 192 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 193 194 if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) { 195 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); 196 return; 197 } 198 199 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 200 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 201 PACKET3_UNMAP_QUEUES_ACTION(action) | 202 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 203 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 204 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 205 amdgpu_ring_write(kiq_ring, 206 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 207 208 if (action == PREEMPT_QUEUES_NO_UNMAP) { 209 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 210 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 211 amdgpu_ring_write(kiq_ring, seq); 212 } else { 213 amdgpu_ring_write(kiq_ring, 0); 214 amdgpu_ring_write(kiq_ring, 0); 215 amdgpu_ring_write(kiq_ring, 0); 216 } 217 } 218 219 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring, 220 struct amdgpu_ring *ring, 221 u64 addr, 222 u64 seq) 223 { 224 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 225 226 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 227 amdgpu_ring_write(kiq_ring, 228 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 229 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 230 PACKET3_QUERY_STATUS_COMMAND(2)); 231 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 232 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 233 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 234 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 235 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 236 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 237 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 238 } 239 240 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 241 uint16_t pasid, uint32_t flush_type, 242 bool all_hub) 243 { 244 gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); 245 } 246 247 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { 248 .kiq_set_resources = gfx11_kiq_set_resources, 249 .kiq_map_queues = gfx11_kiq_map_queues, 250 .kiq_unmap_queues = gfx11_kiq_unmap_queues, 251 .kiq_query_status = gfx11_kiq_query_status, 252 .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs, 253 .set_resources_size = 8, 254 .map_queues_size = 7, 255 .unmap_queues_size = 6, 256 .query_status_size = 7, 257 .invalidate_tlbs_size = 2, 258 }; 259 260 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 261 { 262 adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs; 263 } 264 265 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) 266 { 267 switch (adev->ip_versions[GC_HWIP][0]) { 268 case IP_VERSION(11, 0, 1): 269 case IP_VERSION(11, 0, 4): 270 soc15_program_register_sequence(adev, 271 golden_settings_gc_11_0_1, 272 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); 273 break; 274 default: 275 break; 276 } 277 } 278 279 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 280 bool wc, uint32_t reg, uint32_t val) 281 { 282 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 283 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 284 WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0)); 285 amdgpu_ring_write(ring, reg); 286 amdgpu_ring_write(ring, 0); 287 amdgpu_ring_write(ring, val); 288 } 289 290 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 291 int mem_space, int opt, uint32_t addr0, 292 uint32_t addr1, uint32_t ref, uint32_t mask, 293 uint32_t inv) 294 { 295 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 296 amdgpu_ring_write(ring, 297 /* memory (1) or register (0) */ 298 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 299 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 300 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 301 WAIT_REG_MEM_ENGINE(eng_sel))); 302 303 if (mem_space) 304 BUG_ON(addr0 & 0x3); /* Dword align */ 305 amdgpu_ring_write(ring, addr0); 306 amdgpu_ring_write(ring, addr1); 307 amdgpu_ring_write(ring, ref); 308 amdgpu_ring_write(ring, mask); 309 amdgpu_ring_write(ring, inv); /* poll interval */ 310 } 311 312 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) 313 { 314 struct amdgpu_device *adev = ring->adev; 315 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 316 uint32_t tmp = 0; 317 unsigned i; 318 int r; 319 320 WREG32(scratch, 0xCAFEDEAD); 321 r = amdgpu_ring_alloc(ring, 5); 322 if (r) { 323 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 324 ring->idx, r); 325 return r; 326 } 327 328 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { 329 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF); 330 } else { 331 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 332 amdgpu_ring_write(ring, scratch - 333 PACKET3_SET_UCONFIG_REG_START); 334 amdgpu_ring_write(ring, 0xDEADBEEF); 335 } 336 amdgpu_ring_commit(ring); 337 338 for (i = 0; i < adev->usec_timeout; i++) { 339 tmp = RREG32(scratch); 340 if (tmp == 0xDEADBEEF) 341 break; 342 if (amdgpu_emu_mode == 1) 343 msleep(1); 344 else 345 udelay(1); 346 } 347 348 if (i >= adev->usec_timeout) 349 r = -ETIMEDOUT; 350 return r; 351 } 352 353 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 354 { 355 struct amdgpu_device *adev = ring->adev; 356 struct amdgpu_ib ib; 357 struct dma_fence *f = NULL; 358 unsigned index; 359 uint64_t gpu_addr; 360 volatile uint32_t *cpu_ptr; 361 long r; 362 363 /* MES KIQ fw hasn't indirect buffer support for now */ 364 if (adev->enable_mes_kiq && 365 ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 366 return 0; 367 368 memset(&ib, 0, sizeof(ib)); 369 370 if (ring->is_mes_queue) { 371 uint32_t padding, offset; 372 373 offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); 374 padding = amdgpu_mes_ctx_get_offs(ring, 375 AMDGPU_MES_CTX_PADDING_OFFS); 376 377 ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 378 ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 379 380 gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); 381 cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); 382 *cpu_ptr = cpu_to_le32(0xCAFEDEAD); 383 } else { 384 r = amdgpu_device_wb_get(adev, &index); 385 if (r) 386 return r; 387 388 gpu_addr = adev->wb.gpu_addr + (index * 4); 389 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 390 cpu_ptr = &adev->wb.wb[index]; 391 392 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); 393 if (r) { 394 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 395 goto err1; 396 } 397 } 398 399 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 400 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 401 ib.ptr[2] = lower_32_bits(gpu_addr); 402 ib.ptr[3] = upper_32_bits(gpu_addr); 403 ib.ptr[4] = 0xDEADBEEF; 404 ib.length_dw = 5; 405 406 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 407 if (r) 408 goto err2; 409 410 r = dma_fence_wait_timeout(f, false, timeout); 411 if (r == 0) { 412 r = -ETIMEDOUT; 413 goto err2; 414 } else if (r < 0) { 415 goto err2; 416 } 417 418 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 419 r = 0; 420 else 421 r = -EINVAL; 422 err2: 423 if (!ring->is_mes_queue) 424 amdgpu_ib_free(adev, &ib, NULL); 425 dma_fence_put(f); 426 err1: 427 if (!ring->is_mes_queue) 428 amdgpu_device_wb_free(adev, index); 429 return r; 430 } 431 432 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 433 { 434 release_firmware(adev->gfx.pfp_fw); 435 adev->gfx.pfp_fw = NULL; 436 release_firmware(adev->gfx.me_fw); 437 adev->gfx.me_fw = NULL; 438 release_firmware(adev->gfx.rlc_fw); 439 adev->gfx.rlc_fw = NULL; 440 release_firmware(adev->gfx.mec_fw); 441 adev->gfx.mec_fw = NULL; 442 443 kfree(adev->gfx.rlc.register_list_format); 444 } 445 446 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 447 { 448 char fw_name[40]; 449 char ucode_prefix[30]; 450 int err; 451 const struct rlc_firmware_header_v2_0 *rlc_hdr; 452 uint16_t version_major; 453 uint16_t version_minor; 454 455 DRM_DEBUG("\n"); 456 457 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 458 459 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix); 460 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 461 if (err) 462 goto out; 463 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 464 if (err) 465 goto out; 466 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 467 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 468 (union amdgpu_firmware_header *) 469 adev->gfx.pfp_fw->data, 2, 0); 470 if (adev->gfx.rs64_enable) { 471 dev_info(adev->dev, "CP RS64 enable\n"); 472 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 473 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 474 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 475 } else { 476 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 477 } 478 479 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix); 480 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 481 if (err) 482 goto out; 483 err = amdgpu_ucode_validate(adev->gfx.me_fw); 484 if (err) 485 goto out; 486 if (adev->gfx.rs64_enable) { 487 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 488 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 489 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 490 } else { 491 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 492 } 493 494 if (!amdgpu_sriov_vf(adev)) { 495 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); 496 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 497 if (err) 498 goto out; 499 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 500 if (err) 501 goto out; 502 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 503 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 504 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 505 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 506 if (err) 507 goto out; 508 } 509 510 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); 511 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 512 if (err) 513 goto out; 514 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 515 if (err) 516 goto out; 517 if (adev->gfx.rs64_enable) { 518 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 519 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 520 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 521 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 522 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 523 } else { 524 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 525 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 526 } 527 528 /* only one MEC for gfx 11.0.0. */ 529 adev->gfx.mec2_fw = NULL; 530 531 out: 532 if (err) { 533 dev_err(adev->dev, 534 "gfx11: Failed to init firmware \"%s\"\n", 535 fw_name); 536 release_firmware(adev->gfx.pfp_fw); 537 adev->gfx.pfp_fw = NULL; 538 release_firmware(adev->gfx.me_fw); 539 adev->gfx.me_fw = NULL; 540 release_firmware(adev->gfx.rlc_fw); 541 adev->gfx.rlc_fw = NULL; 542 release_firmware(adev->gfx.mec_fw); 543 adev->gfx.mec_fw = NULL; 544 } 545 546 return err; 547 } 548 549 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev) 550 { 551 const struct psp_firmware_header_v1_0 *toc_hdr; 552 int err = 0; 553 char fw_name[40]; 554 char ucode_prefix[30]; 555 556 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 557 558 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix); 559 err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev); 560 if (err) 561 goto out; 562 563 err = amdgpu_ucode_validate(adev->psp.toc_fw); 564 if (err) 565 goto out; 566 567 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 568 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 569 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 570 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 571 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 572 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 573 return 0; 574 out: 575 dev_err(adev->dev, "Failed to load TOC microcode\n"); 576 release_firmware(adev->psp.toc_fw); 577 adev->psp.toc_fw = NULL; 578 return err; 579 } 580 581 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 582 { 583 u32 count = 0; 584 const struct cs_section_def *sect = NULL; 585 const struct cs_extent_def *ext = NULL; 586 587 /* begin clear state */ 588 count += 2; 589 /* context control state */ 590 count += 3; 591 592 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 593 for (ext = sect->section; ext->extent != NULL; ++ext) { 594 if (sect->id == SECT_CONTEXT) 595 count += 2 + ext->reg_count; 596 else 597 return 0; 598 } 599 } 600 601 /* set PA_SC_TILE_STEERING_OVERRIDE */ 602 count += 3; 603 /* end clear state */ 604 count += 2; 605 /* clear state */ 606 count += 2; 607 608 return count; 609 } 610 611 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, 612 volatile u32 *buffer) 613 { 614 u32 count = 0, i; 615 const struct cs_section_def *sect = NULL; 616 const struct cs_extent_def *ext = NULL; 617 int ctx_reg_offset; 618 619 if (adev->gfx.rlc.cs_data == NULL) 620 return; 621 if (buffer == NULL) 622 return; 623 624 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 625 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 626 627 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 628 buffer[count++] = cpu_to_le32(0x80000000); 629 buffer[count++] = cpu_to_le32(0x80000000); 630 631 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 632 for (ext = sect->section; ext->extent != NULL; ++ext) { 633 if (sect->id == SECT_CONTEXT) { 634 buffer[count++] = 635 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 636 buffer[count++] = cpu_to_le32(ext->reg_index - 637 PACKET3_SET_CONTEXT_REG_START); 638 for (i = 0; i < ext->reg_count; i++) 639 buffer[count++] = cpu_to_le32(ext->extent[i]); 640 } else { 641 return; 642 } 643 } 644 } 645 646 ctx_reg_offset = 647 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 648 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 649 buffer[count++] = cpu_to_le32(ctx_reg_offset); 650 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 651 652 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 653 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 654 655 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 656 buffer[count++] = cpu_to_le32(0); 657 } 658 659 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 660 { 661 /* clear state block */ 662 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 663 &adev->gfx.rlc.clear_state_gpu_addr, 664 (void **)&adev->gfx.rlc.cs_ptr); 665 666 /* jump table block */ 667 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 668 &adev->gfx.rlc.cp_table_gpu_addr, 669 (void **)&adev->gfx.rlc.cp_table_ptr); 670 } 671 672 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 673 { 674 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 675 676 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; 677 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 678 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); 679 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); 680 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3); 681 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL); 682 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX); 683 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0); 684 adev->gfx.rlc.rlcg_reg_access_supported = true; 685 } 686 687 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) 688 { 689 const struct cs_section_def *cs_data; 690 int r; 691 692 adev->gfx.rlc.cs_data = gfx11_cs_data; 693 694 cs_data = adev->gfx.rlc.cs_data; 695 696 if (cs_data) { 697 /* init clear state block */ 698 r = amdgpu_gfx_rlc_init_csb(adev); 699 if (r) 700 return r; 701 } 702 703 /* init spm vmid with 0xf */ 704 if (adev->gfx.rlc.funcs->update_spm_vmid) 705 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); 706 707 return 0; 708 } 709 710 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev) 711 { 712 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 713 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 714 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); 715 } 716 717 static int gfx_v11_0_me_init(struct amdgpu_device *adev) 718 { 719 int r; 720 721 bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 722 723 amdgpu_gfx_graphics_queue_acquire(adev); 724 725 r = gfx_v11_0_init_microcode(adev); 726 if (r) 727 DRM_ERROR("Failed to load gfx firmware!\n"); 728 729 return r; 730 } 731 732 static int gfx_v11_0_mec_init(struct amdgpu_device *adev) 733 { 734 int r; 735 u32 *hpd; 736 size_t mec_hpd_size; 737 738 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 739 740 /* take ownership of the relevant compute queues */ 741 amdgpu_gfx_compute_queue_acquire(adev); 742 mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE; 743 744 if (mec_hpd_size) { 745 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 746 AMDGPU_GEM_DOMAIN_GTT, 747 &adev->gfx.mec.hpd_eop_obj, 748 &adev->gfx.mec.hpd_eop_gpu_addr, 749 (void **)&hpd); 750 if (r) { 751 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 752 gfx_v11_0_mec_fini(adev); 753 return r; 754 } 755 756 memset(hpd, 0, mec_hpd_size); 757 758 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 759 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 760 } 761 762 return 0; 763 } 764 765 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) 766 { 767 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 768 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 769 (address << SQ_IND_INDEX__INDEX__SHIFT)); 770 return RREG32_SOC15(GC, 0, regSQ_IND_DATA); 771 } 772 773 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, 774 uint32_t thread, uint32_t regno, 775 uint32_t num, uint32_t *out) 776 { 777 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 778 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 779 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 780 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 781 (SQ_IND_INDEX__AUTO_INCR_MASK)); 782 while (num--) 783 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); 784 } 785 786 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 787 { 788 /* in gfx11 the SIMD_ID is specified as part of the INSTANCE 789 * field when performing a select_se_sh so it should be 790 * zero here */ 791 WARN_ON(simd != 0); 792 793 /* type 2 wave data */ 794 dst[(*no_fields)++] = 2; 795 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 796 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 797 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 798 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 799 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 800 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 801 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 802 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 803 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 804 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 805 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 806 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 807 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); 808 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 809 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 810 } 811 812 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 813 uint32_t wave, uint32_t start, 814 uint32_t size, uint32_t *dst) 815 { 816 WARN_ON(simd != 0); 817 818 wave_read_regs( 819 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 820 dst); 821 } 822 823 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 824 uint32_t wave, uint32_t thread, 825 uint32_t start, uint32_t size, 826 uint32_t *dst) 827 { 828 wave_read_regs( 829 adev, wave, thread, 830 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 831 } 832 833 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 834 u32 me, u32 pipe, u32 q, u32 vm) 835 { 836 soc21_grbm_select(adev, me, pipe, q, vm); 837 } 838 839 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 840 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 841 .select_se_sh = &gfx_v11_0_select_se_sh, 842 .read_wave_data = &gfx_v11_0_read_wave_data, 843 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 844 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 845 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 846 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 847 }; 848 849 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 850 { 851 852 switch (adev->ip_versions[GC_HWIP][0]) { 853 case IP_VERSION(11, 0, 0): 854 case IP_VERSION(11, 0, 2): 855 case IP_VERSION(11, 0, 3): 856 adev->gfx.config.max_hw_contexts = 8; 857 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 858 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 859 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 860 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 861 break; 862 case IP_VERSION(11, 0, 1): 863 case IP_VERSION(11, 0, 4): 864 adev->gfx.config.max_hw_contexts = 8; 865 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 866 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 867 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 868 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 869 break; 870 default: 871 BUG(); 872 break; 873 } 874 875 return 0; 876 } 877 878 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 879 int me, int pipe, int queue) 880 { 881 int r; 882 struct amdgpu_ring *ring; 883 unsigned int irq_type; 884 885 ring = &adev->gfx.gfx_ring[ring_id]; 886 887 ring->me = me; 888 ring->pipe = pipe; 889 ring->queue = queue; 890 891 ring->ring_obj = NULL; 892 ring->use_doorbell = true; 893 894 if (!ring_id) 895 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 896 else 897 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 898 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 899 900 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 901 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 902 AMDGPU_RING_PRIO_DEFAULT, NULL); 903 if (r) 904 return r; 905 return 0; 906 } 907 908 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 909 int mec, int pipe, int queue) 910 { 911 int r; 912 unsigned irq_type; 913 struct amdgpu_ring *ring; 914 unsigned int hw_prio; 915 916 ring = &adev->gfx.compute_ring[ring_id]; 917 918 /* mec0 is me1 */ 919 ring->me = mec + 1; 920 ring->pipe = pipe; 921 ring->queue = queue; 922 923 ring->ring_obj = NULL; 924 ring->use_doorbell = true; 925 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 926 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 927 + (ring_id * GFX11_MEC_HPD_SIZE); 928 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 929 930 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 931 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 932 + ring->pipe; 933 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 934 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 935 /* type-2 packets are deprecated on MEC, use type-3 instead */ 936 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 937 hw_prio, NULL); 938 if (r) 939 return r; 940 941 return 0; 942 } 943 944 static struct { 945 SOC21_FIRMWARE_ID id; 946 unsigned int offset; 947 unsigned int size; 948 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 949 950 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 951 { 952 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 953 954 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 955 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 956 rlc_autoload_info[ucode->id].id = ucode->id; 957 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 958 rlc_autoload_info[ucode->id].size = ucode->size * 4; 959 960 ucode++; 961 } 962 } 963 964 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 965 { 966 uint32_t total_size = 0; 967 SOC21_FIRMWARE_ID id; 968 969 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 970 971 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 972 total_size += rlc_autoload_info[id].size; 973 974 /* In case the offset in rlc toc ucode is aligned */ 975 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 976 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 977 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 978 979 return total_size; 980 } 981 982 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 983 { 984 int r; 985 uint32_t total_size; 986 987 total_size = gfx_v11_0_calc_toc_total_size(adev); 988 989 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 990 AMDGPU_GEM_DOMAIN_VRAM, 991 &adev->gfx.rlc.rlc_autoload_bo, 992 &adev->gfx.rlc.rlc_autoload_gpu_addr, 993 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 994 995 if (r) { 996 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 997 return r; 998 } 999 1000 return 0; 1001 } 1002 1003 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1004 SOC21_FIRMWARE_ID id, 1005 const void *fw_data, 1006 uint32_t fw_size, 1007 uint32_t *fw_autoload_mask) 1008 { 1009 uint32_t toc_offset; 1010 uint32_t toc_fw_size; 1011 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1012 1013 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1014 return; 1015 1016 toc_offset = rlc_autoload_info[id].offset; 1017 toc_fw_size = rlc_autoload_info[id].size; 1018 1019 if (fw_size == 0) 1020 fw_size = toc_fw_size; 1021 1022 if (fw_size > toc_fw_size) 1023 fw_size = toc_fw_size; 1024 1025 memcpy(ptr + toc_offset, fw_data, fw_size); 1026 1027 if (fw_size < toc_fw_size) 1028 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1029 1030 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1031 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1032 } 1033 1034 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1035 uint32_t *fw_autoload_mask) 1036 { 1037 void *data; 1038 uint32_t size; 1039 uint64_t *toc_ptr; 1040 1041 *(uint64_t *)fw_autoload_mask |= 0x1; 1042 1043 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1044 1045 data = adev->psp.toc.start_addr; 1046 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1047 1048 toc_ptr = (uint64_t *)data + size / 8 - 1; 1049 *toc_ptr = *(uint64_t *)fw_autoload_mask; 1050 1051 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1052 data, size, fw_autoload_mask); 1053 } 1054 1055 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1056 uint32_t *fw_autoload_mask) 1057 { 1058 const __le32 *fw_data; 1059 uint32_t fw_size; 1060 const struct gfx_firmware_header_v1_0 *cp_hdr; 1061 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 1062 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1063 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1064 uint16_t version_major, version_minor; 1065 1066 if (adev->gfx.rs64_enable) { 1067 /* pfp ucode */ 1068 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1069 adev->gfx.pfp_fw->data; 1070 /* instruction */ 1071 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1072 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1073 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1074 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1075 fw_data, fw_size, fw_autoload_mask); 1076 /* data */ 1077 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1078 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1079 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1080 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1081 fw_data, fw_size, fw_autoload_mask); 1082 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1083 fw_data, fw_size, fw_autoload_mask); 1084 /* me ucode */ 1085 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1086 adev->gfx.me_fw->data; 1087 /* instruction */ 1088 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1089 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1090 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1091 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1092 fw_data, fw_size, fw_autoload_mask); 1093 /* data */ 1094 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1095 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1096 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1097 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1098 fw_data, fw_size, fw_autoload_mask); 1099 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1100 fw_data, fw_size, fw_autoload_mask); 1101 /* mec ucode */ 1102 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1103 adev->gfx.mec_fw->data; 1104 /* instruction */ 1105 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1106 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1107 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1108 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1109 fw_data, fw_size, fw_autoload_mask); 1110 /* data */ 1111 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1112 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1113 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1114 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1115 fw_data, fw_size, fw_autoload_mask); 1116 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1117 fw_data, fw_size, fw_autoload_mask); 1118 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1119 fw_data, fw_size, fw_autoload_mask); 1120 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1121 fw_data, fw_size, fw_autoload_mask); 1122 } else { 1123 /* pfp ucode */ 1124 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1125 adev->gfx.pfp_fw->data; 1126 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1127 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1128 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1129 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1130 fw_data, fw_size, fw_autoload_mask); 1131 1132 /* me ucode */ 1133 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1134 adev->gfx.me_fw->data; 1135 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1136 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1137 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1138 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1139 fw_data, fw_size, fw_autoload_mask); 1140 1141 /* mec ucode */ 1142 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1143 adev->gfx.mec_fw->data; 1144 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1145 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1146 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1147 cp_hdr->jt_size * 4; 1148 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1149 fw_data, fw_size, fw_autoload_mask); 1150 } 1151 1152 /* rlc ucode */ 1153 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1154 adev->gfx.rlc_fw->data; 1155 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1156 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1157 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1158 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1159 fw_data, fw_size, fw_autoload_mask); 1160 1161 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1162 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1163 if (version_major == 2) { 1164 if (version_minor >= 2) { 1165 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1166 1167 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1168 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1169 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1170 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1171 fw_data, fw_size, fw_autoload_mask); 1172 1173 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1174 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1175 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1176 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1177 fw_data, fw_size, fw_autoload_mask); 1178 } 1179 } 1180 } 1181 1182 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1183 uint32_t *fw_autoload_mask) 1184 { 1185 const __le32 *fw_data; 1186 uint32_t fw_size; 1187 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1188 1189 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 1190 adev->sdma.instance[0].fw->data; 1191 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1192 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1193 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1194 1195 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1196 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1197 1198 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1199 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1200 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1201 1202 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1203 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask); 1204 } 1205 1206 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1207 uint32_t *fw_autoload_mask) 1208 { 1209 const __le32 *fw_data; 1210 unsigned fw_size; 1211 const struct mes_firmware_header_v1_0 *mes_hdr; 1212 int pipe, ucode_id, data_id; 1213 1214 for (pipe = 0; pipe < 2; pipe++) { 1215 if (pipe==0) { 1216 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1217 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1218 } else { 1219 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1220 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1221 } 1222 1223 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1224 adev->mes.fw[pipe]->data; 1225 1226 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1227 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1228 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1229 1230 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1231 ucode_id, fw_data, fw_size, fw_autoload_mask); 1232 1233 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1234 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1235 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1236 1237 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1238 data_id, fw_data, fw_size, fw_autoload_mask); 1239 } 1240 } 1241 1242 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1243 { 1244 uint32_t rlc_g_offset, rlc_g_size; 1245 uint64_t gpu_addr; 1246 uint32_t autoload_fw_id[2]; 1247 1248 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1249 1250 /* RLC autoload sequence 2: copy ucode */ 1251 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1252 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1253 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1254 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1255 1256 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1257 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1258 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1259 1260 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1261 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1262 1263 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1264 1265 /* RLC autoload sequence 3: load IMU fw */ 1266 if (adev->gfx.imu.funcs->load_microcode) 1267 adev->gfx.imu.funcs->load_microcode(adev); 1268 /* RLC autoload sequence 4 init IMU fw */ 1269 if (adev->gfx.imu.funcs->setup_imu) 1270 adev->gfx.imu.funcs->setup_imu(adev); 1271 if (adev->gfx.imu.funcs->start_imu) 1272 adev->gfx.imu.funcs->start_imu(adev); 1273 1274 /* RLC autoload sequence 5 disable gpa mode */ 1275 gfx_v11_0_disable_gpa_mode(adev); 1276 1277 return 0; 1278 } 1279 1280 static int gfx_v11_0_sw_init(void *handle) 1281 { 1282 int i, j, k, r, ring_id = 0; 1283 struct amdgpu_kiq *kiq; 1284 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1285 1286 adev->gfxhub.funcs->init(adev); 1287 1288 switch (adev->ip_versions[GC_HWIP][0]) { 1289 case IP_VERSION(11, 0, 0): 1290 case IP_VERSION(11, 0, 1): 1291 case IP_VERSION(11, 0, 2): 1292 case IP_VERSION(11, 0, 3): 1293 case IP_VERSION(11, 0, 4): 1294 adev->gfx.me.num_me = 1; 1295 adev->gfx.me.num_pipe_per_me = 1; 1296 adev->gfx.me.num_queue_per_pipe = 1; 1297 adev->gfx.mec.num_mec = 2; 1298 adev->gfx.mec.num_pipe_per_mec = 4; 1299 adev->gfx.mec.num_queue_per_pipe = 4; 1300 break; 1301 default: 1302 adev->gfx.me.num_me = 1; 1303 adev->gfx.me.num_pipe_per_me = 1; 1304 adev->gfx.me.num_queue_per_pipe = 1; 1305 adev->gfx.mec.num_mec = 1; 1306 adev->gfx.mec.num_pipe_per_mec = 4; 1307 adev->gfx.mec.num_queue_per_pipe = 8; 1308 break; 1309 } 1310 1311 /* EOP Event */ 1312 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1313 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1314 &adev->gfx.eop_irq); 1315 if (r) 1316 return r; 1317 1318 /* Privileged reg */ 1319 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1320 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1321 &adev->gfx.priv_reg_irq); 1322 if (r) 1323 return r; 1324 1325 /* Privileged inst */ 1326 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1327 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1328 &adev->gfx.priv_inst_irq); 1329 if (r) 1330 return r; 1331 1332 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1333 1334 if (adev->gfx.imu.funcs) { 1335 if (adev->gfx.imu.funcs->init_microcode) { 1336 r = adev->gfx.imu.funcs->init_microcode(adev); 1337 if (r) 1338 DRM_ERROR("Failed to load imu firmware!\n"); 1339 } 1340 } 1341 1342 r = gfx_v11_0_me_init(adev); 1343 if (r) 1344 return r; 1345 1346 r = gfx_v11_0_rlc_init(adev); 1347 if (r) { 1348 DRM_ERROR("Failed to init rlc BOs!\n"); 1349 return r; 1350 } 1351 1352 r = gfx_v11_0_mec_init(adev); 1353 if (r) { 1354 DRM_ERROR("Failed to init MEC BOs!\n"); 1355 return r; 1356 } 1357 1358 /* set up the gfx ring */ 1359 for (i = 0; i < adev->gfx.me.num_me; i++) { 1360 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 1361 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1362 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1363 continue; 1364 1365 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1366 i, k, j); 1367 if (r) 1368 return r; 1369 ring_id++; 1370 } 1371 } 1372 } 1373 1374 ring_id = 0; 1375 /* set up the compute queues - allocate horizontally across pipes */ 1376 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1377 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1378 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1379 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, 1380 j)) 1381 continue; 1382 1383 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1384 i, k, j); 1385 if (r) 1386 return r; 1387 1388 ring_id++; 1389 } 1390 } 1391 } 1392 1393 if (!adev->enable_mes_kiq) { 1394 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE); 1395 if (r) { 1396 DRM_ERROR("Failed to init KIQ BOs!\n"); 1397 return r; 1398 } 1399 1400 kiq = &adev->gfx.kiq; 1401 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1402 if (r) 1403 return r; 1404 } 1405 1406 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd)); 1407 if (r) 1408 return r; 1409 1410 /* allocate visible FB for rlc auto-loading fw */ 1411 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1412 r = gfx_v11_0_init_toc_microcode(adev); 1413 if (r) 1414 dev_err(adev->dev, "Failed to load toc firmware!\n"); 1415 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1416 if (r) 1417 return r; 1418 } 1419 1420 r = gfx_v11_0_gpu_early_init(adev); 1421 if (r) 1422 return r; 1423 1424 return 0; 1425 } 1426 1427 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1428 { 1429 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1430 &adev->gfx.pfp.pfp_fw_gpu_addr, 1431 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1432 1433 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1434 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1435 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1436 } 1437 1438 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1439 { 1440 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1441 &adev->gfx.me.me_fw_gpu_addr, 1442 (void **)&adev->gfx.me.me_fw_ptr); 1443 1444 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1445 &adev->gfx.me.me_fw_data_gpu_addr, 1446 (void **)&adev->gfx.me.me_fw_data_ptr); 1447 } 1448 1449 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1450 { 1451 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1452 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1453 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1454 } 1455 1456 static int gfx_v11_0_sw_fini(void *handle) 1457 { 1458 int i; 1459 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1460 1461 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1462 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1463 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1464 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1465 1466 amdgpu_gfx_mqd_sw_fini(adev); 1467 1468 if (!adev->enable_mes_kiq) { 1469 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 1470 amdgpu_gfx_kiq_fini(adev); 1471 } 1472 1473 gfx_v11_0_pfp_fini(adev); 1474 gfx_v11_0_me_fini(adev); 1475 gfx_v11_0_rlc_fini(adev); 1476 gfx_v11_0_mec_fini(adev); 1477 1478 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1479 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1480 1481 gfx_v11_0_free_microcode(adev); 1482 1483 return 0; 1484 } 1485 1486 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1487 u32 sh_num, u32 instance) 1488 { 1489 u32 data; 1490 1491 if (instance == 0xffffffff) 1492 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1493 INSTANCE_BROADCAST_WRITES, 1); 1494 else 1495 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1496 instance); 1497 1498 if (se_num == 0xffffffff) 1499 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1500 1); 1501 else 1502 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1503 1504 if (sh_num == 0xffffffff) 1505 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1506 1); 1507 else 1508 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1509 1510 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1511 } 1512 1513 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1514 { 1515 u32 data, mask; 1516 1517 data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1518 data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 1519 1520 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1521 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1522 1523 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1524 adev->gfx.config.max_sh_per_se); 1525 1526 return (~data) & mask; 1527 } 1528 1529 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 1530 { 1531 int i, j; 1532 u32 data; 1533 u32 active_rbs = 0; 1534 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1535 adev->gfx.config.max_sh_per_se; 1536 1537 mutex_lock(&adev->grbm_idx_mutex); 1538 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1539 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1540 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); 1541 data = gfx_v11_0_get_rb_active_bitmap(adev); 1542 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1543 rb_bitmap_width_per_sh); 1544 } 1545 } 1546 gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1547 mutex_unlock(&adev->grbm_idx_mutex); 1548 1549 adev->gfx.config.backend_enable_mask = active_rbs; 1550 adev->gfx.config.num_rbs = hweight32(active_rbs); 1551 } 1552 1553 #define DEFAULT_SH_MEM_BASES (0x6000) 1554 #define LDS_APP_BASE 0x1 1555 #define SCRATCH_APP_BASE 0x2 1556 1557 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 1558 { 1559 int i; 1560 uint32_t sh_mem_bases; 1561 uint32_t data; 1562 1563 /* 1564 * Configure apertures: 1565 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1566 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 1567 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1568 */ 1569 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 1570 SCRATCH_APP_BASE; 1571 1572 mutex_lock(&adev->srbm_mutex); 1573 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1574 soc21_grbm_select(adev, 0, 0, 0, i); 1575 /* CP and shaders */ 1576 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1577 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 1578 1579 /* Enable trap for each kfd vmid. */ 1580 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 1581 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 1582 } 1583 soc21_grbm_select(adev, 0, 0, 0, 0); 1584 mutex_unlock(&adev->srbm_mutex); 1585 1586 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 1587 acccess. These should be enabled by FW for target VMIDs. */ 1588 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1589 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 1590 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 1591 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 1592 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 1593 } 1594 } 1595 1596 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 1597 { 1598 int vmid; 1599 1600 /* 1601 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 1602 * access. Compute VMIDs should be enabled by FW for target VMIDs, 1603 * the driver can enable them for graphics. VMID0 should maintain 1604 * access so that HWS firmware can save/restore entries. 1605 */ 1606 for (vmid = 1; vmid < 16; vmid++) { 1607 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 1608 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 1609 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 1610 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 1611 } 1612 } 1613 1614 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 1615 { 1616 /* TODO: harvest feature to be added later. */ 1617 } 1618 1619 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 1620 { 1621 /* TCCs are global (not instanced). */ 1622 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 1623 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 1624 1625 adev->gfx.config.tcc_disabled_mask = 1626 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 1627 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 1628 } 1629 1630 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 1631 { 1632 u32 tmp; 1633 int i; 1634 1635 if (!amdgpu_sriov_vf(adev)) 1636 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1637 1638 gfx_v11_0_setup_rb(adev); 1639 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 1640 gfx_v11_0_get_tcc_info(adev); 1641 adev->gfx.config.pa_sc_tile_steering_override = 0; 1642 1643 /* XXX SH_MEM regs */ 1644 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1645 mutex_lock(&adev->srbm_mutex); 1646 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 1647 soc21_grbm_select(adev, 0, 0, 0, i); 1648 /* CP and shaders */ 1649 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1650 if (i != 0) { 1651 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1652 (adev->gmc.private_aperture_start >> 48)); 1653 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1654 (adev->gmc.shared_aperture_start >> 48)); 1655 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 1656 } 1657 } 1658 soc21_grbm_select(adev, 0, 0, 0, 0); 1659 1660 mutex_unlock(&adev->srbm_mutex); 1661 1662 gfx_v11_0_init_compute_vmid(adev); 1663 gfx_v11_0_init_gds_vmid(adev); 1664 } 1665 1666 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 1667 bool enable) 1668 { 1669 u32 tmp; 1670 1671 if (amdgpu_sriov_vf(adev)) 1672 return; 1673 1674 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); 1675 1676 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 1677 enable ? 1 : 0); 1678 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 1679 enable ? 1 : 0); 1680 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 1681 enable ? 1 : 0); 1682 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 1683 enable ? 1 : 0); 1684 1685 WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); 1686 } 1687 1688 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 1689 { 1690 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 1691 1692 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 1693 adev->gfx.rlc.clear_state_gpu_addr >> 32); 1694 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 1695 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 1696 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 1697 1698 return 0; 1699 } 1700 1701 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 1702 { 1703 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 1704 1705 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 1706 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 1707 } 1708 1709 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 1710 { 1711 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 1712 udelay(50); 1713 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 1714 udelay(50); 1715 } 1716 1717 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 1718 bool enable) 1719 { 1720 uint32_t rlc_pg_cntl; 1721 1722 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 1723 1724 if (!enable) { 1725 /* RLC_PG_CNTL[23] = 0 (default) 1726 * RLC will wait for handshake acks with SMU 1727 * GFXOFF will be enabled 1728 * RLC_PG_CNTL[23] = 1 1729 * RLC will not issue any message to SMU 1730 * hence no handshake between SMU & RLC 1731 * GFXOFF will be disabled 1732 */ 1733 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 1734 } else 1735 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 1736 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 1737 } 1738 1739 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 1740 { 1741 /* TODO: enable rlc & smu handshake until smu 1742 * and gfxoff feature works as expected */ 1743 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 1744 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 1745 1746 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 1747 udelay(50); 1748 } 1749 1750 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 1751 { 1752 uint32_t tmp; 1753 1754 /* enable Save Restore Machine */ 1755 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 1756 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 1757 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 1758 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 1759 } 1760 1761 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 1762 { 1763 const struct rlc_firmware_header_v2_0 *hdr; 1764 const __le32 *fw_data; 1765 unsigned i, fw_size; 1766 1767 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1768 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1769 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1770 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1771 1772 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 1773 RLCG_UCODE_LOADING_START_ADDRESS); 1774 1775 for (i = 0; i < fw_size; i++) 1776 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 1777 le32_to_cpup(fw_data++)); 1778 1779 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 1780 } 1781 1782 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 1783 { 1784 const struct rlc_firmware_header_v2_2 *hdr; 1785 const __le32 *fw_data; 1786 unsigned i, fw_size; 1787 u32 tmp; 1788 1789 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1790 1791 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1792 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 1793 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 1794 1795 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 1796 1797 for (i = 0; i < fw_size; i++) { 1798 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1799 msleep(1); 1800 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 1801 le32_to_cpup(fw_data++)); 1802 } 1803 1804 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 1805 1806 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1807 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 1808 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 1809 1810 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 1811 for (i = 0; i < fw_size; i++) { 1812 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1813 msleep(1); 1814 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 1815 le32_to_cpup(fw_data++)); 1816 } 1817 1818 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 1819 1820 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 1821 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 1822 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 1823 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 1824 } 1825 1826 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 1827 { 1828 const struct rlc_firmware_header_v2_3 *hdr; 1829 const __le32 *fw_data; 1830 unsigned i, fw_size; 1831 u32 tmp; 1832 1833 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 1834 1835 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1836 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 1837 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 1838 1839 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 1840 1841 for (i = 0; i < fw_size; i++) { 1842 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1843 msleep(1); 1844 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 1845 le32_to_cpup(fw_data++)); 1846 } 1847 1848 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 1849 1850 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 1851 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 1852 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 1853 1854 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1855 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 1856 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 1857 1858 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 1859 1860 for (i = 0; i < fw_size; i++) { 1861 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1862 msleep(1); 1863 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 1864 le32_to_cpup(fw_data++)); 1865 } 1866 1867 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 1868 1869 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 1870 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 1871 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 1872 } 1873 1874 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 1875 { 1876 const struct rlc_firmware_header_v2_0 *hdr; 1877 uint16_t version_major; 1878 uint16_t version_minor; 1879 1880 if (!adev->gfx.rlc_fw) 1881 return -EINVAL; 1882 1883 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1884 amdgpu_ucode_print_rlc_hdr(&hdr->header); 1885 1886 version_major = le16_to_cpu(hdr->header.header_version_major); 1887 version_minor = le16_to_cpu(hdr->header.header_version_minor); 1888 1889 if (version_major == 2) { 1890 gfx_v11_0_load_rlcg_microcode(adev); 1891 if (amdgpu_dpm == 1) { 1892 if (version_minor >= 2) 1893 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 1894 if (version_minor == 3) 1895 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 1896 } 1897 1898 return 0; 1899 } 1900 1901 return -EINVAL; 1902 } 1903 1904 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 1905 { 1906 int r; 1907 1908 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1909 gfx_v11_0_init_csb(adev); 1910 1911 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 1912 gfx_v11_0_rlc_enable_srm(adev); 1913 } else { 1914 if (amdgpu_sriov_vf(adev)) { 1915 gfx_v11_0_init_csb(adev); 1916 return 0; 1917 } 1918 1919 adev->gfx.rlc.funcs->stop(adev); 1920 1921 /* disable CG */ 1922 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 1923 1924 /* disable PG */ 1925 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 1926 1927 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1928 /* legacy rlc firmware loading */ 1929 r = gfx_v11_0_rlc_load_microcode(adev); 1930 if (r) 1931 return r; 1932 } 1933 1934 gfx_v11_0_init_csb(adev); 1935 1936 adev->gfx.rlc.funcs->start(adev); 1937 } 1938 return 0; 1939 } 1940 1941 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 1942 { 1943 uint32_t usec_timeout = 50000; /* wait for 50ms */ 1944 uint32_t tmp; 1945 int i; 1946 1947 /* Trigger an invalidation of the L1 instruction caches */ 1948 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 1949 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1950 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 1951 1952 /* Wait for invalidation complete */ 1953 for (i = 0; i < usec_timeout; i++) { 1954 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 1955 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 1956 INVALIDATE_CACHE_COMPLETE)) 1957 break; 1958 udelay(1); 1959 } 1960 1961 if (i >= usec_timeout) { 1962 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 1963 return -EINVAL; 1964 } 1965 1966 if (amdgpu_emu_mode == 1) 1967 adev->hdp.funcs->flush_hdp(adev, NULL); 1968 1969 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 1970 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 1971 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 1972 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 1973 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 1974 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 1975 1976 /* Program me ucode address into intruction cache address register */ 1977 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 1978 lower_32_bits(addr) & 0xFFFFF000); 1979 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 1980 upper_32_bits(addr)); 1981 1982 return 0; 1983 } 1984 1985 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 1986 { 1987 uint32_t usec_timeout = 50000; /* wait for 50ms */ 1988 uint32_t tmp; 1989 int i; 1990 1991 /* Trigger an invalidation of the L1 instruction caches */ 1992 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 1993 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1994 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 1995 1996 /* Wait for invalidation complete */ 1997 for (i = 0; i < usec_timeout; i++) { 1998 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 1999 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2000 INVALIDATE_CACHE_COMPLETE)) 2001 break; 2002 udelay(1); 2003 } 2004 2005 if (i >= usec_timeout) { 2006 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2007 return -EINVAL; 2008 } 2009 2010 if (amdgpu_emu_mode == 1) 2011 adev->hdp.funcs->flush_hdp(adev, NULL); 2012 2013 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2014 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2015 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2016 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2017 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2018 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2019 2020 /* Program pfp ucode address into intruction cache address register */ 2021 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2022 lower_32_bits(addr) & 0xFFFFF000); 2023 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2024 upper_32_bits(addr)); 2025 2026 return 0; 2027 } 2028 2029 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2030 { 2031 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2032 uint32_t tmp; 2033 int i; 2034 2035 /* Trigger an invalidation of the L1 instruction caches */ 2036 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2037 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2038 2039 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2040 2041 /* Wait for invalidation complete */ 2042 for (i = 0; i < usec_timeout; i++) { 2043 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2044 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2045 INVALIDATE_CACHE_COMPLETE)) 2046 break; 2047 udelay(1); 2048 } 2049 2050 if (i >= usec_timeout) { 2051 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2052 return -EINVAL; 2053 } 2054 2055 if (amdgpu_emu_mode == 1) 2056 adev->hdp.funcs->flush_hdp(adev, NULL); 2057 2058 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2059 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2060 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2061 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2062 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2063 2064 /* Program mec1 ucode address into intruction cache address register */ 2065 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2066 lower_32_bits(addr) & 0xFFFFF000); 2067 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2068 upper_32_bits(addr)); 2069 2070 return 0; 2071 } 2072 2073 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2074 { 2075 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2076 uint32_t tmp; 2077 unsigned i, pipe_id; 2078 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2079 2080 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2081 adev->gfx.pfp_fw->data; 2082 2083 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2084 lower_32_bits(addr)); 2085 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2086 upper_32_bits(addr)); 2087 2088 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2089 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2090 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2091 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2092 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2093 2094 /* 2095 * Programming any of the CP_PFP_IC_BASE registers 2096 * forces invalidation of the ME L1 I$. Wait for the 2097 * invalidation complete 2098 */ 2099 for (i = 0; i < usec_timeout; i++) { 2100 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2101 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2102 INVALIDATE_CACHE_COMPLETE)) 2103 break; 2104 udelay(1); 2105 } 2106 2107 if (i >= usec_timeout) { 2108 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2109 return -EINVAL; 2110 } 2111 2112 /* Prime the L1 instruction caches */ 2113 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2114 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2115 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2116 /* Waiting for cache primed*/ 2117 for (i = 0; i < usec_timeout; i++) { 2118 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2119 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2120 ICACHE_PRIMED)) 2121 break; 2122 udelay(1); 2123 } 2124 2125 if (i >= usec_timeout) { 2126 dev_err(adev->dev, "failed to prime instruction cache\n"); 2127 return -EINVAL; 2128 } 2129 2130 mutex_lock(&adev->srbm_mutex); 2131 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2132 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2133 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2134 (pfp_hdr->ucode_start_addr_hi << 30) | 2135 (pfp_hdr->ucode_start_addr_lo >> 2)); 2136 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2137 pfp_hdr->ucode_start_addr_hi >> 2); 2138 2139 /* 2140 * Program CP_ME_CNTL to reset given PIPE to take 2141 * effect of CP_PFP_PRGRM_CNTR_START. 2142 */ 2143 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2144 if (pipe_id == 0) 2145 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2146 PFP_PIPE0_RESET, 1); 2147 else 2148 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2149 PFP_PIPE1_RESET, 1); 2150 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2151 2152 /* Clear pfp pipe0 reset bit. */ 2153 if (pipe_id == 0) 2154 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2155 PFP_PIPE0_RESET, 0); 2156 else 2157 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2158 PFP_PIPE1_RESET, 0); 2159 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2160 2161 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2162 lower_32_bits(addr2)); 2163 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2164 upper_32_bits(addr2)); 2165 } 2166 soc21_grbm_select(adev, 0, 0, 0, 0); 2167 mutex_unlock(&adev->srbm_mutex); 2168 2169 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2170 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2171 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2172 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2173 2174 /* Invalidate the data caches */ 2175 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2176 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2177 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2178 2179 for (i = 0; i < usec_timeout; i++) { 2180 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2181 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2182 INVALIDATE_DCACHE_COMPLETE)) 2183 break; 2184 udelay(1); 2185 } 2186 2187 if (i >= usec_timeout) { 2188 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2189 return -EINVAL; 2190 } 2191 2192 return 0; 2193 } 2194 2195 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2196 { 2197 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2198 uint32_t tmp; 2199 unsigned i, pipe_id; 2200 const struct gfx_firmware_header_v2_0 *me_hdr; 2201 2202 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2203 adev->gfx.me_fw->data; 2204 2205 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2206 lower_32_bits(addr)); 2207 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2208 upper_32_bits(addr)); 2209 2210 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2211 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2212 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2213 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2214 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2215 2216 /* 2217 * Programming any of the CP_ME_IC_BASE registers 2218 * forces invalidation of the ME L1 I$. Wait for the 2219 * invalidation complete 2220 */ 2221 for (i = 0; i < usec_timeout; i++) { 2222 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2223 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2224 INVALIDATE_CACHE_COMPLETE)) 2225 break; 2226 udelay(1); 2227 } 2228 2229 if (i >= usec_timeout) { 2230 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2231 return -EINVAL; 2232 } 2233 2234 /* Prime the instruction caches */ 2235 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2236 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2237 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2238 2239 /* Waiting for instruction cache primed*/ 2240 for (i = 0; i < usec_timeout; i++) { 2241 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2242 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2243 ICACHE_PRIMED)) 2244 break; 2245 udelay(1); 2246 } 2247 2248 if (i >= usec_timeout) { 2249 dev_err(adev->dev, "failed to prime instruction cache\n"); 2250 return -EINVAL; 2251 } 2252 2253 mutex_lock(&adev->srbm_mutex); 2254 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2255 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2256 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2257 (me_hdr->ucode_start_addr_hi << 30) | 2258 (me_hdr->ucode_start_addr_lo >> 2) ); 2259 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2260 me_hdr->ucode_start_addr_hi>>2); 2261 2262 /* 2263 * Program CP_ME_CNTL to reset given PIPE to take 2264 * effect of CP_PFP_PRGRM_CNTR_START. 2265 */ 2266 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2267 if (pipe_id == 0) 2268 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2269 ME_PIPE0_RESET, 1); 2270 else 2271 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2272 ME_PIPE1_RESET, 1); 2273 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2274 2275 /* Clear pfp pipe0 reset bit. */ 2276 if (pipe_id == 0) 2277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2278 ME_PIPE0_RESET, 0); 2279 else 2280 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2281 ME_PIPE1_RESET, 0); 2282 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2283 2284 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2285 lower_32_bits(addr2)); 2286 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2287 upper_32_bits(addr2)); 2288 } 2289 soc21_grbm_select(adev, 0, 0, 0, 0); 2290 mutex_unlock(&adev->srbm_mutex); 2291 2292 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2293 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2294 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2295 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2296 2297 /* Invalidate the data caches */ 2298 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2299 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2300 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2301 2302 for (i = 0; i < usec_timeout; i++) { 2303 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2304 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2305 INVALIDATE_DCACHE_COMPLETE)) 2306 break; 2307 udelay(1); 2308 } 2309 2310 if (i >= usec_timeout) { 2311 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2312 return -EINVAL; 2313 } 2314 2315 return 0; 2316 } 2317 2318 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2319 { 2320 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2321 uint32_t tmp; 2322 unsigned i; 2323 const struct gfx_firmware_header_v2_0 *mec_hdr; 2324 2325 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2326 adev->gfx.mec_fw->data; 2327 2328 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2329 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2330 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2331 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2332 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2333 2334 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2335 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2336 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2337 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2338 2339 mutex_lock(&adev->srbm_mutex); 2340 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2341 soc21_grbm_select(adev, 1, i, 0, 0); 2342 2343 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2344 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2345 upper_32_bits(addr2)); 2346 2347 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2348 mec_hdr->ucode_start_addr_lo >> 2 | 2349 mec_hdr->ucode_start_addr_hi << 30); 2350 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2351 mec_hdr->ucode_start_addr_hi >> 2); 2352 2353 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2354 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2355 upper_32_bits(addr)); 2356 } 2357 mutex_unlock(&adev->srbm_mutex); 2358 soc21_grbm_select(adev, 0, 0, 0, 0); 2359 2360 /* Trigger an invalidation of the L1 instruction caches */ 2361 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2362 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2363 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2364 2365 /* Wait for invalidation complete */ 2366 for (i = 0; i < usec_timeout; i++) { 2367 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2368 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2369 INVALIDATE_DCACHE_COMPLETE)) 2370 break; 2371 udelay(1); 2372 } 2373 2374 if (i >= usec_timeout) { 2375 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2376 return -EINVAL; 2377 } 2378 2379 /* Trigger an invalidation of the L1 instruction caches */ 2380 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2381 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2382 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2383 2384 /* Wait for invalidation complete */ 2385 for (i = 0; i < usec_timeout; i++) { 2386 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2387 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2388 INVALIDATE_CACHE_COMPLETE)) 2389 break; 2390 udelay(1); 2391 } 2392 2393 if (i >= usec_timeout) { 2394 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2395 return -EINVAL; 2396 } 2397 2398 return 0; 2399 } 2400 2401 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2402 { 2403 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2404 const struct gfx_firmware_header_v2_0 *me_hdr; 2405 const struct gfx_firmware_header_v2_0 *mec_hdr; 2406 uint32_t pipe_id, tmp; 2407 2408 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2409 adev->gfx.mec_fw->data; 2410 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2411 adev->gfx.me_fw->data; 2412 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2413 adev->gfx.pfp_fw->data; 2414 2415 /* config pfp program start addr */ 2416 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2417 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2418 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2419 (pfp_hdr->ucode_start_addr_hi << 30) | 2420 (pfp_hdr->ucode_start_addr_lo >> 2)); 2421 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2422 pfp_hdr->ucode_start_addr_hi >> 2); 2423 } 2424 soc21_grbm_select(adev, 0, 0, 0, 0); 2425 2426 /* reset pfp pipe */ 2427 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2428 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 2429 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 2430 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2431 2432 /* clear pfp pipe reset */ 2433 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 2434 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 2435 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2436 2437 /* config me program start addr */ 2438 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2439 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2440 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2441 (me_hdr->ucode_start_addr_hi << 30) | 2442 (me_hdr->ucode_start_addr_lo >> 2) ); 2443 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2444 me_hdr->ucode_start_addr_hi>>2); 2445 } 2446 soc21_grbm_select(adev, 0, 0, 0, 0); 2447 2448 /* reset me pipe */ 2449 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2450 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 2451 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 2452 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2453 2454 /* clear me pipe reset */ 2455 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 2456 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 2457 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2458 2459 /* config mec program start addr */ 2460 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 2461 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 2462 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2463 mec_hdr->ucode_start_addr_lo >> 2 | 2464 mec_hdr->ucode_start_addr_hi << 30); 2465 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2466 mec_hdr->ucode_start_addr_hi >> 2); 2467 } 2468 soc21_grbm_select(adev, 0, 0, 0, 0); 2469 2470 /* reset mec pipe */ 2471 tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 2472 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 2473 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 2474 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 2475 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 2476 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2477 2478 /* clear mec pipe reset */ 2479 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 2480 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 2481 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 2482 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 2483 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2484 } 2485 2486 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 2487 { 2488 uint32_t cp_status; 2489 uint32_t bootload_status; 2490 int i, r; 2491 uint64_t addr, addr2; 2492 2493 for (i = 0; i < adev->usec_timeout; i++) { 2494 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 2495 2496 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || 2497 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) 2498 bootload_status = RREG32_SOC15(GC, 0, 2499 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 2500 else 2501 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 2502 2503 if ((cp_status == 0) && 2504 (REG_GET_FIELD(bootload_status, 2505 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 2506 break; 2507 } 2508 udelay(1); 2509 } 2510 2511 if (i >= adev->usec_timeout) { 2512 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 2513 return -ETIMEDOUT; 2514 } 2515 2516 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 2517 if (adev->gfx.rs64_enable) { 2518 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2519 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 2520 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2521 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 2522 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 2523 if (r) 2524 return r; 2525 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2526 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 2527 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2528 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 2529 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 2530 if (r) 2531 return r; 2532 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2533 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 2534 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2535 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 2536 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 2537 if (r) 2538 return r; 2539 } else { 2540 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2541 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 2542 r = gfx_v11_0_config_me_cache(adev, addr); 2543 if (r) 2544 return r; 2545 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2546 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 2547 r = gfx_v11_0_config_pfp_cache(adev, addr); 2548 if (r) 2549 return r; 2550 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2551 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 2552 r = gfx_v11_0_config_mec_cache(adev, addr); 2553 if (r) 2554 return r; 2555 } 2556 } 2557 2558 return 0; 2559 } 2560 2561 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2562 { 2563 int i; 2564 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2565 2566 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2567 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2568 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2569 2570 for (i = 0; i < adev->usec_timeout; i++) { 2571 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 2572 break; 2573 udelay(1); 2574 } 2575 2576 if (i >= adev->usec_timeout) 2577 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 2578 2579 return 0; 2580 } 2581 2582 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 2583 { 2584 int r; 2585 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2586 const __le32 *fw_data; 2587 unsigned i, fw_size; 2588 2589 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2590 adev->gfx.pfp_fw->data; 2591 2592 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2593 2594 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 2595 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2596 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 2597 2598 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 2599 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2600 &adev->gfx.pfp.pfp_fw_obj, 2601 &adev->gfx.pfp.pfp_fw_gpu_addr, 2602 (void **)&adev->gfx.pfp.pfp_fw_ptr); 2603 if (r) { 2604 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 2605 gfx_v11_0_pfp_fini(adev); 2606 return r; 2607 } 2608 2609 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 2610 2611 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 2612 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 2613 2614 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 2615 2616 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 2617 2618 for (i = 0; i < pfp_hdr->jt_size; i++) 2619 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 2620 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 2621 2622 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2623 2624 return 0; 2625 } 2626 2627 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 2628 { 2629 int r; 2630 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2631 const __le32 *fw_ucode, *fw_data; 2632 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 2633 uint32_t tmp; 2634 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2635 2636 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2637 adev->gfx.pfp_fw->data; 2638 2639 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2640 2641 /* instruction */ 2642 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 2643 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 2644 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 2645 /* data */ 2646 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 2647 le32_to_cpu(pfp_hdr->data_offset_bytes)); 2648 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 2649 2650 /* 64kb align */ 2651 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 2652 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2653 &adev->gfx.pfp.pfp_fw_obj, 2654 &adev->gfx.pfp.pfp_fw_gpu_addr, 2655 (void **)&adev->gfx.pfp.pfp_fw_ptr); 2656 if (r) { 2657 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 2658 gfx_v11_0_pfp_fini(adev); 2659 return r; 2660 } 2661 2662 r = amdgpu_bo_create_reserved(adev, fw_data_size, 2663 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2664 &adev->gfx.pfp.pfp_fw_data_obj, 2665 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 2666 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 2667 if (r) { 2668 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 2669 gfx_v11_0_pfp_fini(adev); 2670 return r; 2671 } 2672 2673 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 2674 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 2675 2676 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 2677 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 2678 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 2679 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 2680 2681 if (amdgpu_emu_mode == 1) 2682 adev->hdp.funcs->flush_hdp(adev, NULL); 2683 2684 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2685 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 2686 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2687 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 2688 2689 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2690 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2691 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2692 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2693 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2694 2695 /* 2696 * Programming any of the CP_PFP_IC_BASE registers 2697 * forces invalidation of the ME L1 I$. Wait for the 2698 * invalidation complete 2699 */ 2700 for (i = 0; i < usec_timeout; i++) { 2701 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2702 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2703 INVALIDATE_CACHE_COMPLETE)) 2704 break; 2705 udelay(1); 2706 } 2707 2708 if (i >= usec_timeout) { 2709 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2710 return -EINVAL; 2711 } 2712 2713 /* Prime the L1 instruction caches */ 2714 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2715 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2716 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2717 /* Waiting for cache primed*/ 2718 for (i = 0; i < usec_timeout; i++) { 2719 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2720 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2721 ICACHE_PRIMED)) 2722 break; 2723 udelay(1); 2724 } 2725 2726 if (i >= usec_timeout) { 2727 dev_err(adev->dev, "failed to prime instruction cache\n"); 2728 return -EINVAL; 2729 } 2730 2731 mutex_lock(&adev->srbm_mutex); 2732 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2733 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2734 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2735 (pfp_hdr->ucode_start_addr_hi << 30) | 2736 (pfp_hdr->ucode_start_addr_lo >> 2) ); 2737 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2738 pfp_hdr->ucode_start_addr_hi>>2); 2739 2740 /* 2741 * Program CP_ME_CNTL to reset given PIPE to take 2742 * effect of CP_PFP_PRGRM_CNTR_START. 2743 */ 2744 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2745 if (pipe_id == 0) 2746 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2747 PFP_PIPE0_RESET, 1); 2748 else 2749 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2750 PFP_PIPE1_RESET, 1); 2751 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2752 2753 /* Clear pfp pipe0 reset bit. */ 2754 if (pipe_id == 0) 2755 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2756 PFP_PIPE0_RESET, 0); 2757 else 2758 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2759 PFP_PIPE1_RESET, 0); 2760 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2761 2762 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2763 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 2764 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2765 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 2766 } 2767 soc21_grbm_select(adev, 0, 0, 0, 0); 2768 mutex_unlock(&adev->srbm_mutex); 2769 2770 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2771 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2772 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2773 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2774 2775 /* Invalidate the data caches */ 2776 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2777 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2778 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2779 2780 for (i = 0; i < usec_timeout; i++) { 2781 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2782 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2783 INVALIDATE_DCACHE_COMPLETE)) 2784 break; 2785 udelay(1); 2786 } 2787 2788 if (i >= usec_timeout) { 2789 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2790 return -EINVAL; 2791 } 2792 2793 return 0; 2794 } 2795 2796 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 2797 { 2798 int r; 2799 const struct gfx_firmware_header_v1_0 *me_hdr; 2800 const __le32 *fw_data; 2801 unsigned i, fw_size; 2802 2803 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2804 adev->gfx.me_fw->data; 2805 2806 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2807 2808 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 2809 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2810 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 2811 2812 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 2813 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2814 &adev->gfx.me.me_fw_obj, 2815 &adev->gfx.me.me_fw_gpu_addr, 2816 (void **)&adev->gfx.me.me_fw_ptr); 2817 if (r) { 2818 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 2819 gfx_v11_0_me_fini(adev); 2820 return r; 2821 } 2822 2823 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 2824 2825 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 2826 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 2827 2828 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 2829 2830 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 2831 2832 for (i = 0; i < me_hdr->jt_size; i++) 2833 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 2834 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 2835 2836 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 2837 2838 return 0; 2839 } 2840 2841 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 2842 { 2843 int r; 2844 const struct gfx_firmware_header_v2_0 *me_hdr; 2845 const __le32 *fw_ucode, *fw_data; 2846 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 2847 uint32_t tmp; 2848 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2849 2850 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2851 adev->gfx.me_fw->data; 2852 2853 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2854 2855 /* instruction */ 2856 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 2857 le32_to_cpu(me_hdr->ucode_offset_bytes)); 2858 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 2859 /* data */ 2860 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 2861 le32_to_cpu(me_hdr->data_offset_bytes)); 2862 fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); 2863 2864 /* 64kb align*/ 2865 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 2866 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2867 &adev->gfx.me.me_fw_obj, 2868 &adev->gfx.me.me_fw_gpu_addr, 2869 (void **)&adev->gfx.me.me_fw_ptr); 2870 if (r) { 2871 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); 2872 gfx_v11_0_me_fini(adev); 2873 return r; 2874 } 2875 2876 r = amdgpu_bo_create_reserved(adev, fw_data_size, 2877 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2878 &adev->gfx.me.me_fw_data_obj, 2879 &adev->gfx.me.me_fw_data_gpu_addr, 2880 (void **)&adev->gfx.me.me_fw_data_ptr); 2881 if (r) { 2882 dev_err(adev->dev, "(%d) failed to create me data bo\n", r); 2883 gfx_v11_0_pfp_fini(adev); 2884 return r; 2885 } 2886 2887 memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); 2888 memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); 2889 2890 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 2891 amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); 2892 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 2893 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 2894 2895 if (amdgpu_emu_mode == 1) 2896 adev->hdp.funcs->flush_hdp(adev, NULL); 2897 2898 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2899 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); 2900 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2901 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 2902 2903 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2904 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2905 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2906 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2907 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2908 2909 /* 2910 * Programming any of the CP_ME_IC_BASE registers 2911 * forces invalidation of the ME L1 I$. Wait for the 2912 * invalidation complete 2913 */ 2914 for (i = 0; i < usec_timeout; i++) { 2915 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2916 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2917 INVALIDATE_CACHE_COMPLETE)) 2918 break; 2919 udelay(1); 2920 } 2921 2922 if (i >= usec_timeout) { 2923 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2924 return -EINVAL; 2925 } 2926 2927 /* Prime the instruction caches */ 2928 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2929 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2930 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2931 2932 /* Waiting for instruction cache primed*/ 2933 for (i = 0; i < usec_timeout; i++) { 2934 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2935 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2936 ICACHE_PRIMED)) 2937 break; 2938 udelay(1); 2939 } 2940 2941 if (i >= usec_timeout) { 2942 dev_err(adev->dev, "failed to prime instruction cache\n"); 2943 return -EINVAL; 2944 } 2945 2946 mutex_lock(&adev->srbm_mutex); 2947 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2948 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2949 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2950 (me_hdr->ucode_start_addr_hi << 30) | 2951 (me_hdr->ucode_start_addr_lo >> 2) ); 2952 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2953 me_hdr->ucode_start_addr_hi>>2); 2954 2955 /* 2956 * Program CP_ME_CNTL to reset given PIPE to take 2957 * effect of CP_PFP_PRGRM_CNTR_START. 2958 */ 2959 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2960 if (pipe_id == 0) 2961 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2962 ME_PIPE0_RESET, 1); 2963 else 2964 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2965 ME_PIPE1_RESET, 1); 2966 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2967 2968 /* Clear pfp pipe0 reset bit. */ 2969 if (pipe_id == 0) 2970 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2971 ME_PIPE0_RESET, 0); 2972 else 2973 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2974 ME_PIPE1_RESET, 0); 2975 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2976 2977 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2978 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 2979 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2980 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 2981 } 2982 soc21_grbm_select(adev, 0, 0, 0, 0); 2983 mutex_unlock(&adev->srbm_mutex); 2984 2985 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2986 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2987 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2988 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2989 2990 /* Invalidate the data caches */ 2991 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2992 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2993 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2994 2995 for (i = 0; i < usec_timeout; i++) { 2996 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2997 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2998 INVALIDATE_DCACHE_COMPLETE)) 2999 break; 3000 udelay(1); 3001 } 3002 3003 if (i >= usec_timeout) { 3004 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3005 return -EINVAL; 3006 } 3007 3008 return 0; 3009 } 3010 3011 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3012 { 3013 int r; 3014 3015 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3016 return -EINVAL; 3017 3018 gfx_v11_0_cp_gfx_enable(adev, false); 3019 3020 if (adev->gfx.rs64_enable) 3021 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3022 else 3023 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3024 if (r) { 3025 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3026 return r; 3027 } 3028 3029 if (adev->gfx.rs64_enable) 3030 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3031 else 3032 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3033 if (r) { 3034 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3035 return r; 3036 } 3037 3038 return 0; 3039 } 3040 3041 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3042 { 3043 struct amdgpu_ring *ring; 3044 const struct cs_section_def *sect = NULL; 3045 const struct cs_extent_def *ext = NULL; 3046 int r, i; 3047 int ctx_reg_offset; 3048 3049 /* init the CP */ 3050 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3051 adev->gfx.config.max_hw_contexts - 1); 3052 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3053 3054 if (!amdgpu_async_gfx_ring) 3055 gfx_v11_0_cp_gfx_enable(adev, true); 3056 3057 ring = &adev->gfx.gfx_ring[0]; 3058 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3059 if (r) { 3060 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3061 return r; 3062 } 3063 3064 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3065 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3066 3067 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3068 amdgpu_ring_write(ring, 0x80000000); 3069 amdgpu_ring_write(ring, 0x80000000); 3070 3071 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3072 for (ext = sect->section; ext->extent != NULL; ++ext) { 3073 if (sect->id == SECT_CONTEXT) { 3074 amdgpu_ring_write(ring, 3075 PACKET3(PACKET3_SET_CONTEXT_REG, 3076 ext->reg_count)); 3077 amdgpu_ring_write(ring, ext->reg_index - 3078 PACKET3_SET_CONTEXT_REG_START); 3079 for (i = 0; i < ext->reg_count; i++) 3080 amdgpu_ring_write(ring, ext->extent[i]); 3081 } 3082 } 3083 } 3084 3085 ctx_reg_offset = 3086 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3087 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3088 amdgpu_ring_write(ring, ctx_reg_offset); 3089 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3090 3091 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3092 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3093 3094 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3095 amdgpu_ring_write(ring, 0); 3096 3097 amdgpu_ring_commit(ring); 3098 3099 /* submit cs packet to copy state 0 to next available state */ 3100 if (adev->gfx.num_gfx_rings > 1) { 3101 /* maximum supported gfx ring is 2 */ 3102 ring = &adev->gfx.gfx_ring[1]; 3103 r = amdgpu_ring_alloc(ring, 2); 3104 if (r) { 3105 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3106 return r; 3107 } 3108 3109 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3110 amdgpu_ring_write(ring, 0); 3111 3112 amdgpu_ring_commit(ring); 3113 } 3114 return 0; 3115 } 3116 3117 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3118 CP_PIPE_ID pipe) 3119 { 3120 u32 tmp; 3121 3122 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3123 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3124 3125 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3126 } 3127 3128 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3129 struct amdgpu_ring *ring) 3130 { 3131 u32 tmp; 3132 3133 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3134 if (ring->use_doorbell) { 3135 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3136 DOORBELL_OFFSET, ring->doorbell_index); 3137 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3138 DOORBELL_EN, 1); 3139 } else { 3140 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3141 DOORBELL_EN, 0); 3142 } 3143 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3144 3145 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3146 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3147 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3148 3149 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3150 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3151 } 3152 3153 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3154 { 3155 struct amdgpu_ring *ring; 3156 u32 tmp; 3157 u32 rb_bufsz; 3158 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3159 u32 i; 3160 3161 /* Set the write pointer delay */ 3162 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3163 3164 /* set the RB to use vmid 0 */ 3165 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3166 3167 /* Init gfx ring 0 for pipe 0 */ 3168 mutex_lock(&adev->srbm_mutex); 3169 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3170 3171 /* Set ring buffer size */ 3172 ring = &adev->gfx.gfx_ring[0]; 3173 rb_bufsz = order_base_2(ring->ring_size / 8); 3174 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3175 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3176 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3177 3178 /* Initialize the ring buffer's write pointers */ 3179 ring->wptr = 0; 3180 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3181 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3182 3183 /* set the wb address wether it's enabled or not */ 3184 rptr_addr = ring->rptr_gpu_addr; 3185 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3186 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3187 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3188 3189 wptr_gpu_addr = ring->wptr_gpu_addr; 3190 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3191 lower_32_bits(wptr_gpu_addr)); 3192 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3193 upper_32_bits(wptr_gpu_addr)); 3194 3195 mdelay(1); 3196 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3197 3198 rb_addr = ring->gpu_addr >> 8; 3199 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3200 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3201 3202 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3203 3204 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3205 mutex_unlock(&adev->srbm_mutex); 3206 3207 /* Init gfx ring 1 for pipe 1 */ 3208 if (adev->gfx.num_gfx_rings > 1) { 3209 mutex_lock(&adev->srbm_mutex); 3210 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3211 /* maximum supported gfx ring is 2 */ 3212 ring = &adev->gfx.gfx_ring[1]; 3213 rb_bufsz = order_base_2(ring->ring_size / 8); 3214 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3215 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3216 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3217 /* Initialize the ring buffer's write pointers */ 3218 ring->wptr = 0; 3219 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3220 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3221 /* Set the wb address wether it's enabled or not */ 3222 rptr_addr = ring->rptr_gpu_addr; 3223 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3224 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3225 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3226 wptr_gpu_addr = ring->wptr_gpu_addr; 3227 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3228 lower_32_bits(wptr_gpu_addr)); 3229 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3230 upper_32_bits(wptr_gpu_addr)); 3231 3232 mdelay(1); 3233 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3234 3235 rb_addr = ring->gpu_addr >> 8; 3236 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3237 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3238 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3239 3240 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3241 mutex_unlock(&adev->srbm_mutex); 3242 } 3243 /* Switch to pipe 0 */ 3244 mutex_lock(&adev->srbm_mutex); 3245 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3246 mutex_unlock(&adev->srbm_mutex); 3247 3248 /* start the ring */ 3249 gfx_v11_0_cp_gfx_start(adev); 3250 3251 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3252 ring = &adev->gfx.gfx_ring[i]; 3253 ring->sched.ready = true; 3254 } 3255 3256 return 0; 3257 } 3258 3259 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3260 { 3261 u32 data; 3262 3263 if (adev->gfx.rs64_enable) { 3264 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3265 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3266 enable ? 0 : 1); 3267 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3268 enable ? 0 : 1); 3269 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3270 enable ? 0 : 1); 3271 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3272 enable ? 0 : 1); 3273 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3274 enable ? 0 : 1); 3275 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3276 enable ? 1 : 0); 3277 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3278 enable ? 1 : 0); 3279 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3280 enable ? 1 : 0); 3281 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3282 enable ? 1 : 0); 3283 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3284 enable ? 0 : 1); 3285 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3286 } else { 3287 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3288 3289 if (enable) { 3290 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3291 if (!adev->enable_mes_kiq) 3292 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3293 MEC_ME2_HALT, 0); 3294 } else { 3295 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3296 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3297 } 3298 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3299 } 3300 3301 adev->gfx.kiq.ring.sched.ready = enable; 3302 3303 udelay(50); 3304 } 3305 3306 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3307 { 3308 const struct gfx_firmware_header_v1_0 *mec_hdr; 3309 const __le32 *fw_data; 3310 unsigned i, fw_size; 3311 u32 *fw = NULL; 3312 int r; 3313 3314 if (!adev->gfx.mec_fw) 3315 return -EINVAL; 3316 3317 gfx_v11_0_cp_compute_enable(adev, false); 3318 3319 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3320 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3321 3322 fw_data = (const __le32 *) 3323 (adev->gfx.mec_fw->data + 3324 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3325 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3326 3327 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3328 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3329 &adev->gfx.mec.mec_fw_obj, 3330 &adev->gfx.mec.mec_fw_gpu_addr, 3331 (void **)&fw); 3332 if (r) { 3333 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3334 gfx_v11_0_mec_fini(adev); 3335 return r; 3336 } 3337 3338 memcpy(fw, fw_data, fw_size); 3339 3340 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3341 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3342 3343 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3344 3345 /* MEC1 */ 3346 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3347 3348 for (i = 0; i < mec_hdr->jt_size; i++) 3349 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3350 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3351 3352 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3353 3354 return 0; 3355 } 3356 3357 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3358 { 3359 const struct gfx_firmware_header_v2_0 *mec_hdr; 3360 const __le32 *fw_ucode, *fw_data; 3361 u32 tmp, fw_ucode_size, fw_data_size; 3362 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3363 u32 *fw_ucode_ptr, *fw_data_ptr; 3364 int r; 3365 3366 if (!adev->gfx.mec_fw) 3367 return -EINVAL; 3368 3369 gfx_v11_0_cp_compute_enable(adev, false); 3370 3371 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3372 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3373 3374 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3375 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3376 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3377 3378 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3379 le32_to_cpu(mec_hdr->data_offset_bytes)); 3380 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3381 3382 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3383 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 3384 &adev->gfx.mec.mec_fw_obj, 3385 &adev->gfx.mec.mec_fw_gpu_addr, 3386 (void **)&fw_ucode_ptr); 3387 if (r) { 3388 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3389 gfx_v11_0_mec_fini(adev); 3390 return r; 3391 } 3392 3393 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3394 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 3395 &adev->gfx.mec.mec_fw_data_obj, 3396 &adev->gfx.mec.mec_fw_data_gpu_addr, 3397 (void **)&fw_data_ptr); 3398 if (r) { 3399 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3400 gfx_v11_0_mec_fini(adev); 3401 return r; 3402 } 3403 3404 memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 3405 memcpy(fw_data_ptr, fw_data, fw_data_size); 3406 3407 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3408 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 3409 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3410 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 3411 3412 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 3413 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3414 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 3415 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3416 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 3417 3418 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 3419 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 3420 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 3421 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 3422 3423 mutex_lock(&adev->srbm_mutex); 3424 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 3425 soc21_grbm_select(adev, 1, i, 0, 0); 3426 3427 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr); 3428 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 3429 upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr)); 3430 3431 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3432 mec_hdr->ucode_start_addr_lo >> 2 | 3433 mec_hdr->ucode_start_addr_hi << 30); 3434 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3435 mec_hdr->ucode_start_addr_hi >> 2); 3436 3437 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr); 3438 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 3439 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3440 } 3441 mutex_unlock(&adev->srbm_mutex); 3442 soc21_grbm_select(adev, 0, 0, 0, 0); 3443 3444 /* Trigger an invalidation of the L1 instruction caches */ 3445 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 3446 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3447 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 3448 3449 /* Wait for invalidation complete */ 3450 for (i = 0; i < usec_timeout; i++) { 3451 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 3452 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 3453 INVALIDATE_DCACHE_COMPLETE)) 3454 break; 3455 udelay(1); 3456 } 3457 3458 if (i >= usec_timeout) { 3459 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3460 return -EINVAL; 3461 } 3462 3463 /* Trigger an invalidation of the L1 instruction caches */ 3464 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 3465 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 3466 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 3467 3468 /* Wait for invalidation complete */ 3469 for (i = 0; i < usec_timeout; i++) { 3470 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 3471 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 3472 INVALIDATE_CACHE_COMPLETE)) 3473 break; 3474 udelay(1); 3475 } 3476 3477 if (i >= usec_timeout) { 3478 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3479 return -EINVAL; 3480 } 3481 3482 return 0; 3483 } 3484 3485 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 3486 { 3487 uint32_t tmp; 3488 struct amdgpu_device *adev = ring->adev; 3489 3490 /* tell RLC which is KIQ queue */ 3491 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 3492 tmp &= 0xffffff00; 3493 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3494 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); 3495 tmp |= 0x80; 3496 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); 3497 } 3498 3499 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 3500 { 3501 /* set graphics engine doorbell range */ 3502 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 3503 (adev->doorbell_index.gfx_ring0 * 2) << 2); 3504 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3505 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 3506 3507 /* set compute engine doorbell range */ 3508 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 3509 (adev->doorbell_index.kiq * 2) << 2); 3510 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 3511 (adev->doorbell_index.userqueue_end * 2) << 2); 3512 } 3513 3514 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 3515 struct amdgpu_mqd_prop *prop) 3516 { 3517 struct v11_gfx_mqd *mqd = m; 3518 uint64_t hqd_gpu_addr, wb_gpu_addr; 3519 uint32_t tmp; 3520 uint32_t rb_bufsz; 3521 3522 /* set up gfx hqd wptr */ 3523 mqd->cp_gfx_hqd_wptr = 0; 3524 mqd->cp_gfx_hqd_wptr_hi = 0; 3525 3526 /* set the pointer to the MQD */ 3527 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 3528 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 3529 3530 /* set up mqd control */ 3531 tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); 3532 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 3533 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 3534 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 3535 mqd->cp_gfx_mqd_control = tmp; 3536 3537 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 3538 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); 3539 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 3540 mqd->cp_gfx_hqd_vmid = 0; 3541 3542 /* set up default queue priority level 3543 * 0x0 = low priority, 0x1 = high priority */ 3544 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); 3545 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); 3546 mqd->cp_gfx_hqd_queue_priority = tmp; 3547 3548 /* set up time quantum */ 3549 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); 3550 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 3551 mqd->cp_gfx_hqd_quantum = tmp; 3552 3553 /* set up gfx hqd base. this is similar as CP_RB_BASE */ 3554 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 3555 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 3556 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 3557 3558 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 3559 wb_gpu_addr = prop->rptr_gpu_addr; 3560 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 3561 mqd->cp_gfx_hqd_rptr_addr_hi = 3562 upper_32_bits(wb_gpu_addr) & 0xffff; 3563 3564 /* set up rb_wptr_poll addr */ 3565 wb_gpu_addr = prop->wptr_gpu_addr; 3566 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3567 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3568 3569 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 3570 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 3571 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); 3572 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 3573 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 3574 #ifdef __BIG_ENDIAN 3575 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 3576 #endif 3577 mqd->cp_gfx_hqd_cntl = tmp; 3578 3579 /* set up cp_doorbell_control */ 3580 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3581 if (prop->use_doorbell) { 3582 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3583 DOORBELL_OFFSET, prop->doorbell_index); 3584 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3585 DOORBELL_EN, 1); 3586 } else 3587 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3588 DOORBELL_EN, 0); 3589 mqd->cp_rb_doorbell_control = tmp; 3590 3591 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3592 mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); 3593 3594 /* active the queue */ 3595 mqd->cp_gfx_hqd_active = 1; 3596 3597 return 0; 3598 } 3599 3600 #ifdef BRING_UP_DEBUG 3601 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring) 3602 { 3603 struct amdgpu_device *adev = ring->adev; 3604 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 3605 3606 /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ 3607 WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); 3608 WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); 3609 3610 /* set GFX_MQD_BASE */ 3611 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); 3612 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 3613 3614 /* set GFX_MQD_CONTROL */ 3615 WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); 3616 3617 /* set GFX_HQD_VMID to 0 */ 3618 WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); 3619 3620 WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY, 3621 mqd->cp_gfx_hqd_queue_priority); 3622 WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); 3623 3624 /* set GFX_HQD_BASE, similar as CP_RB_BASE */ 3625 WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); 3626 WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); 3627 3628 /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ 3629 WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); 3630 WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); 3631 3632 /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */ 3633 WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); 3634 3635 /* set RB_WPTR_POLL_ADDR */ 3636 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); 3637 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); 3638 3639 /* set RB_DOORBELL_CONTROL */ 3640 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); 3641 3642 /* active the queue */ 3643 WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); 3644 3645 return 0; 3646 } 3647 #endif 3648 3649 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) 3650 { 3651 struct amdgpu_device *adev = ring->adev; 3652 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 3653 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 3654 3655 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 3656 memset((void *)mqd, 0, sizeof(*mqd)); 3657 mutex_lock(&adev->srbm_mutex); 3658 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3659 amdgpu_ring_init_mqd(ring); 3660 #ifdef BRING_UP_DEBUG 3661 gfx_v11_0_gfx_queue_init_register(ring); 3662 #endif 3663 soc21_grbm_select(adev, 0, 0, 0, 0); 3664 mutex_unlock(&adev->srbm_mutex); 3665 if (adev->gfx.me.mqd_backup[mqd_idx]) 3666 memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 3667 } else if (amdgpu_in_reset(adev)) { 3668 /* reset mqd with the backup copy */ 3669 if (adev->gfx.me.mqd_backup[mqd_idx]) 3670 memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 3671 /* reset the ring */ 3672 ring->wptr = 0; 3673 *ring->wptr_cpu_addr = 0; 3674 amdgpu_ring_clear_ring(ring); 3675 #ifdef BRING_UP_DEBUG 3676 mutex_lock(&adev->srbm_mutex); 3677 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3678 gfx_v11_0_gfx_queue_init_register(ring); 3679 soc21_grbm_select(adev, 0, 0, 0, 0); 3680 mutex_unlock(&adev->srbm_mutex); 3681 #endif 3682 } else { 3683 amdgpu_ring_clear_ring(ring); 3684 } 3685 3686 return 0; 3687 } 3688 3689 #ifndef BRING_UP_DEBUG 3690 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev) 3691 { 3692 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 3693 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3694 int r, i; 3695 3696 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 3697 return -EINVAL; 3698 3699 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * 3700 adev->gfx.num_gfx_rings); 3701 if (r) { 3702 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3703 return r; 3704 } 3705 3706 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3707 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); 3708 3709 return amdgpu_ring_test_helper(kiq_ring); 3710 } 3711 #endif 3712 3713 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 3714 { 3715 int r, i; 3716 struct amdgpu_ring *ring; 3717 3718 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3719 ring = &adev->gfx.gfx_ring[i]; 3720 3721 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3722 if (unlikely(r != 0)) 3723 goto done; 3724 3725 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3726 if (!r) { 3727 r = gfx_v11_0_gfx_init_queue(ring); 3728 amdgpu_bo_kunmap(ring->mqd_obj); 3729 ring->mqd_ptr = NULL; 3730 } 3731 amdgpu_bo_unreserve(ring->mqd_obj); 3732 if (r) 3733 goto done; 3734 } 3735 #ifndef BRING_UP_DEBUG 3736 r = gfx_v11_0_kiq_enable_kgq(adev); 3737 if (r) 3738 goto done; 3739 #endif 3740 r = gfx_v11_0_cp_gfx_start(adev); 3741 if (r) 3742 goto done; 3743 3744 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3745 ring = &adev->gfx.gfx_ring[i]; 3746 ring->sched.ready = true; 3747 } 3748 done: 3749 return r; 3750 } 3751 3752 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 3753 struct amdgpu_mqd_prop *prop) 3754 { 3755 struct v11_compute_mqd *mqd = m; 3756 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3757 uint32_t tmp; 3758 3759 mqd->header = 0xC0310800; 3760 mqd->compute_pipelinestat_enable = 0x00000001; 3761 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3762 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3763 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3764 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3765 mqd->compute_misc_reserved = 0x00000007; 3766 3767 eop_base_addr = prop->eop_gpu_addr >> 8; 3768 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3769 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3770 3771 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3772 tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); 3773 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3774 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 3775 3776 mqd->cp_hqd_eop_control = tmp; 3777 3778 /* enable doorbell? */ 3779 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 3780 3781 if (prop->use_doorbell) { 3782 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3783 DOORBELL_OFFSET, prop->doorbell_index); 3784 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3785 DOORBELL_EN, 1); 3786 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3787 DOORBELL_SOURCE, 0); 3788 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3789 DOORBELL_HIT, 0); 3790 } else { 3791 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3792 DOORBELL_EN, 0); 3793 } 3794 3795 mqd->cp_hqd_pq_doorbell_control = tmp; 3796 3797 /* disable the queue if it's active */ 3798 mqd->cp_hqd_dequeue_request = 0; 3799 mqd->cp_hqd_pq_rptr = 0; 3800 mqd->cp_hqd_pq_wptr_lo = 0; 3801 mqd->cp_hqd_pq_wptr_hi = 0; 3802 3803 /* set the pointer to the MQD */ 3804 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 3805 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 3806 3807 /* set MQD vmid to 0 */ 3808 tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); 3809 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3810 mqd->cp_mqd_control = tmp; 3811 3812 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3813 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 3814 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3815 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3816 3817 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3818 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); 3819 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3820 (order_base_2(prop->queue_size / 4) - 1)); 3821 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3822 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3823 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3824 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 3825 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3826 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3827 mqd->cp_hqd_pq_control = tmp; 3828 3829 /* set the wb address whether it's enabled or not */ 3830 wb_gpu_addr = prop->rptr_gpu_addr; 3831 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3832 mqd->cp_hqd_pq_rptr_report_addr_hi = 3833 upper_32_bits(wb_gpu_addr) & 0xffff; 3834 3835 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3836 wb_gpu_addr = prop->wptr_gpu_addr; 3837 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3838 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3839 3840 tmp = 0; 3841 /* enable the doorbell if requested */ 3842 if (prop->use_doorbell) { 3843 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 3844 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3845 DOORBELL_OFFSET, prop->doorbell_index); 3846 3847 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3848 DOORBELL_EN, 1); 3849 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3850 DOORBELL_SOURCE, 0); 3851 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3852 DOORBELL_HIT, 0); 3853 } 3854 3855 mqd->cp_hqd_pq_doorbell_control = tmp; 3856 3857 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3858 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); 3859 3860 /* set the vmid for the queue */ 3861 mqd->cp_hqd_vmid = 0; 3862 3863 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); 3864 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 3865 mqd->cp_hqd_persistent_state = tmp; 3866 3867 /* set MIN_IB_AVAIL_SIZE */ 3868 tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); 3869 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3870 mqd->cp_hqd_ib_control = tmp; 3871 3872 /* set static priority for a compute queue/ring */ 3873 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 3874 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 3875 3876 mqd->cp_hqd_active = prop->hqd_active; 3877 3878 return 0; 3879 } 3880 3881 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 3882 { 3883 struct amdgpu_device *adev = ring->adev; 3884 struct v11_compute_mqd *mqd = ring->mqd_ptr; 3885 int j; 3886 3887 /* inactivate the queue */ 3888 if (amdgpu_sriov_vf(adev)) 3889 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 3890 3891 /* disable wptr polling */ 3892 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3893 3894 /* write the EOP addr */ 3895 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 3896 mqd->cp_hqd_eop_base_addr_lo); 3897 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 3898 mqd->cp_hqd_eop_base_addr_hi); 3899 3900 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3901 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 3902 mqd->cp_hqd_eop_control); 3903 3904 /* enable doorbell? */ 3905 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 3906 mqd->cp_hqd_pq_doorbell_control); 3907 3908 /* disable the queue if it's active */ 3909 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 3910 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 3911 for (j = 0; j < adev->usec_timeout; j++) { 3912 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 3913 break; 3914 udelay(1); 3915 } 3916 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 3917 mqd->cp_hqd_dequeue_request); 3918 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 3919 mqd->cp_hqd_pq_rptr); 3920 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 3921 mqd->cp_hqd_pq_wptr_lo); 3922 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 3923 mqd->cp_hqd_pq_wptr_hi); 3924 } 3925 3926 /* set the pointer to the MQD */ 3927 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 3928 mqd->cp_mqd_base_addr_lo); 3929 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 3930 mqd->cp_mqd_base_addr_hi); 3931 3932 /* set MQD vmid to 0 */ 3933 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 3934 mqd->cp_mqd_control); 3935 3936 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3937 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 3938 mqd->cp_hqd_pq_base_lo); 3939 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 3940 mqd->cp_hqd_pq_base_hi); 3941 3942 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3943 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 3944 mqd->cp_hqd_pq_control); 3945 3946 /* set the wb address whether it's enabled or not */ 3947 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 3948 mqd->cp_hqd_pq_rptr_report_addr_lo); 3949 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3950 mqd->cp_hqd_pq_rptr_report_addr_hi); 3951 3952 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3953 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 3954 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3955 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3956 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3957 3958 /* enable the doorbell if requested */ 3959 if (ring->use_doorbell) { 3960 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 3961 (adev->doorbell_index.kiq * 2) << 2); 3962 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 3963 (adev->doorbell_index.userqueue_end * 2) << 2); 3964 } 3965 3966 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 3967 mqd->cp_hqd_pq_doorbell_control); 3968 3969 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3970 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 3971 mqd->cp_hqd_pq_wptr_lo); 3972 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 3973 mqd->cp_hqd_pq_wptr_hi); 3974 3975 /* set the vmid for the queue */ 3976 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 3977 3978 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 3979 mqd->cp_hqd_persistent_state); 3980 3981 /* activate the queue */ 3982 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 3983 mqd->cp_hqd_active); 3984 3985 if (ring->use_doorbell) 3986 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3987 3988 return 0; 3989 } 3990 3991 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 3992 { 3993 struct amdgpu_device *adev = ring->adev; 3994 struct v11_compute_mqd *mqd = ring->mqd_ptr; 3995 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3996 3997 gfx_v11_0_kiq_setting(ring); 3998 3999 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4000 /* reset MQD to a clean status */ 4001 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4002 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4003 4004 /* reset ring buffer */ 4005 ring->wptr = 0; 4006 amdgpu_ring_clear_ring(ring); 4007 4008 mutex_lock(&adev->srbm_mutex); 4009 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4010 gfx_v11_0_kiq_init_register(ring); 4011 soc21_grbm_select(adev, 0, 0, 0, 0); 4012 mutex_unlock(&adev->srbm_mutex); 4013 } else { 4014 memset((void *)mqd, 0, sizeof(*mqd)); 4015 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4016 amdgpu_ring_clear_ring(ring); 4017 mutex_lock(&adev->srbm_mutex); 4018 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4019 amdgpu_ring_init_mqd(ring); 4020 gfx_v11_0_kiq_init_register(ring); 4021 soc21_grbm_select(adev, 0, 0, 0, 0); 4022 mutex_unlock(&adev->srbm_mutex); 4023 4024 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4025 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4026 } 4027 4028 return 0; 4029 } 4030 4031 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) 4032 { 4033 struct amdgpu_device *adev = ring->adev; 4034 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4035 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4036 4037 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4038 memset((void *)mqd, 0, sizeof(*mqd)); 4039 mutex_lock(&adev->srbm_mutex); 4040 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4041 amdgpu_ring_init_mqd(ring); 4042 soc21_grbm_select(adev, 0, 0, 0, 0); 4043 mutex_unlock(&adev->srbm_mutex); 4044 4045 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4046 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4047 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4048 /* reset MQD to a clean status */ 4049 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4050 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4051 4052 /* reset ring buffer */ 4053 ring->wptr = 0; 4054 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4055 amdgpu_ring_clear_ring(ring); 4056 } else { 4057 amdgpu_ring_clear_ring(ring); 4058 } 4059 4060 return 0; 4061 } 4062 4063 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4064 { 4065 struct amdgpu_ring *ring; 4066 int r; 4067 4068 ring = &adev->gfx.kiq.ring; 4069 4070 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4071 if (unlikely(r != 0)) 4072 return r; 4073 4074 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4075 if (unlikely(r != 0)) { 4076 amdgpu_bo_unreserve(ring->mqd_obj); 4077 return r; 4078 } 4079 4080 gfx_v11_0_kiq_init_queue(ring); 4081 amdgpu_bo_kunmap(ring->mqd_obj); 4082 ring->mqd_ptr = NULL; 4083 amdgpu_bo_unreserve(ring->mqd_obj); 4084 ring->sched.ready = true; 4085 return 0; 4086 } 4087 4088 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4089 { 4090 struct amdgpu_ring *ring = NULL; 4091 int r = 0, i; 4092 4093 if (!amdgpu_async_gfx_ring) 4094 gfx_v11_0_cp_compute_enable(adev, true); 4095 4096 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4097 ring = &adev->gfx.compute_ring[i]; 4098 4099 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4100 if (unlikely(r != 0)) 4101 goto done; 4102 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4103 if (!r) { 4104 r = gfx_v11_0_kcq_init_queue(ring); 4105 amdgpu_bo_kunmap(ring->mqd_obj); 4106 ring->mqd_ptr = NULL; 4107 } 4108 amdgpu_bo_unreserve(ring->mqd_obj); 4109 if (r) 4110 goto done; 4111 } 4112 4113 r = amdgpu_gfx_enable_kcq(adev); 4114 done: 4115 return r; 4116 } 4117 4118 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4119 { 4120 int r, i; 4121 struct amdgpu_ring *ring; 4122 4123 if (!(adev->flags & AMD_IS_APU)) 4124 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4125 4126 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4127 /* legacy firmware loading */ 4128 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4129 if (r) 4130 return r; 4131 4132 if (adev->gfx.rs64_enable) 4133 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4134 else 4135 r = gfx_v11_0_cp_compute_load_microcode(adev); 4136 if (r) 4137 return r; 4138 } 4139 4140 gfx_v11_0_cp_set_doorbell_range(adev); 4141 4142 if (amdgpu_async_gfx_ring) { 4143 gfx_v11_0_cp_compute_enable(adev, true); 4144 gfx_v11_0_cp_gfx_enable(adev, true); 4145 } 4146 4147 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4148 r = amdgpu_mes_kiq_hw_init(adev); 4149 else 4150 r = gfx_v11_0_kiq_resume(adev); 4151 if (r) 4152 return r; 4153 4154 r = gfx_v11_0_kcq_resume(adev); 4155 if (r) 4156 return r; 4157 4158 if (!amdgpu_async_gfx_ring) { 4159 r = gfx_v11_0_cp_gfx_resume(adev); 4160 if (r) 4161 return r; 4162 } else { 4163 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4164 if (r) 4165 return r; 4166 } 4167 4168 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4169 ring = &adev->gfx.gfx_ring[i]; 4170 r = amdgpu_ring_test_helper(ring); 4171 if (r) 4172 return r; 4173 } 4174 4175 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4176 ring = &adev->gfx.compute_ring[i]; 4177 r = amdgpu_ring_test_helper(ring); 4178 if (r) 4179 return r; 4180 } 4181 4182 return 0; 4183 } 4184 4185 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4186 { 4187 gfx_v11_0_cp_gfx_enable(adev, enable); 4188 gfx_v11_0_cp_compute_enable(adev, enable); 4189 } 4190 4191 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4192 { 4193 int r; 4194 bool value; 4195 4196 r = adev->gfxhub.funcs->gart_enable(adev); 4197 if (r) 4198 return r; 4199 4200 adev->hdp.funcs->flush_hdp(adev, NULL); 4201 4202 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4203 false : true; 4204 4205 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4206 amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); 4207 4208 return 0; 4209 } 4210 4211 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4212 { 4213 u32 tmp; 4214 4215 /* select RS64 */ 4216 if (adev->gfx.rs64_enable) { 4217 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4218 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4219 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4220 4221 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4222 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4223 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4224 } 4225 4226 if (amdgpu_emu_mode == 1) 4227 msleep(100); 4228 } 4229 4230 static int get_gb_addr_config(struct amdgpu_device * adev) 4231 { 4232 u32 gb_addr_config; 4233 4234 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4235 if (gb_addr_config == 0) 4236 return -EINVAL; 4237 4238 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4239 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4240 4241 adev->gfx.config.gb_addr_config = gb_addr_config; 4242 4243 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4244 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4245 GB_ADDR_CONFIG, NUM_PIPES); 4246 4247 adev->gfx.config.max_tile_pipes = 4248 adev->gfx.config.gb_addr_config_fields.num_pipes; 4249 4250 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4251 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4252 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4253 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4254 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4255 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4256 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4257 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4258 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4259 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4260 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4261 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4262 4263 return 0; 4264 } 4265 4266 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4267 { 4268 uint32_t data; 4269 4270 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4271 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4272 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4273 4274 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4275 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4276 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4277 } 4278 4279 static int gfx_v11_0_hw_init(void *handle) 4280 { 4281 int r; 4282 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4283 4284 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4285 if (adev->gfx.imu.funcs) { 4286 /* RLC autoload sequence 1: Program rlc ram */ 4287 if (adev->gfx.imu.funcs->program_rlc_ram) 4288 adev->gfx.imu.funcs->program_rlc_ram(adev); 4289 } 4290 /* rlc autoload firmware */ 4291 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4292 if (r) 4293 return r; 4294 } else { 4295 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4296 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4297 if (adev->gfx.imu.funcs->load_microcode) 4298 adev->gfx.imu.funcs->load_microcode(adev); 4299 if (adev->gfx.imu.funcs->setup_imu) 4300 adev->gfx.imu.funcs->setup_imu(adev); 4301 if (adev->gfx.imu.funcs->start_imu) 4302 adev->gfx.imu.funcs->start_imu(adev); 4303 } 4304 4305 /* disable gpa mode in backdoor loading */ 4306 gfx_v11_0_disable_gpa_mode(adev); 4307 } 4308 } 4309 4310 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || 4311 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 4312 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev); 4313 if (r) { 4314 dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r); 4315 return r; 4316 } 4317 } 4318 4319 adev->gfx.is_poweron = true; 4320 4321 if(get_gb_addr_config(adev)) 4322 DRM_WARN("Invalid gb_addr_config !\n"); 4323 4324 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && 4325 adev->gfx.rs64_enable) 4326 gfx_v11_0_config_gfx_rs64(adev); 4327 4328 r = gfx_v11_0_gfxhub_enable(adev); 4329 if (r) 4330 return r; 4331 4332 if (!amdgpu_emu_mode) 4333 gfx_v11_0_init_golden_registers(adev); 4334 4335 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || 4336 (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { 4337 /** 4338 * For gfx 11, rlc firmware loading relies on smu firmware is 4339 * loaded firstly, so in direct type, it has to load smc ucode 4340 * here before rlc. 4341 */ 4342 if (!(adev->flags & AMD_IS_APU)) { 4343 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4344 if (r) 4345 return r; 4346 } 4347 } 4348 4349 gfx_v11_0_constants_init(adev); 4350 4351 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4352 gfx_v11_0_select_cp_fw_arch(adev); 4353 4354 if (adev->nbio.funcs->gc_doorbell_init) 4355 adev->nbio.funcs->gc_doorbell_init(adev); 4356 4357 r = gfx_v11_0_rlc_resume(adev); 4358 if (r) 4359 return r; 4360 4361 /* 4362 * init golden registers and rlc resume may override some registers, 4363 * reconfig them here 4364 */ 4365 gfx_v11_0_tcp_harvest(adev); 4366 4367 r = gfx_v11_0_cp_resume(adev); 4368 if (r) 4369 return r; 4370 4371 return r; 4372 } 4373 4374 #ifndef BRING_UP_DEBUG 4375 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev) 4376 { 4377 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4378 struct amdgpu_ring *kiq_ring = &kiq->ring; 4379 int i, r = 0; 4380 4381 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 4382 return -EINVAL; 4383 4384 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * 4385 adev->gfx.num_gfx_rings)) 4386 return -ENOMEM; 4387 4388 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4389 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], 4390 PREEMPT_QUEUES, 0, 0); 4391 4392 if (adev->gfx.kiq.ring.sched.ready) 4393 r = amdgpu_ring_test_helper(kiq_ring); 4394 4395 return r; 4396 } 4397 #endif 4398 4399 static int gfx_v11_0_hw_fini(void *handle) 4400 { 4401 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4402 int r; 4403 4404 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4405 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4406 4407 if (!adev->no_hw_access) { 4408 #ifndef BRING_UP_DEBUG 4409 if (amdgpu_async_gfx_ring) { 4410 r = gfx_v11_0_kiq_disable_kgq(adev); 4411 if (r) 4412 DRM_ERROR("KGQ disable failed\n"); 4413 } 4414 #endif 4415 if (amdgpu_gfx_disable_kcq(adev)) 4416 DRM_ERROR("KCQ disable failed\n"); 4417 4418 amdgpu_mes_kiq_hw_fini(adev); 4419 } 4420 4421 if (amdgpu_sriov_vf(adev)) 4422 /* Remove the steps disabling CPG and clearing KIQ position, 4423 * so that CP could perform IDLE-SAVE during switch. Those 4424 * steps are necessary to avoid a DMAR error in gfx9 but it is 4425 * not reproduced on gfx11. 4426 */ 4427 return 0; 4428 4429 gfx_v11_0_cp_enable(adev, false); 4430 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4431 4432 adev->gfxhub.funcs->gart_disable(adev); 4433 4434 adev->gfx.is_poweron = false; 4435 4436 return 0; 4437 } 4438 4439 static int gfx_v11_0_suspend(void *handle) 4440 { 4441 return gfx_v11_0_hw_fini(handle); 4442 } 4443 4444 static int gfx_v11_0_resume(void *handle) 4445 { 4446 return gfx_v11_0_hw_init(handle); 4447 } 4448 4449 static bool gfx_v11_0_is_idle(void *handle) 4450 { 4451 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4452 4453 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4454 GRBM_STATUS, GUI_ACTIVE)) 4455 return false; 4456 else 4457 return true; 4458 } 4459 4460 static int gfx_v11_0_wait_for_idle(void *handle) 4461 { 4462 unsigned i; 4463 u32 tmp; 4464 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4465 4466 for (i = 0; i < adev->usec_timeout; i++) { 4467 /* read MC_STATUS */ 4468 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4469 GRBM_STATUS__GUI_ACTIVE_MASK; 4470 4471 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4472 return 0; 4473 udelay(1); 4474 } 4475 return -ETIMEDOUT; 4476 } 4477 4478 static int gfx_v11_0_soft_reset(void *handle) 4479 { 4480 u32 grbm_soft_reset = 0; 4481 u32 tmp; 4482 int i, j, k; 4483 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4484 4485 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4486 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 4487 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 4488 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 4489 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 4490 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4491 4492 gfx_v11_0_set_safe_mode(adev); 4493 4494 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 4495 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 4496 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 4497 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 4498 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); 4499 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); 4500 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); 4501 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 4502 4503 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 4504 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 4505 } 4506 } 4507 } 4508 for (i = 0; i < adev->gfx.me.num_me; ++i) { 4509 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 4510 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 4511 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 4512 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); 4513 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); 4514 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); 4515 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 4516 4517 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 4518 } 4519 } 4520 } 4521 4522 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 4523 4524 // Read CP_VMID_RESET register three times. 4525 // to get sufficient time for GFX_HQD_ACTIVE reach 0 4526 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4527 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4528 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4529 4530 for (i = 0; i < adev->usec_timeout; i++) { 4531 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 4532 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 4533 break; 4534 udelay(1); 4535 } 4536 if (i >= adev->usec_timeout) { 4537 printk("Failed to wait all pipes clean\n"); 4538 return -EINVAL; 4539 } 4540 4541 /********** trigger soft reset ***********/ 4542 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4543 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4544 SOFT_RESET_CP, 1); 4545 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4546 SOFT_RESET_GFX, 1); 4547 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4548 SOFT_RESET_CPF, 1); 4549 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4550 SOFT_RESET_CPC, 1); 4551 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4552 SOFT_RESET_CPG, 1); 4553 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4554 /********** exit soft reset ***********/ 4555 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4556 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4557 SOFT_RESET_CP, 0); 4558 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4559 SOFT_RESET_GFX, 0); 4560 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4561 SOFT_RESET_CPF, 0); 4562 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4563 SOFT_RESET_CPC, 0); 4564 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4565 SOFT_RESET_CPG, 0); 4566 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4567 4568 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 4569 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 4570 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 4571 4572 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 4573 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 4574 4575 for (i = 0; i < adev->usec_timeout; i++) { 4576 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 4577 break; 4578 udelay(1); 4579 } 4580 if (i >= adev->usec_timeout) { 4581 printk("Failed to wait CP_VMID_RESET to 0\n"); 4582 return -EINVAL; 4583 } 4584 4585 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4586 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 4587 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 4588 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 4589 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 4590 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4591 4592 gfx_v11_0_unset_safe_mode(adev); 4593 4594 return gfx_v11_0_cp_resume(adev); 4595 } 4596 4597 static bool gfx_v11_0_check_soft_reset(void *handle) 4598 { 4599 int i, r; 4600 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4601 struct amdgpu_ring *ring; 4602 long tmo = msecs_to_jiffies(1000); 4603 4604 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4605 ring = &adev->gfx.gfx_ring[i]; 4606 r = amdgpu_ring_test_ib(ring, tmo); 4607 if (r) 4608 return true; 4609 } 4610 4611 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4612 ring = &adev->gfx.compute_ring[i]; 4613 r = amdgpu_ring_test_ib(ring, tmo); 4614 if (r) 4615 return true; 4616 } 4617 4618 return false; 4619 } 4620 4621 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4622 { 4623 uint64_t clock; 4624 4625 amdgpu_gfx_off_ctrl(adev, false); 4626 mutex_lock(&adev->gfx.gpu_clock_mutex); 4627 clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) | 4628 ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL); 4629 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4630 amdgpu_gfx_off_ctrl(adev, true); 4631 return clock; 4632 } 4633 4634 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4635 uint32_t vmid, 4636 uint32_t gds_base, uint32_t gds_size, 4637 uint32_t gws_base, uint32_t gws_size, 4638 uint32_t oa_base, uint32_t oa_size) 4639 { 4640 struct amdgpu_device *adev = ring->adev; 4641 4642 /* GDS Base */ 4643 gfx_v11_0_write_data_to_reg(ring, 0, false, 4644 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 4645 gds_base); 4646 4647 /* GDS Size */ 4648 gfx_v11_0_write_data_to_reg(ring, 0, false, 4649 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 4650 gds_size); 4651 4652 /* GWS */ 4653 gfx_v11_0_write_data_to_reg(ring, 0, false, 4654 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 4655 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4656 4657 /* OA */ 4658 gfx_v11_0_write_data_to_reg(ring, 0, false, 4659 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 4660 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4661 } 4662 4663 static int gfx_v11_0_early_init(void *handle) 4664 { 4665 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4666 4667 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 4668 4669 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 4670 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4671 AMDGPU_MAX_COMPUTE_RINGS); 4672 4673 gfx_v11_0_set_kiq_pm4_funcs(adev); 4674 gfx_v11_0_set_ring_funcs(adev); 4675 gfx_v11_0_set_irq_funcs(adev); 4676 gfx_v11_0_set_gds_init(adev); 4677 gfx_v11_0_set_rlc_funcs(adev); 4678 gfx_v11_0_set_mqd_funcs(adev); 4679 gfx_v11_0_set_imu_funcs(adev); 4680 4681 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 4682 4683 return 0; 4684 } 4685 4686 static int gfx_v11_0_ras_late_init(void *handle) 4687 { 4688 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4689 struct ras_common_if *gfx_common_if; 4690 int ret; 4691 4692 gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); 4693 if (!gfx_common_if) 4694 return -ENOMEM; 4695 4696 gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; 4697 4698 ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); 4699 if (ret) 4700 dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); 4701 4702 kfree(gfx_common_if); 4703 return 0; 4704 } 4705 4706 static int gfx_v11_0_late_init(void *handle) 4707 { 4708 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4709 int r; 4710 4711 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4712 if (r) 4713 return r; 4714 4715 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4716 if (r) 4717 return r; 4718 4719 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { 4720 r = gfx_v11_0_ras_late_init(handle); 4721 if (r) 4722 return r; 4723 } 4724 4725 return 0; 4726 } 4727 4728 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 4729 { 4730 uint32_t rlc_cntl; 4731 4732 /* if RLC is not enabled, do nothing */ 4733 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 4734 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 4735 } 4736 4737 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) 4738 { 4739 uint32_t data; 4740 unsigned i; 4741 4742 data = RLC_SAFE_MODE__CMD_MASK; 4743 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4744 4745 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 4746 4747 /* wait for RLC_SAFE_MODE */ 4748 for (i = 0; i < adev->usec_timeout; i++) { 4749 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 4750 RLC_SAFE_MODE, CMD)) 4751 break; 4752 udelay(1); 4753 } 4754 } 4755 4756 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev) 4757 { 4758 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 4759 } 4760 4761 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 4762 bool enable) 4763 { 4764 uint32_t def, data; 4765 4766 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 4767 return; 4768 4769 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4770 4771 if (enable) 4772 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 4773 else 4774 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 4775 4776 if (def != data) 4777 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4778 } 4779 4780 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 4781 bool enable) 4782 { 4783 uint32_t def, data; 4784 4785 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 4786 return; 4787 4788 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4789 4790 if (enable) 4791 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 4792 else 4793 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 4794 4795 if (def != data) 4796 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4797 } 4798 4799 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 4800 bool enable) 4801 { 4802 uint32_t def, data; 4803 4804 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 4805 return; 4806 4807 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4808 4809 if (enable) 4810 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 4811 else 4812 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 4813 4814 if (def != data) 4815 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4816 } 4817 4818 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4819 bool enable) 4820 { 4821 uint32_t data, def; 4822 4823 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 4824 return; 4825 4826 /* It is disabled by HW by default */ 4827 if (enable) { 4828 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 4829 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4830 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4831 4832 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4833 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4834 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 4835 4836 if (def != data) 4837 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4838 } 4839 } else { 4840 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 4841 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4842 4843 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4844 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4845 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 4846 4847 if (def != data) 4848 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4849 } 4850 } 4851 } 4852 4853 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4854 bool enable) 4855 { 4856 uint32_t def, data; 4857 4858 if (!(adev->cg_flags & 4859 (AMD_CG_SUPPORT_GFX_CGCG | 4860 AMD_CG_SUPPORT_GFX_CGLS | 4861 AMD_CG_SUPPORT_GFX_3D_CGCG | 4862 AMD_CG_SUPPORT_GFX_3D_CGLS))) 4863 return; 4864 4865 if (enable) { 4866 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4867 4868 /* unset CGCG override */ 4869 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 4870 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4871 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4872 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4873 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 4874 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4875 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4876 4877 /* update CGCG override bits */ 4878 if (def != data) 4879 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4880 4881 /* enable cgcg FSM(0x0000363F) */ 4882 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 4883 4884 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 4885 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 4886 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4887 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4888 } 4889 4890 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 4891 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 4892 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4893 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4894 } 4895 4896 if (def != data) 4897 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 4898 4899 /* Program RLC_CGCG_CGLS_CTRL_3D */ 4900 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 4901 4902 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 4903 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 4904 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4905 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4906 } 4907 4908 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 4909 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 4910 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4911 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4912 } 4913 4914 if (def != data) 4915 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 4916 4917 /* set IDLE_POLL_COUNT(0x00900100) */ 4918 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 4919 4920 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 4921 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4922 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4923 4924 if (def != data) 4925 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 4926 4927 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4928 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 4929 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 4930 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 4931 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 4932 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 4933 4934 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 4935 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 4936 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 4937 4938 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 4939 if (adev->sdma.num_instances > 1) { 4940 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 4941 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 4942 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 4943 } 4944 } else { 4945 /* Program RLC_CGCG_CGLS_CTRL */ 4946 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 4947 4948 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 4949 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4950 4951 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4952 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4953 4954 if (def != data) 4955 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 4956 4957 /* Program RLC_CGCG_CGLS_CTRL_3D */ 4958 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 4959 4960 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 4961 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4962 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4963 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4964 4965 if (def != data) 4966 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 4967 4968 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 4969 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 4970 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 4971 4972 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 4973 if (adev->sdma.num_instances > 1) { 4974 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 4975 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 4976 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 4977 } 4978 } 4979 } 4980 4981 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4982 bool enable) 4983 { 4984 amdgpu_gfx_rlc_enter_safe_mode(adev); 4985 4986 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 4987 4988 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 4989 4990 gfx_v11_0_update_repeater_fgcg(adev, enable); 4991 4992 gfx_v11_0_update_sram_fgcg(adev, enable); 4993 4994 gfx_v11_0_update_perf_clk(adev, enable); 4995 4996 if (adev->cg_flags & 4997 (AMD_CG_SUPPORT_GFX_MGCG | 4998 AMD_CG_SUPPORT_GFX_CGLS | 4999 AMD_CG_SUPPORT_GFX_CGCG | 5000 AMD_CG_SUPPORT_GFX_3D_CGCG | 5001 AMD_CG_SUPPORT_GFX_3D_CGLS)) 5002 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5003 5004 amdgpu_gfx_rlc_exit_safe_mode(adev); 5005 5006 return 0; 5007 } 5008 5009 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 5010 { 5011 u32 reg, data; 5012 5013 amdgpu_gfx_off_ctrl(adev, false); 5014 5015 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5016 if (amdgpu_sriov_is_pp_one_vf(adev)) 5017 data = RREG32_NO_KIQ(reg); 5018 else 5019 data = RREG32(reg); 5020 5021 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5022 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5023 5024 if (amdgpu_sriov_is_pp_one_vf(adev)) 5025 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5026 else 5027 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5028 5029 amdgpu_gfx_off_ctrl(adev, true); 5030 } 5031 5032 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5033 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5034 .set_safe_mode = gfx_v11_0_set_safe_mode, 5035 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5036 .init = gfx_v11_0_rlc_init, 5037 .get_csb_size = gfx_v11_0_get_csb_size, 5038 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5039 .resume = gfx_v11_0_rlc_resume, 5040 .stop = gfx_v11_0_rlc_stop, 5041 .reset = gfx_v11_0_rlc_reset, 5042 .start = gfx_v11_0_rlc_start, 5043 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5044 }; 5045 5046 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5047 { 5048 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5049 5050 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5051 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5052 else 5053 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5054 5055 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5056 5057 // Program RLC_PG_DELAY3 for CGPG hysteresis 5058 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5059 switch (adev->ip_versions[GC_HWIP][0]) { 5060 case IP_VERSION(11, 0, 1): 5061 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5062 break; 5063 default: 5064 break; 5065 } 5066 } 5067 } 5068 5069 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5070 { 5071 amdgpu_gfx_rlc_enter_safe_mode(adev); 5072 5073 gfx_v11_cntl_power_gating(adev, enable); 5074 5075 amdgpu_gfx_rlc_exit_safe_mode(adev); 5076 } 5077 5078 static int gfx_v11_0_set_powergating_state(void *handle, 5079 enum amd_powergating_state state) 5080 { 5081 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5082 bool enable = (state == AMD_PG_STATE_GATE); 5083 5084 if (amdgpu_sriov_vf(adev)) 5085 return 0; 5086 5087 switch (adev->ip_versions[GC_HWIP][0]) { 5088 case IP_VERSION(11, 0, 0): 5089 case IP_VERSION(11, 0, 2): 5090 case IP_VERSION(11, 0, 3): 5091 amdgpu_gfx_off_ctrl(adev, enable); 5092 break; 5093 case IP_VERSION(11, 0, 1): 5094 gfx_v11_cntl_pg(adev, enable); 5095 amdgpu_gfx_off_ctrl(adev, enable); 5096 break; 5097 default: 5098 break; 5099 } 5100 5101 return 0; 5102 } 5103 5104 static int gfx_v11_0_set_clockgating_state(void *handle, 5105 enum amd_clockgating_state state) 5106 { 5107 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5108 5109 if (amdgpu_sriov_vf(adev)) 5110 return 0; 5111 5112 switch (adev->ip_versions[GC_HWIP][0]) { 5113 case IP_VERSION(11, 0, 0): 5114 case IP_VERSION(11, 0, 1): 5115 case IP_VERSION(11, 0, 2): 5116 case IP_VERSION(11, 0, 3): 5117 gfx_v11_0_update_gfx_clock_gating(adev, 5118 state == AMD_CG_STATE_GATE); 5119 break; 5120 default: 5121 break; 5122 } 5123 5124 return 0; 5125 } 5126 5127 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags) 5128 { 5129 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5130 int data; 5131 5132 /* AMD_CG_SUPPORT_GFX_MGCG */ 5133 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5134 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5135 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5136 5137 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5138 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5139 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5140 5141 /* AMD_CG_SUPPORT_GFX_FGCG */ 5142 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5143 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5144 5145 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5146 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5147 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5148 5149 /* AMD_CG_SUPPORT_GFX_CGCG */ 5150 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5151 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5152 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5153 5154 /* AMD_CG_SUPPORT_GFX_CGLS */ 5155 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5156 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5157 5158 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5159 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5160 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5161 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5162 5163 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5164 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5165 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5166 } 5167 5168 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5169 { 5170 /* gfx11 is 32bit rptr*/ 5171 return *(uint32_t *)ring->rptr_cpu_addr; 5172 } 5173 5174 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5175 { 5176 struct amdgpu_device *adev = ring->adev; 5177 u64 wptr; 5178 5179 /* XXX check if swapping is necessary on BE */ 5180 if (ring->use_doorbell) { 5181 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5182 } else { 5183 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5184 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5185 } 5186 5187 return wptr; 5188 } 5189 5190 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5191 { 5192 struct amdgpu_device *adev = ring->adev; 5193 uint32_t *wptr_saved; 5194 uint32_t *is_queue_unmap; 5195 uint64_t aggregated_db_index; 5196 uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; 5197 uint64_t wptr_tmp; 5198 5199 if (ring->is_mes_queue) { 5200 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); 5201 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + 5202 sizeof(uint32_t)); 5203 aggregated_db_index = 5204 amdgpu_mes_get_aggregated_doorbell_index(adev, 5205 ring->hw_prio); 5206 5207 wptr_tmp = ring->wptr & ring->buf_mask; 5208 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); 5209 *wptr_saved = wptr_tmp; 5210 /* assume doorbell always being used by mes mapped queue */ 5211 if (*is_queue_unmap) { 5212 WDOORBELL64(aggregated_db_index, wptr_tmp); 5213 WDOORBELL64(ring->doorbell_index, wptr_tmp); 5214 } else { 5215 WDOORBELL64(ring->doorbell_index, wptr_tmp); 5216 5217 if (*is_queue_unmap) 5218 WDOORBELL64(aggregated_db_index, wptr_tmp); 5219 } 5220 } else { 5221 if (ring->use_doorbell) { 5222 /* XXX check if swapping is necessary on BE */ 5223 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5224 ring->wptr); 5225 WDOORBELL64(ring->doorbell_index, ring->wptr); 5226 } else { 5227 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5228 lower_32_bits(ring->wptr)); 5229 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5230 upper_32_bits(ring->wptr)); 5231 } 5232 } 5233 } 5234 5235 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5236 { 5237 /* gfx11 hardware is 32bit rptr */ 5238 return *(uint32_t *)ring->rptr_cpu_addr; 5239 } 5240 5241 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5242 { 5243 u64 wptr; 5244 5245 /* XXX check if swapping is necessary on BE */ 5246 if (ring->use_doorbell) 5247 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5248 else 5249 BUG(); 5250 return wptr; 5251 } 5252 5253 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5254 { 5255 struct amdgpu_device *adev = ring->adev; 5256 uint32_t *wptr_saved; 5257 uint32_t *is_queue_unmap; 5258 uint64_t aggregated_db_index; 5259 uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; 5260 uint64_t wptr_tmp; 5261 5262 if (ring->is_mes_queue) { 5263 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); 5264 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + 5265 sizeof(uint32_t)); 5266 aggregated_db_index = 5267 amdgpu_mes_get_aggregated_doorbell_index(adev, 5268 ring->hw_prio); 5269 5270 wptr_tmp = ring->wptr & ring->buf_mask; 5271 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); 5272 *wptr_saved = wptr_tmp; 5273 /* assume doorbell always used by mes mapped queue */ 5274 if (*is_queue_unmap) { 5275 WDOORBELL64(aggregated_db_index, wptr_tmp); 5276 WDOORBELL64(ring->doorbell_index, wptr_tmp); 5277 } else { 5278 WDOORBELL64(ring->doorbell_index, wptr_tmp); 5279 5280 if (*is_queue_unmap) 5281 WDOORBELL64(aggregated_db_index, wptr_tmp); 5282 } 5283 } else { 5284 /* XXX check if swapping is necessary on BE */ 5285 if (ring->use_doorbell) { 5286 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5287 ring->wptr); 5288 WDOORBELL64(ring->doorbell_index, ring->wptr); 5289 } else { 5290 BUG(); /* only DOORBELL method supported on gfx11 now */ 5291 } 5292 } 5293 } 5294 5295 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5296 { 5297 struct amdgpu_device *adev = ring->adev; 5298 u32 ref_and_mask, reg_mem_engine; 5299 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5300 5301 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5302 switch (ring->me) { 5303 case 1: 5304 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5305 break; 5306 case 2: 5307 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5308 break; 5309 default: 5310 return; 5311 } 5312 reg_mem_engine = 0; 5313 } else { 5314 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5315 reg_mem_engine = 1; /* pfp */ 5316 } 5317 5318 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5319 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5320 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5321 ref_and_mask, ref_and_mask, 0x20); 5322 } 5323 5324 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5325 struct amdgpu_job *job, 5326 struct amdgpu_ib *ib, 5327 uint32_t flags) 5328 { 5329 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5330 u32 header, control = 0; 5331 5332 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5333 5334 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5335 5336 control |= ib->length_dw | (vmid << 24); 5337 5338 if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5339 control |= INDIRECT_BUFFER_PRE_ENB(1); 5340 5341 if (flags & AMDGPU_IB_PREEMPTED) 5342 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5343 5344 if (vmid) 5345 gfx_v11_0_ring_emit_de_meta(ring, 5346 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5347 } 5348 5349 if (ring->is_mes_queue) 5350 /* inherit vmid from mqd */ 5351 control |= 0x400000; 5352 5353 amdgpu_ring_write(ring, header); 5354 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5355 amdgpu_ring_write(ring, 5356 #ifdef __BIG_ENDIAN 5357 (2 << 0) | 5358 #endif 5359 lower_32_bits(ib->gpu_addr)); 5360 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5361 amdgpu_ring_write(ring, control); 5362 } 5363 5364 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5365 struct amdgpu_job *job, 5366 struct amdgpu_ib *ib, 5367 uint32_t flags) 5368 { 5369 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5370 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5371 5372 if (ring->is_mes_queue) 5373 /* inherit vmid from mqd */ 5374 control |= 0x40000000; 5375 5376 /* Currently, there is a high possibility to get wave ID mismatch 5377 * between ME and GDS, leading to a hw deadlock, because ME generates 5378 * different wave IDs than the GDS expects. This situation happens 5379 * randomly when at least 5 compute pipes use GDS ordered append. 5380 * The wave IDs generated by ME are also wrong after suspend/resume. 5381 * Those are probably bugs somewhere else in the kernel driver. 5382 * 5383 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5384 * GDS to 0 for this ring (me/pipe). 5385 */ 5386 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5387 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5388 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5389 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5390 } 5391 5392 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5393 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5394 amdgpu_ring_write(ring, 5395 #ifdef __BIG_ENDIAN 5396 (2 << 0) | 5397 #endif 5398 lower_32_bits(ib->gpu_addr)); 5399 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5400 amdgpu_ring_write(ring, control); 5401 } 5402 5403 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5404 u64 seq, unsigned flags) 5405 { 5406 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5407 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5408 5409 /* RELEASE_MEM - flush caches, send int */ 5410 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5411 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5412 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5413 PACKET3_RELEASE_MEM_GCR_GL2_INV | 5414 PACKET3_RELEASE_MEM_GCR_GL2_US | 5415 PACKET3_RELEASE_MEM_GCR_GL1_INV | 5416 PACKET3_RELEASE_MEM_GCR_GLV_INV | 5417 PACKET3_RELEASE_MEM_GCR_GLM_INV | 5418 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5419 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5420 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5421 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5422 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5423 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 5424 5425 /* 5426 * the address should be Qword aligned if 64bit write, Dword 5427 * aligned if only send 32bit data low (discard data high) 5428 */ 5429 if (write64bit) 5430 BUG_ON(addr & 0x7); 5431 else 5432 BUG_ON(addr & 0x3); 5433 amdgpu_ring_write(ring, lower_32_bits(addr)); 5434 amdgpu_ring_write(ring, upper_32_bits(addr)); 5435 amdgpu_ring_write(ring, lower_32_bits(seq)); 5436 amdgpu_ring_write(ring, upper_32_bits(seq)); 5437 amdgpu_ring_write(ring, ring->is_mes_queue ? 5438 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); 5439 } 5440 5441 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5442 { 5443 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5444 uint32_t seq = ring->fence_drv.sync_seq; 5445 uint64_t addr = ring->fence_drv.gpu_addr; 5446 5447 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5448 upper_32_bits(addr), seq, 0xffffffff, 4); 5449 } 5450 5451 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5452 uint16_t pasid, uint32_t flush_type, 5453 bool all_hub, uint8_t dst_sel) 5454 { 5455 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5456 amdgpu_ring_write(ring, 5457 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5458 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5459 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5460 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5461 } 5462 5463 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5464 unsigned vmid, uint64_t pd_addr) 5465 { 5466 if (ring->is_mes_queue) 5467 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); 5468 else 5469 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5470 5471 /* compute doesn't have PFP */ 5472 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5473 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5474 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5475 amdgpu_ring_write(ring, 0x0); 5476 } 5477 } 5478 5479 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5480 u64 seq, unsigned int flags) 5481 { 5482 struct amdgpu_device *adev = ring->adev; 5483 5484 /* we only allocate 32bit for each seq wb address */ 5485 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5486 5487 /* write fence seq to the "addr" */ 5488 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5489 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5490 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5491 amdgpu_ring_write(ring, lower_32_bits(addr)); 5492 amdgpu_ring_write(ring, upper_32_bits(addr)); 5493 amdgpu_ring_write(ring, lower_32_bits(seq)); 5494 5495 if (flags & AMDGPU_FENCE_FLAG_INT) { 5496 /* set register to trigger INT */ 5497 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5498 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5499 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5500 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 5501 amdgpu_ring_write(ring, 0); 5502 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5503 } 5504 } 5505 5506 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, 5507 uint32_t flags) 5508 { 5509 uint32_t dw2 = 0; 5510 5511 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5512 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5513 /* set load_global_config & load_global_uconfig */ 5514 dw2 |= 0x8001; 5515 /* set load_cs_sh_regs */ 5516 dw2 |= 0x01000000; 5517 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5518 dw2 |= 0x10002; 5519 } 5520 5521 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5522 amdgpu_ring_write(ring, dw2); 5523 amdgpu_ring_write(ring, 0); 5524 } 5525 5526 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5527 { 5528 unsigned ret; 5529 5530 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5531 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5532 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5533 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5534 ret = ring->wptr & ring->buf_mask; 5535 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5536 5537 return ret; 5538 } 5539 5540 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5541 { 5542 unsigned cur; 5543 BUG_ON(offset > ring->buf_mask); 5544 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5545 5546 cur = (ring->wptr - 1) & ring->buf_mask; 5547 if (likely(cur > offset)) 5548 ring->ring[offset] = cur - offset; 5549 else 5550 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; 5551 } 5552 5553 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 5554 { 5555 int i, r = 0; 5556 struct amdgpu_device *adev = ring->adev; 5557 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 5558 struct amdgpu_ring *kiq_ring = &kiq->ring; 5559 unsigned long flags; 5560 5561 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5562 return -EINVAL; 5563 5564 spin_lock_irqsave(&kiq->ring_lock, flags); 5565 5566 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5567 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5568 return -ENOMEM; 5569 } 5570 5571 /* assert preemption condition */ 5572 amdgpu_ring_set_preempt_cond_exec(ring, false); 5573 5574 /* assert IB preemption, emit the trailing fence */ 5575 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5576 ring->trail_fence_gpu_addr, 5577 ++ring->trail_seq); 5578 amdgpu_ring_commit(kiq_ring); 5579 5580 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5581 5582 /* poll the trailing fence */ 5583 for (i = 0; i < adev->usec_timeout; i++) { 5584 if (ring->trail_seq == 5585 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 5586 break; 5587 udelay(1); 5588 } 5589 5590 if (i >= adev->usec_timeout) { 5591 r = -EINVAL; 5592 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 5593 } 5594 5595 /* deassert preemption condition */ 5596 amdgpu_ring_set_preempt_cond_exec(ring, true); 5597 return r; 5598 } 5599 5600 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 5601 { 5602 struct amdgpu_device *adev = ring->adev; 5603 struct v10_de_ib_state de_payload = {0}; 5604 uint64_t offset, gds_addr, de_payload_gpu_addr; 5605 void *de_payload_cpu_addr; 5606 int cnt; 5607 5608 if (ring->is_mes_queue) { 5609 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5610 gfx[0].gfx_meta_data) + 5611 offsetof(struct v10_gfx_meta_data, de_payload); 5612 de_payload_gpu_addr = 5613 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5614 de_payload_cpu_addr = 5615 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5616 5617 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5618 gfx[0].gds_backup) + 5619 offsetof(struct v10_gfx_meta_data, de_payload); 5620 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5621 } else { 5622 offset = offsetof(struct v10_gfx_meta_data, de_payload); 5623 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5624 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5625 5626 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5627 AMDGPU_CSA_SIZE - adev->gds.gds_size, 5628 PAGE_SIZE); 5629 } 5630 5631 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5632 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5633 5634 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5635 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5636 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5637 WRITE_DATA_DST_SEL(8) | 5638 WR_CONFIRM) | 5639 WRITE_DATA_CACHE_POLICY(0)); 5640 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 5641 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5642 5643 if (resume) 5644 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5645 sizeof(de_payload) >> 2); 5646 else 5647 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5648 sizeof(de_payload) >> 2); 5649 } 5650 5651 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5652 bool secure) 5653 { 5654 uint32_t v = secure ? FRAME_TMZ : 0; 5655 5656 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5657 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5658 } 5659 5660 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5661 uint32_t reg_val_offs) 5662 { 5663 struct amdgpu_device *adev = ring->adev; 5664 5665 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5666 amdgpu_ring_write(ring, 0 | /* src: register*/ 5667 (5 << 8) | /* dst: memory */ 5668 (1 << 20)); /* write confirm */ 5669 amdgpu_ring_write(ring, reg); 5670 amdgpu_ring_write(ring, 0); 5671 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5672 reg_val_offs * 4)); 5673 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5674 reg_val_offs * 4)); 5675 } 5676 5677 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5678 uint32_t val) 5679 { 5680 uint32_t cmd = 0; 5681 5682 switch (ring->funcs->type) { 5683 case AMDGPU_RING_TYPE_GFX: 5684 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5685 break; 5686 case AMDGPU_RING_TYPE_KIQ: 5687 cmd = (1 << 16); /* no inc addr */ 5688 break; 5689 default: 5690 cmd = WR_CONFIRM; 5691 break; 5692 } 5693 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5694 amdgpu_ring_write(ring, cmd); 5695 amdgpu_ring_write(ring, reg); 5696 amdgpu_ring_write(ring, 0); 5697 amdgpu_ring_write(ring, val); 5698 } 5699 5700 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5701 uint32_t val, uint32_t mask) 5702 { 5703 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5704 } 5705 5706 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5707 uint32_t reg0, uint32_t reg1, 5708 uint32_t ref, uint32_t mask) 5709 { 5710 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5711 5712 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5713 ref, mask, 0x20); 5714 } 5715 5716 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, 5717 unsigned vmid) 5718 { 5719 struct amdgpu_device *adev = ring->adev; 5720 uint32_t value = 0; 5721 5722 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5723 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5724 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5725 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5726 WREG32_SOC15(GC, 0, regSQ_CMD, value); 5727 } 5728 5729 static void 5730 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5731 uint32_t me, uint32_t pipe, 5732 enum amdgpu_interrupt_state state) 5733 { 5734 uint32_t cp_int_cntl, cp_int_cntl_reg; 5735 5736 if (!me) { 5737 switch (pipe) { 5738 case 0: 5739 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 5740 break; 5741 case 1: 5742 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 5743 break; 5744 default: 5745 DRM_DEBUG("invalid pipe %d\n", pipe); 5746 return; 5747 } 5748 } else { 5749 DRM_DEBUG("invalid me %d\n", me); 5750 return; 5751 } 5752 5753 switch (state) { 5754 case AMDGPU_IRQ_STATE_DISABLE: 5755 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 5756 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5757 TIME_STAMP_INT_ENABLE, 0); 5758 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5759 GENERIC0_INT_ENABLE, 0); 5760 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 5761 break; 5762 case AMDGPU_IRQ_STATE_ENABLE: 5763 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 5764 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5765 TIME_STAMP_INT_ENABLE, 1); 5766 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5767 GENERIC0_INT_ENABLE, 1); 5768 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 5769 break; 5770 default: 5771 break; 5772 } 5773 } 5774 5775 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5776 int me, int pipe, 5777 enum amdgpu_interrupt_state state) 5778 { 5779 u32 mec_int_cntl, mec_int_cntl_reg; 5780 5781 /* 5782 * amdgpu controls only the first MEC. That's why this function only 5783 * handles the setting of interrupts for this specific MEC. All other 5784 * pipes' interrupts are set by amdkfd. 5785 */ 5786 5787 if (me == 1) { 5788 switch (pipe) { 5789 case 0: 5790 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 5791 break; 5792 case 1: 5793 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 5794 break; 5795 case 2: 5796 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 5797 break; 5798 case 3: 5799 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 5800 break; 5801 default: 5802 DRM_DEBUG("invalid pipe %d\n", pipe); 5803 return; 5804 } 5805 } else { 5806 DRM_DEBUG("invalid me %d\n", me); 5807 return; 5808 } 5809 5810 switch (state) { 5811 case AMDGPU_IRQ_STATE_DISABLE: 5812 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5813 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5814 TIME_STAMP_INT_ENABLE, 0); 5815 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5816 GENERIC0_INT_ENABLE, 0); 5817 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5818 break; 5819 case AMDGPU_IRQ_STATE_ENABLE: 5820 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5821 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5822 TIME_STAMP_INT_ENABLE, 1); 5823 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5824 GENERIC0_INT_ENABLE, 1); 5825 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5826 break; 5827 default: 5828 break; 5829 } 5830 } 5831 5832 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5833 struct amdgpu_irq_src *src, 5834 unsigned type, 5835 enum amdgpu_interrupt_state state) 5836 { 5837 switch (type) { 5838 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5839 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 5840 break; 5841 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 5842 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 5843 break; 5844 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5845 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5846 break; 5847 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5848 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5849 break; 5850 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5851 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5852 break; 5853 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5854 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5855 break; 5856 default: 5857 break; 5858 } 5859 return 0; 5860 } 5861 5862 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 5863 struct amdgpu_irq_src *source, 5864 struct amdgpu_iv_entry *entry) 5865 { 5866 int i; 5867 u8 me_id, pipe_id, queue_id; 5868 struct amdgpu_ring *ring; 5869 uint32_t mes_queue_id = entry->src_data[0]; 5870 5871 DRM_DEBUG("IH: CP EOP\n"); 5872 5873 if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { 5874 struct amdgpu_mes_queue *queue; 5875 5876 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; 5877 5878 spin_lock(&adev->mes.queue_id_lock); 5879 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); 5880 if (queue) { 5881 DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); 5882 amdgpu_fence_process(queue->ring); 5883 } 5884 spin_unlock(&adev->mes.queue_id_lock); 5885 } else { 5886 me_id = (entry->ring_id & 0x0c) >> 2; 5887 pipe_id = (entry->ring_id & 0x03) >> 0; 5888 queue_id = (entry->ring_id & 0x70) >> 4; 5889 5890 switch (me_id) { 5891 case 0: 5892 if (pipe_id == 0) 5893 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5894 else 5895 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 5896 break; 5897 case 1: 5898 case 2: 5899 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5900 ring = &adev->gfx.compute_ring[i]; 5901 /* Per-queue interrupt is supported for MEC starting from VI. 5902 * The interrupt can only be enabled/disabled per pipe instead 5903 * of per queue. 5904 */ 5905 if ((ring->me == me_id) && 5906 (ring->pipe == pipe_id) && 5907 (ring->queue == queue_id)) 5908 amdgpu_fence_process(ring); 5909 } 5910 break; 5911 } 5912 } 5913 5914 return 0; 5915 } 5916 5917 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5918 struct amdgpu_irq_src *source, 5919 unsigned type, 5920 enum amdgpu_interrupt_state state) 5921 { 5922 switch (state) { 5923 case AMDGPU_IRQ_STATE_DISABLE: 5924 case AMDGPU_IRQ_STATE_ENABLE: 5925 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, 5926 PRIV_REG_INT_ENABLE, 5927 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5928 break; 5929 default: 5930 break; 5931 } 5932 5933 return 0; 5934 } 5935 5936 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5937 struct amdgpu_irq_src *source, 5938 unsigned type, 5939 enum amdgpu_interrupt_state state) 5940 { 5941 switch (state) { 5942 case AMDGPU_IRQ_STATE_DISABLE: 5943 case AMDGPU_IRQ_STATE_ENABLE: 5944 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, 5945 PRIV_INSTR_INT_ENABLE, 5946 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5947 break; 5948 default: 5949 break; 5950 } 5951 5952 return 0; 5953 } 5954 5955 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 5956 struct amdgpu_iv_entry *entry) 5957 { 5958 u8 me_id, pipe_id, queue_id; 5959 struct amdgpu_ring *ring; 5960 int i; 5961 5962 me_id = (entry->ring_id & 0x0c) >> 2; 5963 pipe_id = (entry->ring_id & 0x03) >> 0; 5964 queue_id = (entry->ring_id & 0x70) >> 4; 5965 5966 switch (me_id) { 5967 case 0: 5968 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5969 ring = &adev->gfx.gfx_ring[i]; 5970 /* we only enabled 1 gfx queue per pipe for now */ 5971 if (ring->me == me_id && ring->pipe == pipe_id) 5972 drm_sched_fault(&ring->sched); 5973 } 5974 break; 5975 case 1: 5976 case 2: 5977 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5978 ring = &adev->gfx.compute_ring[i]; 5979 if (ring->me == me_id && ring->pipe == pipe_id && 5980 ring->queue == queue_id) 5981 drm_sched_fault(&ring->sched); 5982 } 5983 break; 5984 default: 5985 BUG(); 5986 break; 5987 } 5988 } 5989 5990 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 5991 struct amdgpu_irq_src *source, 5992 struct amdgpu_iv_entry *entry) 5993 { 5994 DRM_ERROR("Illegal register access in command stream\n"); 5995 gfx_v11_0_handle_priv_fault(adev, entry); 5996 return 0; 5997 } 5998 5999 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6000 struct amdgpu_irq_src *source, 6001 struct amdgpu_iv_entry *entry) 6002 { 6003 DRM_ERROR("Illegal instruction in command stream\n"); 6004 gfx_v11_0_handle_priv_fault(adev, entry); 6005 return 0; 6006 } 6007 6008 #if 0 6009 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6010 struct amdgpu_irq_src *src, 6011 unsigned int type, 6012 enum amdgpu_interrupt_state state) 6013 { 6014 uint32_t tmp, target; 6015 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 6016 6017 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6018 target += ring->pipe; 6019 6020 switch (type) { 6021 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6022 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6023 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6024 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6025 GENERIC2_INT_ENABLE, 0); 6026 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6027 6028 tmp = RREG32_SOC15_IP(GC, target); 6029 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6030 GENERIC2_INT_ENABLE, 0); 6031 WREG32_SOC15_IP(GC, target, tmp); 6032 } else { 6033 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6034 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6035 GENERIC2_INT_ENABLE, 1); 6036 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6037 6038 tmp = RREG32_SOC15_IP(GC, target); 6039 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6040 GENERIC2_INT_ENABLE, 1); 6041 WREG32_SOC15_IP(GC, target, tmp); 6042 } 6043 break; 6044 default: 6045 BUG(); /* kiq only support GENERIC2_INT now */ 6046 break; 6047 } 6048 return 0; 6049 } 6050 #endif 6051 6052 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6053 { 6054 const unsigned int gcr_cntl = 6055 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6056 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6057 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6058 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6059 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6060 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6061 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6062 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6063 6064 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6065 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6066 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6067 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6068 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6069 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6070 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6071 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6072 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6073 } 6074 6075 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 6076 .name = "gfx_v11_0", 6077 .early_init = gfx_v11_0_early_init, 6078 .late_init = gfx_v11_0_late_init, 6079 .sw_init = gfx_v11_0_sw_init, 6080 .sw_fini = gfx_v11_0_sw_fini, 6081 .hw_init = gfx_v11_0_hw_init, 6082 .hw_fini = gfx_v11_0_hw_fini, 6083 .suspend = gfx_v11_0_suspend, 6084 .resume = gfx_v11_0_resume, 6085 .is_idle = gfx_v11_0_is_idle, 6086 .wait_for_idle = gfx_v11_0_wait_for_idle, 6087 .soft_reset = gfx_v11_0_soft_reset, 6088 .check_soft_reset = gfx_v11_0_check_soft_reset, 6089 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 6090 .set_powergating_state = gfx_v11_0_set_powergating_state, 6091 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 6092 }; 6093 6094 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 6095 .type = AMDGPU_RING_TYPE_GFX, 6096 .align_mask = 0xff, 6097 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6098 .support_64bit_ptrs = true, 6099 .secure_submission_supported = true, 6100 .vmhub = AMDGPU_GFXHUB_0, 6101 .get_rptr = gfx_v11_0_ring_get_rptr_gfx, 6102 .get_wptr = gfx_v11_0_ring_get_wptr_gfx, 6103 .set_wptr = gfx_v11_0_ring_set_wptr_gfx, 6104 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6105 5 + /* COND_EXEC */ 6106 7 + /* PIPELINE_SYNC */ 6107 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6108 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6109 2 + /* VM_FLUSH */ 6110 8 + /* FENCE for VM_FLUSH */ 6111 20 + /* GDS switch */ 6112 5 + /* COND_EXEC */ 6113 7 + /* HDP_flush */ 6114 4 + /* VGT_flush */ 6115 31 + /* DE_META */ 6116 3 + /* CNTX_CTRL */ 6117 5 + /* HDP_INVL */ 6118 8 + 8 + /* FENCE x2 */ 6119 8, /* gfx_v11_0_emit_mem_sync */ 6120 .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ 6121 .emit_ib = gfx_v11_0_ring_emit_ib_gfx, 6122 .emit_fence = gfx_v11_0_ring_emit_fence, 6123 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 6124 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 6125 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 6126 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 6127 .test_ring = gfx_v11_0_ring_test_ring, 6128 .test_ib = gfx_v11_0_ring_test_ib, 6129 .insert_nop = amdgpu_ring_insert_nop, 6130 .pad_ib = amdgpu_ring_generic_pad_ib, 6131 .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, 6132 .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, 6133 .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, 6134 .preempt_ib = gfx_v11_0_ring_preempt_ib, 6135 .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, 6136 .emit_wreg = gfx_v11_0_ring_emit_wreg, 6137 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 6138 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 6139 .soft_recovery = gfx_v11_0_ring_soft_recovery, 6140 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 6141 }; 6142 6143 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { 6144 .type = AMDGPU_RING_TYPE_COMPUTE, 6145 .align_mask = 0xff, 6146 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6147 .support_64bit_ptrs = true, 6148 .vmhub = AMDGPU_GFXHUB_0, 6149 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 6150 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 6151 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 6152 .emit_frame_size = 6153 20 + /* gfx_v11_0_ring_emit_gds_switch */ 6154 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 6155 5 + /* hdp invalidate */ 6156 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 6157 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6158 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6159 2 + /* gfx_v11_0_ring_emit_vm_flush */ 6160 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ 6161 8, /* gfx_v11_0_emit_mem_sync */ 6162 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 6163 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 6164 .emit_fence = gfx_v11_0_ring_emit_fence, 6165 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 6166 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 6167 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 6168 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 6169 .test_ring = gfx_v11_0_ring_test_ring, 6170 .test_ib = gfx_v11_0_ring_test_ib, 6171 .insert_nop = amdgpu_ring_insert_nop, 6172 .pad_ib = amdgpu_ring_generic_pad_ib, 6173 .emit_wreg = gfx_v11_0_ring_emit_wreg, 6174 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 6175 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 6176 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 6177 }; 6178 6179 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 6180 .type = AMDGPU_RING_TYPE_KIQ, 6181 .align_mask = 0xff, 6182 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6183 .support_64bit_ptrs = true, 6184 .vmhub = AMDGPU_GFXHUB_0, 6185 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 6186 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 6187 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 6188 .emit_frame_size = 6189 20 + /* gfx_v11_0_ring_emit_gds_switch */ 6190 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 6191 5 + /*hdp invalidate */ 6192 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 6193 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6194 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6195 2 + /* gfx_v11_0_ring_emit_vm_flush */ 6196 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6197 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 6198 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 6199 .emit_fence = gfx_v11_0_ring_emit_fence_kiq, 6200 .test_ring = gfx_v11_0_ring_test_ring, 6201 .test_ib = gfx_v11_0_ring_test_ib, 6202 .insert_nop = amdgpu_ring_insert_nop, 6203 .pad_ib = amdgpu_ring_generic_pad_ib, 6204 .emit_rreg = gfx_v11_0_ring_emit_rreg, 6205 .emit_wreg = gfx_v11_0_ring_emit_wreg, 6206 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 6207 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 6208 }; 6209 6210 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) 6211 { 6212 int i; 6213 6214 adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq; 6215 6216 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6217 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; 6218 6219 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6220 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; 6221 } 6222 6223 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { 6224 .set = gfx_v11_0_set_eop_interrupt_state, 6225 .process = gfx_v11_0_eop_irq, 6226 }; 6227 6228 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { 6229 .set = gfx_v11_0_set_priv_reg_fault_state, 6230 .process = gfx_v11_0_priv_reg_irq, 6231 }; 6232 6233 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { 6234 .set = gfx_v11_0_set_priv_inst_fault_state, 6235 .process = gfx_v11_0_priv_inst_irq, 6236 }; 6237 6238 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 6239 { 6240 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6241 adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; 6242 6243 adev->gfx.priv_reg_irq.num_types = 1; 6244 adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; 6245 6246 adev->gfx.priv_inst_irq.num_types = 1; 6247 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 6248 } 6249 6250 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) 6251 { 6252 if (adev->flags & AMD_IS_APU) 6253 adev->gfx.imu.mode = MISSION_MODE; 6254 else 6255 adev->gfx.imu.mode = DEBUG_MODE; 6256 6257 adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; 6258 } 6259 6260 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) 6261 { 6262 adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; 6263 } 6264 6265 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) 6266 { 6267 unsigned total_cu = adev->gfx.config.max_cu_per_sh * 6268 adev->gfx.config.max_sh_per_se * 6269 adev->gfx.config.max_shader_engines; 6270 6271 adev->gds.gds_size = 0x1000; 6272 adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; 6273 adev->gds.gws_size = 64; 6274 adev->gds.oa_size = 16; 6275 } 6276 6277 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) 6278 { 6279 /* set gfx eng mqd */ 6280 adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = 6281 sizeof(struct v11_gfx_mqd); 6282 adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = 6283 gfx_v11_0_gfx_mqd_init; 6284 /* set compute eng mqd */ 6285 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 6286 sizeof(struct v11_compute_mqd); 6287 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 6288 gfx_v11_0_compute_mqd_init; 6289 } 6290 6291 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 6292 u32 bitmap) 6293 { 6294 u32 data; 6295 6296 if (!bitmap) 6297 return; 6298 6299 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 6300 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 6301 6302 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 6303 } 6304 6305 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 6306 { 6307 u32 data, wgp_bitmask; 6308 data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); 6309 data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); 6310 6311 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 6312 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 6313 6314 wgp_bitmask = 6315 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); 6316 6317 return (~data) & wgp_bitmask; 6318 } 6319 6320 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) 6321 { 6322 u32 wgp_idx, wgp_active_bitmap; 6323 u32 cu_bitmap_per_wgp, cu_active_bitmap; 6324 6325 wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev); 6326 cu_active_bitmap = 0; 6327 6328 for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { 6329 /* if there is one WGP enabled, it means 2 CUs will be enabled */ 6330 cu_bitmap_per_wgp = 3 << (2 * wgp_idx); 6331 if (wgp_active_bitmap & (1 << wgp_idx)) 6332 cu_active_bitmap |= cu_bitmap_per_wgp; 6333 } 6334 6335 return cu_active_bitmap; 6336 } 6337 6338 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 6339 struct amdgpu_cu_info *cu_info) 6340 { 6341 int i, j, k, counter, active_cu_number = 0; 6342 u32 mask, bitmap; 6343 unsigned disable_masks[8 * 2]; 6344 6345 if (!adev || !cu_info) 6346 return -EINVAL; 6347 6348 amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2); 6349 6350 mutex_lock(&adev->grbm_idx_mutex); 6351 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 6352 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 6353 mask = 1; 6354 counter = 0; 6355 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); 6356 if (i < 8 && j < 2) 6357 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( 6358 adev, disable_masks[i * 2 + j]); 6359 bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev); 6360 6361 /** 6362 * GFX11 could support more than 4 SEs, while the bitmap 6363 * in cu_info struct is 4x4 and ioctl interface struct 6364 * drm_amdgpu_info_device should keep stable. 6365 * So we use last two columns of bitmap to store cu mask for 6366 * SEs 4 to 7, the layout of the bitmap is as below: 6367 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} 6368 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} 6369 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} 6370 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} 6371 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} 6372 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} 6373 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} 6374 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} 6375 */ 6376 cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap; 6377 6378 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 6379 if (bitmap & mask) 6380 counter++; 6381 6382 mask <<= 1; 6383 } 6384 active_cu_number += counter; 6385 } 6386 } 6387 gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6388 mutex_unlock(&adev->grbm_idx_mutex); 6389 6390 cu_info->number = active_cu_number; 6391 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 6392 6393 return 0; 6394 } 6395 6396 const struct amdgpu_ip_block_version gfx_v11_0_ip_block = 6397 { 6398 .type = AMD_IP_BLOCK_TYPE_GFX, 6399 .major = 11, 6400 .minor = 0, 6401 .rev = 0, 6402 .funcs = &gfx_v11_0_ip_funcs, 6403 }; 6404