/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "amdgpu_atomfirmware.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "soc15d.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"

#define GFX11_NUM_GFX_RINGS		1
#define GFX11_MEC_HPD_SIZE		2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL		0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX	1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1		0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL,
			       0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
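		/* Engine-select values for the KIQ MAP_QUEUES packet: compute
		 * queues use eng_sel 0, gfx rings use 4 and MES queues use 5,
		 * matching the remaining cases of this switch.
		 */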
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ?
			   4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
}

static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ?
			  WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't have indirect buffer support for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		uint32_t padding, offset;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		padding = amdgpu_mes_ctx_get_offs(ring,
						  AMDGPU_MES_CTX_PADDING_OFFS);

		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
		cpu_ptr = &adev->wb.wb[index];

		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err1;
		}
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r =
	    amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	if (!ring->is_mes_queue)
		amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char fw_name[40];
	char ucode_prefix[30];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
				(union amdgpu_firmware_header *)
				adev->gfx.pfp_fw->data, 2, 0);
	if (adev->gfx.rs64_enable) {
		dev_info(adev->dev, "CP RS64 enable\n");
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	/* only one MEC for gfx 11.0.0. */
	adev->gfx.mec2_fw = NULL;

out:
	if (err) {
		dev_err(adev->dev,
			"gfx11: Failed to init firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
	}

	return err;
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;
	char fw_name[40];
	char ucode_prefix[30];

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
	err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
	if (err)
		goto out;

	err = amdgpu_ucode_validate(adev->psp.toc_fw);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	dev_err(adev->dev, "Failed to load TOC microcode\n");
	release_firmware(adev->psp.toc_fw);
	adev->psp.toc_fw = NULL;
	return err;
}

static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
				     volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect =
	     adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}

static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static int gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	int r;

	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);

	r = gfx_v11_0_init_microcode(adev);
	if (r)
		DRM_ERROR("Failed to load gfx firmware!\n");

	return r;
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap,
		    AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		     (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32
				       pipe, u32 q, u32 vm)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
};

static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	int r;
	struct amdgpu_ring *ring;
	unsigned int irq_type;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r)
		return r;
	return 0;
}

static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
					int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX11_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static struct {
	SOC21_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
	       (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offset in rlc toc ucode is aligned */
	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;

	return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v11_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC21_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size,
						       uint32_t *fw_autoload_mask)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	void *data;
	uint32_t size;
	uint64_t *toc_ptr;

	*(uint64_t *)fw_autoload_mask |= 0x1;

	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint64_t *)data + size / 8 - 1;
	*toc_ptr = *(uint64_t *)fw_autoload_mask;

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
						   SOC21_FIRMWARE_ID_RLC_TOC,
						   data, size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
	uint16_t version_major, version_minor;

	if (adev->gfx.rs64_enable) {
		/* pfp ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
						fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
						fw_data, fw_size, fw_autoload_mask);
		/* me ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
						fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
						fw_data, fw_size, fw_autoload_mask);
		/* mec ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		/* instruction */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
						fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
						fw_data, fw_size, fw_autoload_mask);
	} else {
		/* pfp ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		fw_data = (const __le32
			   *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
						fw_data, fw_size, fw_autoload_mask);

		/* me ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
						fw_data, fw_size, fw_autoload_mask);

		/* mec ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			cp_hdr->jt_size * 4;
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
						fw_data, fw_size, fw_autoload_mask);
	}

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
					fw_data, fw_size, fw_autoload_mask);

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2) {
		if (version_minor >= 2) {
			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
					fw_data, fw_size, fw_autoload_mask);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
					fw_data, fw_size, fw_autoload_mask);
		}
	}
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
							    uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v2_0 *sdma_hdr;

	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
		adev->sdma.instance[0].fw->data;
	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);

	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
}

static void
gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
					       uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	unsigned fw_size;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
		} else {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
		}

		mes_hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				ucode_id, fw_data, fw_size, fw_autoload_mask);

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				data_id, fw_data, fw_size, fw_autoload_mask);
	}
}

static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size;
	uint64_t gpu_addr;
	uint32_t autoload_fw_id[2];

	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);

	/* RLC autoload sequence 2: copy ucode */
	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);

	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);

	/* RLC autoload sequence 3: load IMU fw */
	if (adev->gfx.imu.funcs->load_microcode)
		adev->gfx.imu.funcs->load_microcode(adev);
	/* RLC autoload sequence 4 init IMU fw */
	if (adev->gfx.imu.funcs->setup_imu)
		adev->gfx.imu.funcs->setup_imu(adev);
	if (adev->gfx.imu.funcs->start_imu)
		adev->gfx.imu.funcs->start_imu(adev);

	/* RLC autoload sequence 5 disable gpa mode */
	gfx_v11_0_disable_gpa_mode(adev);

	return 0;
}

static int gfx_v11_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id = 0;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfxhub.funcs->init(adev);

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 2;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 4;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 4;
		break;
	default:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	}

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	if (adev->gfx.imu.funcs) {
		if (adev->gfx.imu.funcs->init_microcode) {
			r = adev->gfx.imu.funcs->init_microcode(adev);
			if (r)
				DRM_ERROR("Failed to load imu firmware!\n");
		}
	}

	r = gfx_v11_0_me_init(adev);
	if (r)
		return r;

	r = gfx_v11_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v11_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
							    i, k, j);
				if (r)
					return r;
				ring_id++;
			}
		}
	}

	ring_id = 0;
	/* set up the compute queues - allocate horizontally across pipes */
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
								     j))
					continue;

				r = gfx_v11_0_compute_ring_init(adev, ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	if (!adev->enable_mes_kiq) {
		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;
	}

	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
	if (r)
		return r;

	/* allocate visible FB for rlc auto-loading fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		r = gfx_v11_0_init_toc_microcode(adev);
		if (r)
			dev_err(adev->dev, "Failed to load toc firmware!\n");
		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
		if (r)
			return r;
	}

	r = gfx_v11_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
			      &adev->gfx.pfp.pfp_fw_gpu_addr,
			      (void **)&adev->gfx.pfp.pfp_fw_ptr);

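	/* also release the PFP data/stack firmware BO alongside the
	 * instruction BO freed above.
	 */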
	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
}

static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
			      &adev->gfx.me.me_fw_gpu_addr,
			      (void **)&adev->gfx.me.me_fw_ptr);

	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
			      &adev->gfx.me.me_fw_data_gpu_addr,
			      (void **)&adev->gfx.me.me_fw_data_ptr);
}

static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
			      &adev->gfx.rlc.rlc_autoload_gpu_addr,
			      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
}

static int gfx_v11_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);

	if (!adev->enable_mes_kiq) {
		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
		amdgpu_gfx_kiq_fini(adev);
	}

	gfx_v11_0_pfp_fini(adev);
	gfx_v11_0_me_fini(adev);
	gfx_v11_0_rlc_fini(adev);
	gfx_v11_0_mec_fini(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		gfx_v11_0_rlc_autoload_buffer_fini(adev);

	gfx_v11_0_free_microcode(adev);

	return 0;
}

static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
				     instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);

	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
}

static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v11_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define LDS_APP_BASE		0x1
#define SCRATCH_APP_BASE	0x2

static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_bases;
	uint32_t data;

	/*
	 * Configure apertures:
	 * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
			SCRATCH_APP_BASE;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		soc21_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);

		/* Enable trap for each kfd vmid. */
		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
	}
}

static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
{
	/* TODO: harvest feature to be added later. */
}

static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
{
	/* TCCs are global (not instanced).
	 */
	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);

	adev->gfx.config.tcc_disabled_mask =
		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
}

static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	if (!amdgpu_sriov_vf(adev))
		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v11_0_setup_rb(adev);
	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
	gfx_v11_0_get_tcc_info(adev);
	adev->gfx.config.pa_sc_tile_steering_override = 0;

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
		soc21_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		if (i != 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v11_0_init_compute_vmid(adev);
	gfx_v11_0_init_gds_vmid(adev);
}

static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						bool enable)
{
	u32 tmp;

	if (amdgpu_sriov_vf(adev))
		return;

	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
			    enable ?
1 : 0); 1691 1692 WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); 1693 } 1694 1695 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 1696 { 1697 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 1698 1699 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 1700 adev->gfx.rlc.clear_state_gpu_addr >> 32); 1701 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 1702 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 1703 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 1704 1705 return 0; 1706 } 1707 1708 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 1709 { 1710 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 1711 1712 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 1713 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 1714 } 1715 1716 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 1717 { 1718 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 1719 udelay(50); 1720 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 1721 udelay(50); 1722 } 1723 1724 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 1725 bool enable) 1726 { 1727 uint32_t rlc_pg_cntl; 1728 1729 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 1730 1731 if (!enable) { 1732 /* RLC_PG_CNTL[23] = 0 (default) 1733 * RLC will wait for handshake acks with SMU 1734 * GFXOFF will be enabled 1735 * RLC_PG_CNTL[23] = 1 1736 * RLC will not issue any message to SMU 1737 * hence no handshake between SMU & RLC 1738 * GFXOFF will be disabled 1739 */ 1740 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 1741 } else 1742 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 1743 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 1744 } 1745 1746 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 1747 { 1748 /* TODO: enable rlc & smu handshake until smu 1749 * and gfxoff feature works as expected */ 1750 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 1751 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 1752 1753 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 1754 udelay(50); 1755 } 1756 1757 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 1758 { 1759 uint32_t tmp; 1760 1761 /* enable Save Restore Machine */ 1762 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 1763 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 1764 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 1765 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 1766 } 1767 1768 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 1769 { 1770 const struct rlc_firmware_header_v2_0 *hdr; 1771 const __le32 *fw_data; 1772 unsigned i, fw_size; 1773 1774 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1775 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1776 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1777 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1778 1779 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 1780 RLCG_UCODE_LOADING_START_ADDRESS); 1781 1782 for (i = 0; i < fw_size; i++) 1783 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 1784 le32_to_cpup(fw_data++)); 1785 1786 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 1787 } 1788 1789 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 1790 { 1791 const struct rlc_firmware_header_v2_2 *hdr; 1792 const __le32 *fw_data; 1793 unsigned i, fw_size; 1794 u32 tmp; 1795 1796 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1797 1798 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 
1799 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 1800 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 1801 1802 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 1803 1804 for (i = 0; i < fw_size; i++) { 1805 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1806 msleep(1); 1807 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 1808 le32_to_cpup(fw_data++)); 1809 } 1810 1811 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 1812 1813 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1814 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 1815 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 1816 1817 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 1818 for (i = 0; i < fw_size; i++) { 1819 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1820 msleep(1); 1821 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 1822 le32_to_cpup(fw_data++)); 1823 } 1824 1825 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 1826 1827 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 1828 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 1829 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 1830 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 1831 } 1832 1833 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 1834 { 1835 const struct rlc_firmware_header_v2_3 *hdr; 1836 const __le32 *fw_data; 1837 unsigned i, fw_size; 1838 u32 tmp; 1839 1840 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 1841 1842 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1843 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 1844 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 1845 1846 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 1847 1848 for (i = 0; i < fw_size; i++) { 1849 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1850 msleep(1); 1851 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 1852 le32_to_cpup(fw_data++)); 1853 } 1854 1855 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 1856 1857 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 1858 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 1859 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 1860 1861 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1862 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 1863 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 1864 1865 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 1866 1867 for (i = 0; i < fw_size; i++) { 1868 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1869 msleep(1); 1870 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 1871 le32_to_cpup(fw_data++)); 1872 } 1873 1874 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 1875 1876 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 1877 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 1878 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 1879 } 1880 1881 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 1882 { 1883 const struct rlc_firmware_header_v2_0 *hdr; 1884 uint16_t version_major; 1885 uint16_t version_minor; 1886 1887 if (!adev->gfx.rlc_fw) 1888 return -EINVAL; 1889 1890 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1891 amdgpu_ucode_print_rlc_hdr(&hdr->header); 1892 1893 version_major = le16_to_cpu(hdr->header.header_version_major); 1894 version_minor = le16_to_cpu(hdr->header.header_version_minor); 1895 1896 if (version_major == 2) { 1897 gfx_v11_0_load_rlcg_microcode(adev); 1898 if (amdgpu_dpm == 1) { 1899 if (version_minor >= 2) 1900 
gfx_v11_0_load_rlc_iram_dram_microcode(adev); 1901 if (version_minor == 3) 1902 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 1903 } 1904 1905 return 0; 1906 } 1907 1908 return -EINVAL; 1909 } 1910 1911 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 1912 { 1913 int r; 1914 1915 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1916 gfx_v11_0_init_csb(adev); 1917 1918 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 1919 gfx_v11_0_rlc_enable_srm(adev); 1920 } else { 1921 if (amdgpu_sriov_vf(adev)) { 1922 gfx_v11_0_init_csb(adev); 1923 return 0; 1924 } 1925 1926 adev->gfx.rlc.funcs->stop(adev); 1927 1928 /* disable CG */ 1929 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 1930 1931 /* disable PG */ 1932 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 1933 1934 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1935 /* legacy rlc firmware loading */ 1936 r = gfx_v11_0_rlc_load_microcode(adev); 1937 if (r) 1938 return r; 1939 } 1940 1941 gfx_v11_0_init_csb(adev); 1942 1943 adev->gfx.rlc.funcs->start(adev); 1944 } 1945 return 0; 1946 } 1947 1948 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 1949 { 1950 uint32_t usec_timeout = 50000; /* wait for 50ms */ 1951 uint32_t tmp; 1952 int i; 1953 1954 /* Trigger an invalidation of the L1 instruction caches */ 1955 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 1956 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 1957 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 1958 1959 /* Wait for invalidation complete */ 1960 for (i = 0; i < usec_timeout; i++) { 1961 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 1962 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 1963 INVALIDATE_CACHE_COMPLETE)) 1964 break; 1965 udelay(1); 1966 } 1967 1968 if (i >= usec_timeout) { 1969 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 1970 return -EINVAL; 1971 } 1972 1973 if (amdgpu_emu_mode == 1) 1974 adev->hdp.funcs->flush_hdp(adev, NULL); 1975 1976 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 1977 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 1978 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 1979 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 1980 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 1981 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 1982 1983 /* Program me ucode address into intruction cache address register */ 1984 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 1985 lower_32_bits(addr) & 0xFFFFF000); 1986 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 1987 upper_32_bits(addr)); 1988 1989 return 0; 1990 } 1991 1992 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 1993 { 1994 uint32_t usec_timeout = 50000; /* wait for 50ms */ 1995 uint32_t tmp; 1996 int i; 1997 1998 /* Trigger an invalidation of the L1 instruction caches */ 1999 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2000 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2001 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2002 2003 /* Wait for invalidation complete */ 2004 for (i = 0; i < usec_timeout; i++) { 2005 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2006 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2007 INVALIDATE_CACHE_COMPLETE)) 2008 break; 2009 udelay(1); 2010 } 2011 2012 if (i >= usec_timeout) { 2013 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2014 return -EINVAL; 2015 } 2016 2017 if (amdgpu_emu_mode == 1) 2018 adev->hdp.funcs->flush_hdp(adev, NULL); 2019 2020 tmp = RREG32_SOC15(GC, 0, 
regCP_PFP_IC_BASE_CNTL); 2021 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2022 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2023 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2024 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2025 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2026 2027 /* Program pfp ucode address into instruction cache address register */ 2028 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2029 lower_32_bits(addr) & 0xFFFFF000); 2030 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2031 upper_32_bits(addr)); 2032 2033 return 0; 2034 } 2035 2036 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2037 { 2038 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2039 uint32_t tmp; 2040 int i; 2041 2042 /* Trigger an invalidation of the L1 instruction caches */ 2043 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2044 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2045 2046 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2047 2048 /* Wait for invalidation complete */ 2049 for (i = 0; i < usec_timeout; i++) { 2050 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2051 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2052 INVALIDATE_CACHE_COMPLETE)) 2053 break; 2054 udelay(1); 2055 } 2056 2057 if (i >= usec_timeout) { 2058 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2059 return -EINVAL; 2060 } 2061 2062 if (amdgpu_emu_mode == 1) 2063 adev->hdp.funcs->flush_hdp(adev, NULL); 2064 2065 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2066 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2067 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2068 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2069 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2070 2071 /* Program mec1 ucode address into instruction cache address register */ 2072 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2073 lower_32_bits(addr) & 0xFFFFF000); 2074 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2075 upper_32_bits(addr)); 2076 2077 return 0; 2078 } 2079 2080 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2081 { 2082 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2083 uint32_t tmp; 2084 unsigned i, pipe_id; 2085 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2086 2087 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2088 adev->gfx.pfp_fw->data; 2089 2090 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2091 lower_32_bits(addr)); 2092 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2093 upper_32_bits(addr)); 2094 2095 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2096 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2097 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2098 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2099 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2100 2101 /* 2102 * Programming any of the CP_PFP_IC_BASE registers 2103 * forces invalidation of the ME L1 I$.
Wait for the 2104 * invalidation complete 2105 */ 2106 for (i = 0; i < usec_timeout; i++) { 2107 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2108 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2109 INVALIDATE_CACHE_COMPLETE)) 2110 break; 2111 udelay(1); 2112 } 2113 2114 if (i >= usec_timeout) { 2115 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2116 return -EINVAL; 2117 } 2118 2119 /* Prime the L1 instruction caches */ 2120 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2121 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2122 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2123 /* Waiting for cache primed*/ 2124 for (i = 0; i < usec_timeout; i++) { 2125 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2126 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2127 ICACHE_PRIMED)) 2128 break; 2129 udelay(1); 2130 } 2131 2132 if (i >= usec_timeout) { 2133 dev_err(adev->dev, "failed to prime instruction cache\n"); 2134 return -EINVAL; 2135 } 2136 2137 mutex_lock(&adev->srbm_mutex); 2138 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2139 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2140 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2141 (pfp_hdr->ucode_start_addr_hi << 30) | 2142 (pfp_hdr->ucode_start_addr_lo >> 2)); 2143 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2144 pfp_hdr->ucode_start_addr_hi >> 2); 2145 2146 /* 2147 * Program CP_ME_CNTL to reset given PIPE to take 2148 * effect of CP_PFP_PRGRM_CNTR_START. 2149 */ 2150 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2151 if (pipe_id == 0) 2152 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2153 PFP_PIPE0_RESET, 1); 2154 else 2155 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2156 PFP_PIPE1_RESET, 1); 2157 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2158 2159 /* Clear pfp pipe0 reset bit. 
*/ 2160 if (pipe_id == 0) 2161 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2162 PFP_PIPE0_RESET, 0); 2163 else 2164 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2165 PFP_PIPE1_RESET, 0); 2166 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2167 2168 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2169 lower_32_bits(addr2)); 2170 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2171 upper_32_bits(addr2)); 2172 } 2173 soc21_grbm_select(adev, 0, 0, 0, 0); 2174 mutex_unlock(&adev->srbm_mutex); 2175 2176 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2177 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2178 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2179 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2180 2181 /* Invalidate the data caches */ 2182 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2183 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2184 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2185 2186 for (i = 0; i < usec_timeout; i++) { 2187 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2188 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2189 INVALIDATE_DCACHE_COMPLETE)) 2190 break; 2191 udelay(1); 2192 } 2193 2194 if (i >= usec_timeout) { 2195 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2196 return -EINVAL; 2197 } 2198 2199 return 0; 2200 } 2201 2202 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2203 { 2204 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2205 uint32_t tmp; 2206 unsigned i, pipe_id; 2207 const struct gfx_firmware_header_v2_0 *me_hdr; 2208 2209 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2210 adev->gfx.me_fw->data; 2211 2212 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2213 lower_32_bits(addr)); 2214 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2215 upper_32_bits(addr)); 2216 2217 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2218 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2219 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2220 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2221 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2222 2223 /* 2224 * Programming any of the CP_ME_IC_BASE registers 2225 * forces invalidation of the ME L1 I$. 
Wait for the 2226 * invalidation complete 2227 */ 2228 for (i = 0; i < usec_timeout; i++) { 2229 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2230 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2231 INVALIDATE_CACHE_COMPLETE)) 2232 break; 2233 udelay(1); 2234 } 2235 2236 if (i >= usec_timeout) { 2237 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2238 return -EINVAL; 2239 } 2240 2241 /* Prime the instruction caches */ 2242 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2243 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2244 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2245 2246 /* Waiting for instruction cache primed */ 2247 for (i = 0; i < usec_timeout; i++) { 2248 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2249 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2250 ICACHE_PRIMED)) 2251 break; 2252 udelay(1); 2253 } 2254 2255 if (i >= usec_timeout) { 2256 dev_err(adev->dev, "failed to prime instruction cache\n"); 2257 return -EINVAL; 2258 } 2259 2260 mutex_lock(&adev->srbm_mutex); 2261 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2262 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2263 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2264 (me_hdr->ucode_start_addr_hi << 30) | 2265 (me_hdr->ucode_start_addr_lo >> 2) ); 2266 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2267 me_hdr->ucode_start_addr_hi>>2); 2268 2269 /* 2270 * Program CP_ME_CNTL to reset given PIPE to take 2271 * effect of CP_ME_PRGRM_CNTR_START. 2272 */ 2273 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2274 if (pipe_id == 0) 2275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2276 ME_PIPE0_RESET, 1); 2277 else 2278 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2279 ME_PIPE1_RESET, 1); 2280 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2281 2282 /* Clear me pipe reset bit.
*/ 2283 if (pipe_id == 0) 2284 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2285 ME_PIPE0_RESET, 0); 2286 else 2287 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2288 ME_PIPE1_RESET, 0); 2289 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2290 2291 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2292 lower_32_bits(addr2)); 2293 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2294 upper_32_bits(addr2)); 2295 } 2296 soc21_grbm_select(adev, 0, 0, 0, 0); 2297 mutex_unlock(&adev->srbm_mutex); 2298 2299 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2300 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2301 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2302 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2303 2304 /* Invalidate the data caches */ 2305 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2306 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2307 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2308 2309 for (i = 0; i < usec_timeout; i++) { 2310 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2311 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2312 INVALIDATE_DCACHE_COMPLETE)) 2313 break; 2314 udelay(1); 2315 } 2316 2317 if (i >= usec_timeout) { 2318 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2319 return -EINVAL; 2320 } 2321 2322 return 0; 2323 } 2324 2325 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2326 { 2327 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2328 uint32_t tmp; 2329 unsigned i; 2330 const struct gfx_firmware_header_v2_0 *mec_hdr; 2331 2332 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2333 adev->gfx.mec_fw->data; 2334 2335 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2336 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2337 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2338 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2339 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2340 2341 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2342 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2343 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2344 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2345 2346 mutex_lock(&adev->srbm_mutex); 2347 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2348 soc21_grbm_select(adev, 1, i, 0, 0); 2349 2350 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2351 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2352 upper_32_bits(addr2)); 2353 2354 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2355 mec_hdr->ucode_start_addr_lo >> 2 | 2356 mec_hdr->ucode_start_addr_hi << 30); 2357 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2358 mec_hdr->ucode_start_addr_hi >> 2); 2359 2360 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2361 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2362 upper_32_bits(addr)); 2363 } 2364 mutex_unlock(&adev->srbm_mutex); 2365 soc21_grbm_select(adev, 0, 0, 0, 0); 2366 2367 /* Trigger an invalidation of the L1 instruction caches */ 2368 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2369 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2370 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2371 2372 /* Wait for invalidation complete */ 2373 for (i = 0; i < usec_timeout; i++) { 2374 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2375 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2376 INVALIDATE_DCACHE_COMPLETE)) 2377 break; 2378 udelay(1); 2379 } 2380 2381 if (i >= 
usec_timeout) { 2382 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2383 return -EINVAL; 2384 } 2385 2386 /* Trigger an invalidation of the L1 instruction caches */ 2387 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2388 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2389 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2390 2391 /* Wait for invalidation complete */ 2392 for (i = 0; i < usec_timeout; i++) { 2393 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2394 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2395 INVALIDATE_CACHE_COMPLETE)) 2396 break; 2397 udelay(1); 2398 } 2399 2400 if (i >= usec_timeout) { 2401 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2402 return -EINVAL; 2403 } 2404 2405 return 0; 2406 } 2407 2408 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2409 { 2410 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2411 const struct gfx_firmware_header_v2_0 *me_hdr; 2412 const struct gfx_firmware_header_v2_0 *mec_hdr; 2413 uint32_t pipe_id, tmp; 2414 2415 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2416 adev->gfx.mec_fw->data; 2417 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2418 adev->gfx.me_fw->data; 2419 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2420 adev->gfx.pfp_fw->data; 2421 2422 /* config pfp program start addr */ 2423 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2424 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2425 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2426 (pfp_hdr->ucode_start_addr_hi << 30) | 2427 (pfp_hdr->ucode_start_addr_lo >> 2)); 2428 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2429 pfp_hdr->ucode_start_addr_hi >> 2); 2430 } 2431 soc21_grbm_select(adev, 0, 0, 0, 0); 2432 2433 /* reset pfp pipe */ 2434 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2435 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 2436 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 2437 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2438 2439 /* clear pfp pipe reset */ 2440 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 2441 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 2442 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2443 2444 /* config me program start addr */ 2445 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2446 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2447 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2448 (me_hdr->ucode_start_addr_hi << 30) | 2449 (me_hdr->ucode_start_addr_lo >> 2) ); 2450 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2451 me_hdr->ucode_start_addr_hi>>2); 2452 } 2453 soc21_grbm_select(adev, 0, 0, 0, 0); 2454 2455 /* reset me pipe */ 2456 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2457 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 2458 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 2459 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2460 2461 /* clear me pipe reset */ 2462 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 2463 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 2464 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2465 2466 /* config mec program start addr */ 2467 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 2468 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 2469 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2470 mec_hdr->ucode_start_addr_lo >> 2 | 2471 mec_hdr->ucode_start_addr_hi << 30); 2472 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2473 mec_hdr->ucode_start_addr_hi >> 2); 2474 } 2475 soc21_grbm_select(adev, 0, 0, 0, 0); 2476 2477 /* reset mec pipe */ 2478 tmp = 
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 2479 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 2480 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 2481 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 2482 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 2483 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2484 2485 /* clear mec pipe reset */ 2486 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 2487 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 2488 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 2489 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 2490 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2491 } 2492 2493 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 2494 { 2495 uint32_t cp_status; 2496 uint32_t bootload_status; 2497 int i, r; 2498 uint64_t addr, addr2; 2499 2500 for (i = 0; i < adev->usec_timeout; i++) { 2501 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 2502 2503 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || 2504 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) 2505 bootload_status = RREG32_SOC15(GC, 0, 2506 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 2507 else 2508 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 2509 2510 if ((cp_status == 0) && 2511 (REG_GET_FIELD(bootload_status, 2512 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 2513 break; 2514 } 2515 udelay(1); 2516 } 2517 2518 if (i >= adev->usec_timeout) { 2519 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 2520 return -ETIMEDOUT; 2521 } 2522 2523 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 2524 if (adev->gfx.rs64_enable) { 2525 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2526 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 2527 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2528 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 2529 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 2530 if (r) 2531 return r; 2532 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2533 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 2534 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2535 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 2536 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 2537 if (r) 2538 return r; 2539 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2540 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 2541 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2542 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 2543 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 2544 if (r) 2545 return r; 2546 } else { 2547 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2548 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 2549 r = gfx_v11_0_config_me_cache(adev, addr); 2550 if (r) 2551 return r; 2552 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2553 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 2554 r = gfx_v11_0_config_pfp_cache(adev, addr); 2555 if (r) 2556 return r; 2557 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2558 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 2559 r = gfx_v11_0_config_mec_cache(adev, addr); 2560 if (r) 2561 return r; 2562 } 2563 } 2564 2565 return 0; 2566 } 2567 2568 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2569 { 2570 int i; 2571 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2572 2573 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 
0 : 1); 2574 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2575 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2576 2577 for (i = 0; i < adev->usec_timeout; i++) { 2578 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 2579 break; 2580 udelay(1); 2581 } 2582 2583 if (i >= adev->usec_timeout) 2584 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 2585 2586 return 0; 2587 } 2588 2589 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 2590 { 2591 int r; 2592 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2593 const __le32 *fw_data; 2594 unsigned i, fw_size; 2595 2596 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2597 adev->gfx.pfp_fw->data; 2598 2599 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2600 2601 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 2602 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2603 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 2604 2605 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 2606 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2607 &adev->gfx.pfp.pfp_fw_obj, 2608 &adev->gfx.pfp.pfp_fw_gpu_addr, 2609 (void **)&adev->gfx.pfp.pfp_fw_ptr); 2610 if (r) { 2611 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 2612 gfx_v11_0_pfp_fini(adev); 2613 return r; 2614 } 2615 2616 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 2617 2618 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 2619 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 2620 2621 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 2622 2623 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 2624 2625 for (i = 0; i < pfp_hdr->jt_size; i++) 2626 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 2627 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 2628 2629 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2630 2631 return 0; 2632 } 2633 2634 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 2635 { 2636 int r; 2637 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2638 const __le32 *fw_ucode, *fw_data; 2639 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 2640 uint32_t tmp; 2641 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2642 2643 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2644 adev->gfx.pfp_fw->data; 2645 2646 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2647 2648 /* instruction */ 2649 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 2650 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 2651 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 2652 /* data */ 2653 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 2654 le32_to_cpu(pfp_hdr->data_offset_bytes)); 2655 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 2656 2657 /* 64kb align */ 2658 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 2659 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2660 &adev->gfx.pfp.pfp_fw_obj, 2661 &adev->gfx.pfp.pfp_fw_gpu_addr, 2662 (void **)&adev->gfx.pfp.pfp_fw_ptr); 2663 if (r) { 2664 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 2665 gfx_v11_0_pfp_fini(adev); 2666 return r; 2667 } 2668 2669 r = amdgpu_bo_create_reserved(adev, fw_data_size, 2670 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2671 &adev->gfx.pfp.pfp_fw_data_obj, 2672 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 2673 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 2674 if (r) { 2675 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 2676 gfx_v11_0_pfp_fini(adev); 2677 return r; 2678 } 2679 2680 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 2681 
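/* the RS64 data image is staged in its own 64KB-aligned VRAM BO, alongside the instruction image copied above */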
memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 2682 2683 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 2684 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 2685 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 2686 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 2687 2688 if (amdgpu_emu_mode == 1) 2689 adev->hdp.funcs->flush_hdp(adev, NULL); 2690 2691 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2692 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 2693 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2694 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 2695 2696 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2697 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2698 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2699 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2700 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2701 2702 /* 2703 * Programming any of the CP_PFP_IC_BASE registers 2704 * forces invalidation of the ME L1 I$. Wait for the 2705 * invalidation complete 2706 */ 2707 for (i = 0; i < usec_timeout; i++) { 2708 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2709 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2710 INVALIDATE_CACHE_COMPLETE)) 2711 break; 2712 udelay(1); 2713 } 2714 2715 if (i >= usec_timeout) { 2716 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2717 return -EINVAL; 2718 } 2719 2720 /* Prime the L1 instruction caches */ 2721 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2722 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2723 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2724 /* Waiting for cache primed*/ 2725 for (i = 0; i < usec_timeout; i++) { 2726 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2727 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2728 ICACHE_PRIMED)) 2729 break; 2730 udelay(1); 2731 } 2732 2733 if (i >= usec_timeout) { 2734 dev_err(adev->dev, "failed to prime instruction cache\n"); 2735 return -EINVAL; 2736 } 2737 2738 mutex_lock(&adev->srbm_mutex); 2739 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2740 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2741 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2742 (pfp_hdr->ucode_start_addr_hi << 30) | 2743 (pfp_hdr->ucode_start_addr_lo >> 2) ); 2744 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2745 pfp_hdr->ucode_start_addr_hi>>2); 2746 2747 /* 2748 * Program CP_ME_CNTL to reset given PIPE to take 2749 * effect of CP_PFP_PRGRM_CNTR_START. 2750 */ 2751 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2752 if (pipe_id == 0) 2753 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2754 PFP_PIPE0_RESET, 1); 2755 else 2756 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2757 PFP_PIPE1_RESET, 1); 2758 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2759 2760 /* Clear pfp pipe0 reset bit. 
*/ 2761 if (pipe_id == 0) 2762 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2763 PFP_PIPE0_RESET, 0); 2764 else 2765 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2766 PFP_PIPE1_RESET, 0); 2767 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2768 2769 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2770 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 2771 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2772 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 2773 } 2774 soc21_grbm_select(adev, 0, 0, 0, 0); 2775 mutex_unlock(&adev->srbm_mutex); 2776 2777 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2778 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2779 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2780 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2781 2782 /* Invalidate the data caches */ 2783 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2784 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2785 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2786 2787 for (i = 0; i < usec_timeout; i++) { 2788 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2789 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2790 INVALIDATE_DCACHE_COMPLETE)) 2791 break; 2792 udelay(1); 2793 } 2794 2795 if (i >= usec_timeout) { 2796 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2797 return -EINVAL; 2798 } 2799 2800 return 0; 2801 } 2802 2803 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 2804 { 2805 int r; 2806 const struct gfx_firmware_header_v1_0 *me_hdr; 2807 const __le32 *fw_data; 2808 unsigned i, fw_size; 2809 2810 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2811 adev->gfx.me_fw->data; 2812 2813 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2814 2815 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 2816 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2817 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 2818 2819 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 2820 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2821 &adev->gfx.me.me_fw_obj, 2822 &adev->gfx.me.me_fw_gpu_addr, 2823 (void **)&adev->gfx.me.me_fw_ptr); 2824 if (r) { 2825 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 2826 gfx_v11_0_me_fini(adev); 2827 return r; 2828 } 2829 2830 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 2831 2832 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 2833 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 2834 2835 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 2836 2837 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 2838 2839 for (i = 0; i < me_hdr->jt_size; i++) 2840 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 2841 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 2842 2843 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 2844 2845 return 0; 2846 } 2847 2848 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 2849 { 2850 int r; 2851 const struct gfx_firmware_header_v2_0 *me_hdr; 2852 const __le32 *fw_ucode, *fw_data; 2853 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 2854 uint32_t tmp; 2855 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2856 2857 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2858 adev->gfx.me_fw->data; 2859 2860 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2861 2862 /* instruction */ 2863 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 2864 le32_to_cpu(me_hdr->ucode_offset_bytes)); 2865 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 2866 /* data */ 2867 
fw_data = (const __le32 *)(adev->gfx.me_fw->data + 2868 le32_to_cpu(me_hdr->data_offset_bytes)); 2869 fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); 2870 2871 /* 64kb align*/ 2872 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 2873 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2874 &adev->gfx.me.me_fw_obj, 2875 &adev->gfx.me.me_fw_gpu_addr, 2876 (void **)&adev->gfx.me.me_fw_ptr); 2877 if (r) { 2878 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); 2879 gfx_v11_0_me_fini(adev); 2880 return r; 2881 } 2882 2883 r = amdgpu_bo_create_reserved(adev, fw_data_size, 2884 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 2885 &adev->gfx.me.me_fw_data_obj, 2886 &adev->gfx.me.me_fw_data_gpu_addr, 2887 (void **)&adev->gfx.me.me_fw_data_ptr); 2888 if (r) { 2889 dev_err(adev->dev, "(%d) failed to create me data bo\n", r); 2890 gfx_v11_0_pfp_fini(adev); 2891 return r; 2892 } 2893 2894 memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); 2895 memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); 2896 2897 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 2898 amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); 2899 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 2900 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 2901 2902 if (amdgpu_emu_mode == 1) 2903 adev->hdp.funcs->flush_hdp(adev, NULL); 2904 2905 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2906 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); 2907 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2908 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 2909 2910 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2911 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2912 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2913 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2914 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2915 2916 /* 2917 * Programming any of the CP_ME_IC_BASE registers 2918 * forces invalidation of the ME L1 I$. Wait for the 2919 * invalidation complete 2920 */ 2921 for (i = 0; i < usec_timeout; i++) { 2922 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2923 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2924 INVALIDATE_CACHE_COMPLETE)) 2925 break; 2926 udelay(1); 2927 } 2928 2929 if (i >= usec_timeout) { 2930 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2931 return -EINVAL; 2932 } 2933 2934 /* Prime the instruction caches */ 2935 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2936 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2937 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2938 2939 /* Waiting for instruction cache primed*/ 2940 for (i = 0; i < usec_timeout; i++) { 2941 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2942 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2943 ICACHE_PRIMED)) 2944 break; 2945 udelay(1); 2946 } 2947 2948 if (i >= usec_timeout) { 2949 dev_err(adev->dev, "failed to prime instruction cache\n"); 2950 return -EINVAL; 2951 } 2952 2953 mutex_lock(&adev->srbm_mutex); 2954 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2955 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2956 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2957 (me_hdr->ucode_start_addr_hi << 30) | 2958 (me_hdr->ucode_start_addr_lo >> 2) ); 2959 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2960 me_hdr->ucode_start_addr_hi>>2); 2961 2962 /* 2963 * Program CP_ME_CNTL to reset given PIPE to take 2964 * effect of CP_PFP_PRGRM_CNTR_START. 
2965 */ 2966 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2967 if (pipe_id == 0) 2968 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2969 ME_PIPE0_RESET, 1); 2970 else 2971 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2972 ME_PIPE1_RESET, 1); 2973 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2974 2975 /* Clear pfp pipe0 reset bit. */ 2976 if (pipe_id == 0) 2977 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2978 ME_PIPE0_RESET, 0); 2979 else 2980 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2981 ME_PIPE1_RESET, 0); 2982 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2983 2984 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2985 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 2986 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2987 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 2988 } 2989 soc21_grbm_select(adev, 0, 0, 0, 0); 2990 mutex_unlock(&adev->srbm_mutex); 2991 2992 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2993 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2994 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2995 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2996 2997 /* Invalidate the data caches */ 2998 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2999 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3000 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3001 3002 for (i = 0; i < usec_timeout; i++) { 3003 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3004 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3005 INVALIDATE_DCACHE_COMPLETE)) 3006 break; 3007 udelay(1); 3008 } 3009 3010 if (i >= usec_timeout) { 3011 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3012 return -EINVAL; 3013 } 3014 3015 return 0; 3016 } 3017 3018 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3019 { 3020 int r; 3021 3022 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3023 return -EINVAL; 3024 3025 gfx_v11_0_cp_gfx_enable(adev, false); 3026 3027 if (adev->gfx.rs64_enable) 3028 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3029 else 3030 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3031 if (r) { 3032 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3033 return r; 3034 } 3035 3036 if (adev->gfx.rs64_enable) 3037 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3038 else 3039 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3040 if (r) { 3041 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3042 return r; 3043 } 3044 3045 return 0; 3046 } 3047 3048 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3049 { 3050 struct amdgpu_ring *ring; 3051 const struct cs_section_def *sect = NULL; 3052 const struct cs_extent_def *ext = NULL; 3053 int r, i; 3054 int ctx_reg_offset; 3055 3056 /* init the CP */ 3057 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3058 adev->gfx.config.max_hw_contexts - 1); 3059 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3060 3061 if (!amdgpu_async_gfx_ring) 3062 gfx_v11_0_cp_gfx_enable(adev, true); 3063 3064 ring = &adev->gfx.gfx_ring[0]; 3065 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3066 if (r) { 3067 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3068 return r; 3069 } 3070 3071 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3072 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3073 3074 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3075 amdgpu_ring_write(ring, 0x80000000); 3076 amdgpu_ring_write(ring, 0x80000000); 3077 3078 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3079 for (ext = sect->section; 
ext->extent != NULL; ++ext) { 3080 if (sect->id == SECT_CONTEXT) { 3081 amdgpu_ring_write(ring, 3082 PACKET3(PACKET3_SET_CONTEXT_REG, 3083 ext->reg_count)); 3084 amdgpu_ring_write(ring, ext->reg_index - 3085 PACKET3_SET_CONTEXT_REG_START); 3086 for (i = 0; i < ext->reg_count; i++) 3087 amdgpu_ring_write(ring, ext->extent[i]); 3088 } 3089 } 3090 } 3091 3092 ctx_reg_offset = 3093 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3094 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3095 amdgpu_ring_write(ring, ctx_reg_offset); 3096 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3097 3098 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3099 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3100 3101 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3102 amdgpu_ring_write(ring, 0); 3103 3104 amdgpu_ring_commit(ring); 3105 3106 /* submit cs packet to copy state 0 to next available state */ 3107 if (adev->gfx.num_gfx_rings > 1) { 3108 /* maximum supported gfx ring is 2 */ 3109 ring = &adev->gfx.gfx_ring[1]; 3110 r = amdgpu_ring_alloc(ring, 2); 3111 if (r) { 3112 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3113 return r; 3114 } 3115 3116 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3117 amdgpu_ring_write(ring, 0); 3118 3119 amdgpu_ring_commit(ring); 3120 } 3121 return 0; 3122 } 3123 3124 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3125 CP_PIPE_ID pipe) 3126 { 3127 u32 tmp; 3128 3129 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3130 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3131 3132 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3133 } 3134 3135 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3136 struct amdgpu_ring *ring) 3137 { 3138 u32 tmp; 3139 3140 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3141 if (ring->use_doorbell) { 3142 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3143 DOORBELL_OFFSET, ring->doorbell_index); 3144 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3145 DOORBELL_EN, 1); 3146 } else { 3147 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3148 DOORBELL_EN, 0); 3149 } 3150 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3151 3152 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3153 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3154 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3155 3156 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3157 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3158 } 3159 3160 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3161 { 3162 struct amdgpu_ring *ring; 3163 u32 tmp; 3164 u32 rb_bufsz; 3165 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3166 u32 i; 3167 3168 /* Set the write pointer delay */ 3169 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3170 3171 /* set the RB to use vmid 0 */ 3172 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3173 3174 /* Init gfx ring 0 for pipe 0 */ 3175 mutex_lock(&adev->srbm_mutex); 3176 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3177 3178 /* Set ring buffer size */ 3179 ring = &adev->gfx.gfx_ring[0]; 3180 rb_bufsz = order_base_2(ring->ring_size / 8); 3181 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3182 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3183 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3184 3185 /* Initialize the ring buffer's write pointers */ 3186 ring->wptr = 0; 3187 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3188 
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3189 3190 /* set the wb address whether it's enabled or not */ 3191 rptr_addr = ring->rptr_gpu_addr; 3192 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3193 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3194 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3195 3196 wptr_gpu_addr = ring->wptr_gpu_addr; 3197 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3198 lower_32_bits(wptr_gpu_addr)); 3199 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3200 upper_32_bits(wptr_gpu_addr)); 3201 3202 mdelay(1); 3203 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3204 3205 rb_addr = ring->gpu_addr >> 8; 3206 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3207 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3208 3209 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3210 3211 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3212 mutex_unlock(&adev->srbm_mutex); 3213 3214 /* Init gfx ring 1 for pipe 1 */ 3215 if (adev->gfx.num_gfx_rings > 1) { 3216 mutex_lock(&adev->srbm_mutex); 3217 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3218 /* maximum supported gfx ring is 2 */ 3219 ring = &adev->gfx.gfx_ring[1]; 3220 rb_bufsz = order_base_2(ring->ring_size / 8); 3221 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3222 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3223 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3224 /* Initialize the ring buffer's write pointers */ 3225 ring->wptr = 0; 3226 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3227 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3228 /* Set the wb address whether it's enabled or not */ 3229 rptr_addr = ring->rptr_gpu_addr; 3230 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3231 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3232 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3233 wptr_gpu_addr = ring->wptr_gpu_addr; 3234 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3235 lower_32_bits(wptr_gpu_addr)); 3236 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3237 upper_32_bits(wptr_gpu_addr)); 3238 3239 mdelay(1); 3240 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3241 3242 rb_addr = ring->gpu_addr >> 8; 3243 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3244 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3245 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3246 3247 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3248 mutex_unlock(&adev->srbm_mutex); 3249 } 3250 /* Switch to pipe 0 */ 3251 mutex_lock(&adev->srbm_mutex); 3252 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3253 mutex_unlock(&adev->srbm_mutex); 3254 3255 /* start the ring */ 3256 gfx_v11_0_cp_gfx_start(adev); 3257 3258 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3259 ring = &adev->gfx.gfx_ring[i]; 3260 ring->sched.ready = true; 3261 } 3262 3263 return 0; 3264 } 3265 3266 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3267 { 3268 u32 data; 3269 3270 if (adev->gfx.rs64_enable) { 3271 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3272 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3273 enable ? 0 : 1); 3274 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3275 enable ? 0 : 1); 3276 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3277 enable ? 0 : 1); 3278 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3279 enable ?
0 : 1); 3280 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3281 enable ? 0 : 1); 3282 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3283 enable ? 1 : 0); 3284 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3285 enable ? 1 : 0); 3286 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3287 enable ? 1 : 0); 3288 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3289 enable ? 1 : 0); 3290 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3291 enable ? 0 : 1); 3292 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3293 } else { 3294 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3295 3296 if (enable) { 3297 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3298 if (!adev->enable_mes_kiq) 3299 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3300 MEC_ME2_HALT, 0); 3301 } else { 3302 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3303 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3304 } 3305 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3306 } 3307 3308 adev->gfx.kiq.ring.sched.ready = enable; 3309 3310 udelay(50); 3311 } 3312 3313 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3314 { 3315 const struct gfx_firmware_header_v1_0 *mec_hdr; 3316 const __le32 *fw_data; 3317 unsigned i, fw_size; 3318 u32 *fw = NULL; 3319 int r; 3320 3321 if (!adev->gfx.mec_fw) 3322 return -EINVAL; 3323 3324 gfx_v11_0_cp_compute_enable(adev, false); 3325 3326 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3327 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3328 3329 fw_data = (const __le32 *) 3330 (adev->gfx.mec_fw->data + 3331 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3332 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3333 3334 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3335 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3336 &adev->gfx.mec.mec_fw_obj, 3337 &adev->gfx.mec.mec_fw_gpu_addr, 3338 (void **)&fw); 3339 if (r) { 3340 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3341 gfx_v11_0_mec_fini(adev); 3342 return r; 3343 } 3344 3345 memcpy(fw, fw_data, fw_size); 3346 3347 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3348 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3349 3350 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3351 3352 /* MEC1 */ 3353 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3354 3355 for (i = 0; i < mec_hdr->jt_size; i++) 3356 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3357 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3358 3359 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3360 3361 return 0; 3362 } 3363 3364 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3365 { 3366 const struct gfx_firmware_header_v2_0 *mec_hdr; 3367 const __le32 *fw_ucode, *fw_data; 3368 u32 tmp, fw_ucode_size, fw_data_size; 3369 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3370 u32 *fw_ucode_ptr, *fw_data_ptr; 3371 int r; 3372 3373 if (!adev->gfx.mec_fw) 3374 return -EINVAL; 3375 3376 gfx_v11_0_cp_compute_enable(adev, false); 3377 3378 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3379 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3380 3381 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3382 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3383 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3384 3385 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3386 le32_to_cpu(mec_hdr->data_offset_bytes)); 
	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	mutex_unlock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* Trigger an invalidation of the L1 data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp,
				       CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	return 0;
}

static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is the KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
}

static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
{
	/* set graphics engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);

	/* set compute engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.kiq * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.userqueue_end * 2) << 2);
}

static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
				  struct amdgpu_mqd_prop *prop)
{
	struct v11_gfx_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr;
	uint32_t tmp;
	uint32_t rb_bufsz;

	/* set up gfx hqd wptr */
	mqd->cp_gfx_hqd_wptr = 0;
	mqd->cp_gfx_hqd_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set up mqd control */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
	mqd->cp_gfx_mqd_control = tmp;

	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
	mqd->cp_gfx_hqd_vmid = 0;

	/* set up default queue priority level
	 * 0x0 = low priority, 0x1 = high priority */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
	mqd->cp_gfx_hqd_queue_priority = tmp;

	/* set up time quantum */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
	mqd->cp_gfx_hqd_quantum = tmp;

	/* set up gfx hqd base.
	 * this is similar to CP_RB_BASE */
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
	mqd->cp_gfx_hqd_rptr_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up rb_wptr_poll addr */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
	mqd->cp_gfx_hqd_cntl = tmp;

	/* set up cp_doorbell_control */
	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	mqd->cp_rb_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);

	/* activate the queue */
	mqd->cp_gfx_hqd_active = 1;

	return 0;
}

#ifdef BRING_UP_DEBUG
static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_gfx_mqd *mqd = ring->mqd_ptr;

	/* set mmCP_GFX_HQD_WPTR/_HI to 0 */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);

	/* set GFX_MQD_BASE */
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set GFX_MQD_CONTROL */
	WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);

	/* set GFX_HQD_VMID to 0 */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);

	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
		     mqd->cp_gfx_hqd_queue_priority);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);

	/* set GFX_HQD_BASE, similar to CP_RB_BASE */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);

	/* set GFX_HQD_RPTR_ADDR, similar to CP_RB_RPTR */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);

	/* set GFX_HQD_CNTL, similar to CP_RB_CNTL */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);

	/* set RB_WPTR_POLL_ADDR */
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);

	/* set RB_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0,
regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); 3648 3649 /* active the queue */ 3650 WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); 3651 3652 return 0; 3653 } 3654 #endif 3655 3656 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) 3657 { 3658 struct amdgpu_device *adev = ring->adev; 3659 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 3660 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 3661 3662 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 3663 memset((void *)mqd, 0, sizeof(*mqd)); 3664 mutex_lock(&adev->srbm_mutex); 3665 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3666 amdgpu_ring_init_mqd(ring); 3667 #ifdef BRING_UP_DEBUG 3668 gfx_v11_0_gfx_queue_init_register(ring); 3669 #endif 3670 soc21_grbm_select(adev, 0, 0, 0, 0); 3671 mutex_unlock(&adev->srbm_mutex); 3672 if (adev->gfx.me.mqd_backup[mqd_idx]) 3673 memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 3674 } else if (amdgpu_in_reset(adev)) { 3675 /* reset mqd with the backup copy */ 3676 if (adev->gfx.me.mqd_backup[mqd_idx]) 3677 memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 3678 /* reset the ring */ 3679 ring->wptr = 0; 3680 *ring->wptr_cpu_addr = 0; 3681 amdgpu_ring_clear_ring(ring); 3682 #ifdef BRING_UP_DEBUG 3683 mutex_lock(&adev->srbm_mutex); 3684 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3685 gfx_v11_0_gfx_queue_init_register(ring); 3686 soc21_grbm_select(adev, 0, 0, 0, 0); 3687 mutex_unlock(&adev->srbm_mutex); 3688 #endif 3689 } else { 3690 amdgpu_ring_clear_ring(ring); 3691 } 3692 3693 return 0; 3694 } 3695 3696 #ifndef BRING_UP_DEBUG 3697 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev) 3698 { 3699 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 3700 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3701 int r, i; 3702 3703 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 3704 return -EINVAL; 3705 3706 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * 3707 adev->gfx.num_gfx_rings); 3708 if (r) { 3709 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3710 return r; 3711 } 3712 3713 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3714 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); 3715 3716 return amdgpu_ring_test_helper(kiq_ring); 3717 } 3718 #endif 3719 3720 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 3721 { 3722 int r, i; 3723 struct amdgpu_ring *ring; 3724 3725 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3726 ring = &adev->gfx.gfx_ring[i]; 3727 3728 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3729 if (unlikely(r != 0)) 3730 goto done; 3731 3732 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3733 if (!r) { 3734 r = gfx_v11_0_gfx_init_queue(ring); 3735 amdgpu_bo_kunmap(ring->mqd_obj); 3736 ring->mqd_ptr = NULL; 3737 } 3738 amdgpu_bo_unreserve(ring->mqd_obj); 3739 if (r) 3740 goto done; 3741 } 3742 #ifndef BRING_UP_DEBUG 3743 r = gfx_v11_0_kiq_enable_kgq(adev); 3744 if (r) 3745 goto done; 3746 #endif 3747 r = gfx_v11_0_cp_gfx_start(adev); 3748 if (r) 3749 goto done; 3750 3751 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3752 ring = &adev->gfx.gfx_ring[i]; 3753 ring->sched.ready = true; 3754 } 3755 done: 3756 return r; 3757 } 3758 3759 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 3760 struct amdgpu_mqd_prop *prop) 3761 { 3762 struct v11_compute_mqd *mqd = m; 3763 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3764 uint32_t tmp; 3765 3766 mqd->header = 0xC0310800; 3767 mqd->compute_pipelinestat_enable = 0x00000001; 3768 
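	/* allow waves on all CUs of every shader engine by default */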
mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3769 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3770 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3771 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3772 mqd->compute_misc_reserved = 0x00000007; 3773 3774 eop_base_addr = prop->eop_gpu_addr >> 8; 3775 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3776 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3777 3778 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3779 tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); 3780 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3781 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 3782 3783 mqd->cp_hqd_eop_control = tmp; 3784 3785 /* enable doorbell? */ 3786 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 3787 3788 if (prop->use_doorbell) { 3789 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3790 DOORBELL_OFFSET, prop->doorbell_index); 3791 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3792 DOORBELL_EN, 1); 3793 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3794 DOORBELL_SOURCE, 0); 3795 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3796 DOORBELL_HIT, 0); 3797 } else { 3798 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3799 DOORBELL_EN, 0); 3800 } 3801 3802 mqd->cp_hqd_pq_doorbell_control = tmp; 3803 3804 /* disable the queue if it's active */ 3805 mqd->cp_hqd_dequeue_request = 0; 3806 mqd->cp_hqd_pq_rptr = 0; 3807 mqd->cp_hqd_pq_wptr_lo = 0; 3808 mqd->cp_hqd_pq_wptr_hi = 0; 3809 3810 /* set the pointer to the MQD */ 3811 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 3812 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 3813 3814 /* set MQD vmid to 0 */ 3815 tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); 3816 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3817 mqd->cp_mqd_control = tmp; 3818 3819 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3820 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 3821 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3822 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3823 3824 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3825 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); 3826 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3827 (order_base_2(prop->queue_size / 4) - 1)); 3828 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3829 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3830 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3831 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 3832 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3833 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3834 mqd->cp_hqd_pq_control = tmp; 3835 3836 /* set the wb address whether it's enabled or not */ 3837 wb_gpu_addr = prop->rptr_gpu_addr; 3838 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3839 mqd->cp_hqd_pq_rptr_report_addr_hi = 3840 upper_32_bits(wb_gpu_addr) & 0xffff; 3841 3842 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3843 wb_gpu_addr = prop->wptr_gpu_addr; 3844 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3845 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3846 3847 tmp = 0; 3848 /* enable the doorbell if requested */ 3849 if (prop->use_doorbell) { 3850 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 3851 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3852 DOORBELL_OFFSET, prop->doorbell_index); 3853 3854 tmp = 
REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3855 DOORBELL_EN, 1); 3856 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3857 DOORBELL_SOURCE, 0); 3858 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3859 DOORBELL_HIT, 0); 3860 } 3861 3862 mqd->cp_hqd_pq_doorbell_control = tmp; 3863 3864 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3865 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); 3866 3867 /* set the vmid for the queue */ 3868 mqd->cp_hqd_vmid = 0; 3869 3870 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); 3871 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 3872 mqd->cp_hqd_persistent_state = tmp; 3873 3874 /* set MIN_IB_AVAIL_SIZE */ 3875 tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); 3876 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3877 mqd->cp_hqd_ib_control = tmp; 3878 3879 /* set static priority for a compute queue/ring */ 3880 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 3881 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 3882 3883 mqd->cp_hqd_active = prop->hqd_active; 3884 3885 return 0; 3886 } 3887 3888 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 3889 { 3890 struct amdgpu_device *adev = ring->adev; 3891 struct v11_compute_mqd *mqd = ring->mqd_ptr; 3892 int j; 3893 3894 /* inactivate the queue */ 3895 if (amdgpu_sriov_vf(adev)) 3896 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 3897 3898 /* disable wptr polling */ 3899 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3900 3901 /* write the EOP addr */ 3902 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 3903 mqd->cp_hqd_eop_base_addr_lo); 3904 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 3905 mqd->cp_hqd_eop_base_addr_hi); 3906 3907 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3908 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 3909 mqd->cp_hqd_eop_control); 3910 3911 /* enable doorbell? 
*/ 3912 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 3913 mqd->cp_hqd_pq_doorbell_control); 3914 3915 /* disable the queue if it's active */ 3916 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 3917 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 3918 for (j = 0; j < adev->usec_timeout; j++) { 3919 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 3920 break; 3921 udelay(1); 3922 } 3923 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 3924 mqd->cp_hqd_dequeue_request); 3925 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 3926 mqd->cp_hqd_pq_rptr); 3927 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 3928 mqd->cp_hqd_pq_wptr_lo); 3929 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 3930 mqd->cp_hqd_pq_wptr_hi); 3931 } 3932 3933 /* set the pointer to the MQD */ 3934 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 3935 mqd->cp_mqd_base_addr_lo); 3936 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 3937 mqd->cp_mqd_base_addr_hi); 3938 3939 /* set MQD vmid to 0 */ 3940 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 3941 mqd->cp_mqd_control); 3942 3943 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3944 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 3945 mqd->cp_hqd_pq_base_lo); 3946 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 3947 mqd->cp_hqd_pq_base_hi); 3948 3949 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3950 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 3951 mqd->cp_hqd_pq_control); 3952 3953 /* set the wb address whether it's enabled or not */ 3954 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 3955 mqd->cp_hqd_pq_rptr_report_addr_lo); 3956 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3957 mqd->cp_hqd_pq_rptr_report_addr_hi); 3958 3959 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3960 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 3961 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3962 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3963 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3964 3965 /* enable the doorbell if requested */ 3966 if (ring->use_doorbell) { 3967 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 3968 (adev->doorbell_index.kiq * 2) << 2); 3969 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 3970 (adev->doorbell_index.userqueue_end * 2) << 2); 3971 } 3972 3973 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 3974 mqd->cp_hqd_pq_doorbell_control); 3975 3976 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3977 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 3978 mqd->cp_hqd_pq_wptr_lo); 3979 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 3980 mqd->cp_hqd_pq_wptr_hi); 3981 3982 /* set the vmid for the queue */ 3983 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 3984 3985 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 3986 mqd->cp_hqd_persistent_state); 3987 3988 /* activate the queue */ 3989 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 3990 mqd->cp_hqd_active); 3991 3992 if (ring->use_doorbell) 3993 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3994 3995 return 0; 3996 } 3997 3998 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 3999 { 4000 struct amdgpu_device *adev = ring->adev; 4001 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4002 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4003 4004 gfx_v11_0_kiq_setting(ring); 4005 4006 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4007 /* reset MQD to a clean status */ 4008 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4009 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4010 4011 /* reset ring buffer */ 4012 ring->wptr = 0; 4013 amdgpu_ring_clear_ring(ring); 4014 4015 
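		/* reprogram the KIQ HQD registers from the restored MQD
		 * under SRBM selection of this queue
		 */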
mutex_lock(&adev->srbm_mutex); 4016 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4017 gfx_v11_0_kiq_init_register(ring); 4018 soc21_grbm_select(adev, 0, 0, 0, 0); 4019 mutex_unlock(&adev->srbm_mutex); 4020 } else { 4021 memset((void *)mqd, 0, sizeof(*mqd)); 4022 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4023 amdgpu_ring_clear_ring(ring); 4024 mutex_lock(&adev->srbm_mutex); 4025 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4026 amdgpu_ring_init_mqd(ring); 4027 gfx_v11_0_kiq_init_register(ring); 4028 soc21_grbm_select(adev, 0, 0, 0, 0); 4029 mutex_unlock(&adev->srbm_mutex); 4030 4031 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4032 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4033 } 4034 4035 return 0; 4036 } 4037 4038 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) 4039 { 4040 struct amdgpu_device *adev = ring->adev; 4041 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4042 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4043 4044 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4045 memset((void *)mqd, 0, sizeof(*mqd)); 4046 mutex_lock(&adev->srbm_mutex); 4047 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4048 amdgpu_ring_init_mqd(ring); 4049 soc21_grbm_select(adev, 0, 0, 0, 0); 4050 mutex_unlock(&adev->srbm_mutex); 4051 4052 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4053 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4054 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4055 /* reset MQD to a clean status */ 4056 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4057 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4058 4059 /* reset ring buffer */ 4060 ring->wptr = 0; 4061 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4062 amdgpu_ring_clear_ring(ring); 4063 } else { 4064 amdgpu_ring_clear_ring(ring); 4065 } 4066 4067 return 0; 4068 } 4069 4070 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4071 { 4072 struct amdgpu_ring *ring; 4073 int r; 4074 4075 ring = &adev->gfx.kiq.ring; 4076 4077 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4078 if (unlikely(r != 0)) 4079 return r; 4080 4081 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4082 if (unlikely(r != 0)) { 4083 amdgpu_bo_unreserve(ring->mqd_obj); 4084 return r; 4085 } 4086 4087 gfx_v11_0_kiq_init_queue(ring); 4088 amdgpu_bo_kunmap(ring->mqd_obj); 4089 ring->mqd_ptr = NULL; 4090 amdgpu_bo_unreserve(ring->mqd_obj); 4091 ring->sched.ready = true; 4092 return 0; 4093 } 4094 4095 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4096 { 4097 struct amdgpu_ring *ring = NULL; 4098 int r = 0, i; 4099 4100 if (!amdgpu_async_gfx_ring) 4101 gfx_v11_0_cp_compute_enable(adev, true); 4102 4103 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4104 ring = &adev->gfx.compute_ring[i]; 4105 4106 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4107 if (unlikely(r != 0)) 4108 goto done; 4109 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4110 if (!r) { 4111 r = gfx_v11_0_kcq_init_queue(ring); 4112 amdgpu_bo_kunmap(ring->mqd_obj); 4113 ring->mqd_ptr = NULL; 4114 } 4115 amdgpu_bo_unreserve(ring->mqd_obj); 4116 if (r) 4117 goto done; 4118 } 4119 4120 r = amdgpu_gfx_enable_kcq(adev); 4121 done: 4122 return r; 4123 } 4124 4125 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4126 { 4127 int r, i; 4128 struct amdgpu_ring *ring; 4129 4130 if (!(adev->flags & AMD_IS_APU)) 4131 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4132 4133 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4134 
/* legacy firmware loading */ 4135 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4136 if (r) 4137 return r; 4138 4139 if (adev->gfx.rs64_enable) 4140 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4141 else 4142 r = gfx_v11_0_cp_compute_load_microcode(adev); 4143 if (r) 4144 return r; 4145 } 4146 4147 gfx_v11_0_cp_set_doorbell_range(adev); 4148 4149 if (amdgpu_async_gfx_ring) { 4150 gfx_v11_0_cp_compute_enable(adev, true); 4151 gfx_v11_0_cp_gfx_enable(adev, true); 4152 } 4153 4154 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4155 r = amdgpu_mes_kiq_hw_init(adev); 4156 else 4157 r = gfx_v11_0_kiq_resume(adev); 4158 if (r) 4159 return r; 4160 4161 r = gfx_v11_0_kcq_resume(adev); 4162 if (r) 4163 return r; 4164 4165 if (!amdgpu_async_gfx_ring) { 4166 r = gfx_v11_0_cp_gfx_resume(adev); 4167 if (r) 4168 return r; 4169 } else { 4170 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4171 if (r) 4172 return r; 4173 } 4174 4175 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4176 ring = &adev->gfx.gfx_ring[i]; 4177 r = amdgpu_ring_test_helper(ring); 4178 if (r) 4179 return r; 4180 } 4181 4182 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4183 ring = &adev->gfx.compute_ring[i]; 4184 r = amdgpu_ring_test_helper(ring); 4185 if (r) 4186 return r; 4187 } 4188 4189 return 0; 4190 } 4191 4192 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4193 { 4194 gfx_v11_0_cp_gfx_enable(adev, enable); 4195 gfx_v11_0_cp_compute_enable(adev, enable); 4196 } 4197 4198 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4199 { 4200 int r; 4201 bool value; 4202 4203 r = adev->gfxhub.funcs->gart_enable(adev); 4204 if (r) 4205 return r; 4206 4207 adev->hdp.funcs->flush_hdp(adev, NULL); 4208 4209 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
4210 false : true; 4211 4212 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4213 amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); 4214 4215 return 0; 4216 } 4217 4218 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4219 { 4220 u32 tmp; 4221 4222 /* select RS64 */ 4223 if (adev->gfx.rs64_enable) { 4224 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4225 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4226 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4227 4228 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4229 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4230 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4231 } 4232 4233 if (amdgpu_emu_mode == 1) 4234 msleep(100); 4235 } 4236 4237 static int get_gb_addr_config(struct amdgpu_device * adev) 4238 { 4239 u32 gb_addr_config; 4240 4241 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4242 if (gb_addr_config == 0) 4243 return -EINVAL; 4244 4245 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4246 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4247 4248 adev->gfx.config.gb_addr_config = gb_addr_config; 4249 4250 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4251 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4252 GB_ADDR_CONFIG, NUM_PIPES); 4253 4254 adev->gfx.config.max_tile_pipes = 4255 adev->gfx.config.gb_addr_config_fields.num_pipes; 4256 4257 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4258 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4259 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4260 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4261 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4262 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4263 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4264 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4265 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4266 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4267 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4268 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4269 4270 return 0; 4271 } 4272 4273 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4274 { 4275 uint32_t data; 4276 4277 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4278 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4279 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4280 4281 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4282 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4283 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4284 } 4285 4286 static int gfx_v11_0_hw_init(void *handle) 4287 { 4288 int r; 4289 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4290 4291 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4292 if (adev->gfx.imu.funcs) { 4293 /* RLC autoload sequence 1: Program rlc ram */ 4294 if (adev->gfx.imu.funcs->program_rlc_ram) 4295 adev->gfx.imu.funcs->program_rlc_ram(adev); 4296 } 4297 /* rlc autoload firmware */ 4298 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4299 if (r) 4300 return r; 4301 } else { 4302 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4303 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4304 if (adev->gfx.imu.funcs->load_microcode) 4305 adev->gfx.imu.funcs->load_microcode(adev); 4306 if (adev->gfx.imu.funcs->setup_imu) 4307 adev->gfx.imu.funcs->setup_imu(adev); 4308 if (adev->gfx.imu.funcs->start_imu) 4309 adev->gfx.imu.funcs->start_imu(adev); 4310 } 4311 4312 /* disable gpa mode in backdoor loading */ 4313 gfx_v11_0_disable_gpa_mode(adev); 4314 } 4315 } 4316 4317 if ((adev->firmware.load_type == 
	     AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
		if (r) {
			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
			return r;
		}
	}

	adev->gfx.is_poweron = true;

	if (get_gb_addr_config(adev))
		DRM_WARN("Invalid gb_addr_config!\n");

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
	    adev->gfx.rs64_enable)
		gfx_v11_0_config_gfx_rs64(adev);

	r = gfx_v11_0_gfxhub_enable(adev);
	if (r)
		return r;

	if (!amdgpu_emu_mode)
		gfx_v11_0_init_golden_registers(adev);

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU firmware
		 * being loaded first, so for direct loading the SMC ucode has
		 * to be loaded here before the RLC.
		 */
		if (!(adev->flags & AMD_IS_APU)) {
			r = amdgpu_pm_load_smu_firmware(adev, NULL);
			if (r)
				return r;
		}
	}

	gfx_v11_0_constants_init(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		gfx_v11_0_select_cp_fw_arch(adev);

	if (adev->nbio.funcs->gc_doorbell_init)
		adev->nbio.funcs->gc_doorbell_init(adev);

	r = gfx_v11_0_rlc_resume(adev);
	if (r)
		return r;

	/*
	 * golden register init and rlc resume may override some registers,
	 * so reconfigure them here
	 */
	gfx_v11_0_tcp_harvest(adev);

	r = gfx_v11_0_cp_resume(adev);
	if (r)
		return r;

	return r;
}

#ifndef BRING_UP_DEBUG
static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i, r = 0;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
			      adev->gfx.num_gfx_rings))
		return -ENOMEM;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
					   PREEMPT_QUEUES, 0, 0);

	if (adev->gfx.kiq.ring.sched.ready)
		r = amdgpu_ring_test_helper(kiq_ring);

	return r;
}
#endif

static int gfx_v11_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	if (!adev->no_hw_access) {
#ifndef BRING_UP_DEBUG
		if (amdgpu_async_gfx_ring) {
			r = gfx_v11_0_kiq_disable_kgq(adev);
			if (r)
				DRM_ERROR("KGQ disable failed\n");
		}
#endif
		if (amdgpu_gfx_disable_kcq(adev))
			DRM_ERROR("KCQ disable failed\n");

		amdgpu_mes_kiq_hw_fini(adev);
	}

	if (amdgpu_sriov_vf(adev))
		/* Remove the steps disabling CPG and clearing KIQ position,
		 * so that CP could perform IDLE-SAVE during switch. Those
		 * steps are necessary to avoid a DMAR error in gfx9, but the
		 * error is not reproduced on gfx11.
4433 */ 4434 return 0; 4435 4436 gfx_v11_0_cp_enable(adev, false); 4437 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4438 4439 adev->gfxhub.funcs->gart_disable(adev); 4440 4441 adev->gfx.is_poweron = false; 4442 4443 return 0; 4444 } 4445 4446 static int gfx_v11_0_suspend(void *handle) 4447 { 4448 return gfx_v11_0_hw_fini(handle); 4449 } 4450 4451 static int gfx_v11_0_resume(void *handle) 4452 { 4453 return gfx_v11_0_hw_init(handle); 4454 } 4455 4456 static bool gfx_v11_0_is_idle(void *handle) 4457 { 4458 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4459 4460 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4461 GRBM_STATUS, GUI_ACTIVE)) 4462 return false; 4463 else 4464 return true; 4465 } 4466 4467 static int gfx_v11_0_wait_for_idle(void *handle) 4468 { 4469 unsigned i; 4470 u32 tmp; 4471 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4472 4473 for (i = 0; i < adev->usec_timeout; i++) { 4474 /* read MC_STATUS */ 4475 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4476 GRBM_STATUS__GUI_ACTIVE_MASK; 4477 4478 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4479 return 0; 4480 udelay(1); 4481 } 4482 return -ETIMEDOUT; 4483 } 4484 4485 static int gfx_v11_0_soft_reset(void *handle) 4486 { 4487 u32 grbm_soft_reset = 0; 4488 u32 tmp; 4489 int i, j, k; 4490 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4491 4492 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4493 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 4494 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 4495 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 4496 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 4497 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4498 4499 gfx_v11_0_set_safe_mode(adev); 4500 4501 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 4502 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 4503 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 4504 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 4505 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); 4506 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); 4507 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); 4508 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 4509 4510 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 4511 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 4512 } 4513 } 4514 } 4515 for (i = 0; i < adev->gfx.me.num_me; ++i) { 4516 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 4517 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 4518 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 4519 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); 4520 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); 4521 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); 4522 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 4523 4524 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 4525 } 4526 } 4527 } 4528 4529 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 4530 4531 // Read CP_VMID_RESET register three times. 
4532 // to get sufficient time for GFX_HQD_ACTIVE reach 0 4533 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4534 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4535 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4536 4537 for (i = 0; i < adev->usec_timeout; i++) { 4538 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 4539 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 4540 break; 4541 udelay(1); 4542 } 4543 if (i >= adev->usec_timeout) { 4544 printk("Failed to wait all pipes clean\n"); 4545 return -EINVAL; 4546 } 4547 4548 /********** trigger soft reset ***********/ 4549 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4550 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4551 SOFT_RESET_CP, 1); 4552 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4553 SOFT_RESET_GFX, 1); 4554 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4555 SOFT_RESET_CPF, 1); 4556 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4557 SOFT_RESET_CPC, 1); 4558 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4559 SOFT_RESET_CPG, 1); 4560 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4561 /********** exit soft reset ***********/ 4562 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4563 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4564 SOFT_RESET_CP, 0); 4565 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4566 SOFT_RESET_GFX, 0); 4567 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4568 SOFT_RESET_CPF, 0); 4569 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4570 SOFT_RESET_CPC, 0); 4571 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4572 SOFT_RESET_CPG, 0); 4573 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4574 4575 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 4576 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 4577 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 4578 4579 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 4580 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 4581 4582 for (i = 0; i < adev->usec_timeout; i++) { 4583 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 4584 break; 4585 udelay(1); 4586 } 4587 if (i >= adev->usec_timeout) { 4588 printk("Failed to wait CP_VMID_RESET to 0\n"); 4589 return -EINVAL; 4590 } 4591 4592 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4593 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 4594 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 4595 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 4596 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 4597 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4598 4599 gfx_v11_0_unset_safe_mode(adev); 4600 4601 return gfx_v11_0_cp_resume(adev); 4602 } 4603 4604 static bool gfx_v11_0_check_soft_reset(void *handle) 4605 { 4606 int i, r; 4607 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4608 struct amdgpu_ring *ring; 4609 long tmo = msecs_to_jiffies(1000); 4610 4611 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4612 ring = &adev->gfx.gfx_ring[i]; 4613 r = amdgpu_ring_test_ib(ring, tmo); 4614 if (r) 4615 return true; 4616 } 4617 4618 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4619 ring = &adev->gfx.compute_ring[i]; 4620 r = amdgpu_ring_test_ib(ring, tmo); 4621 if (r) 4622 return true; 4623 } 4624 4625 return false; 4626 } 4627 4628 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4629 { 4630 uint64_t clock; 4631 4632 
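	/* keep GFXOFF disabled while reading the SMUIO golden TSC counters */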
amdgpu_gfx_off_ctrl(adev, false); 4633 mutex_lock(&adev->gfx.gpu_clock_mutex); 4634 clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) | 4635 ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL); 4636 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4637 amdgpu_gfx_off_ctrl(adev, true); 4638 return clock; 4639 } 4640 4641 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4642 uint32_t vmid, 4643 uint32_t gds_base, uint32_t gds_size, 4644 uint32_t gws_base, uint32_t gws_size, 4645 uint32_t oa_base, uint32_t oa_size) 4646 { 4647 struct amdgpu_device *adev = ring->adev; 4648 4649 /* GDS Base */ 4650 gfx_v11_0_write_data_to_reg(ring, 0, false, 4651 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 4652 gds_base); 4653 4654 /* GDS Size */ 4655 gfx_v11_0_write_data_to_reg(ring, 0, false, 4656 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 4657 gds_size); 4658 4659 /* GWS */ 4660 gfx_v11_0_write_data_to_reg(ring, 0, false, 4661 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 4662 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4663 4664 /* OA */ 4665 gfx_v11_0_write_data_to_reg(ring, 0, false, 4666 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 4667 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4668 } 4669 4670 static int gfx_v11_0_early_init(void *handle) 4671 { 4672 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4673 4674 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 4675 4676 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 4677 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4678 AMDGPU_MAX_COMPUTE_RINGS); 4679 4680 gfx_v11_0_set_kiq_pm4_funcs(adev); 4681 gfx_v11_0_set_ring_funcs(adev); 4682 gfx_v11_0_set_irq_funcs(adev); 4683 gfx_v11_0_set_gds_init(adev); 4684 gfx_v11_0_set_rlc_funcs(adev); 4685 gfx_v11_0_set_mqd_funcs(adev); 4686 gfx_v11_0_set_imu_funcs(adev); 4687 4688 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 4689 4690 return 0; 4691 } 4692 4693 static int gfx_v11_0_ras_late_init(void *handle) 4694 { 4695 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4696 struct ras_common_if *gfx_common_if; 4697 int ret; 4698 4699 gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); 4700 if (!gfx_common_if) 4701 return -ENOMEM; 4702 4703 gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; 4704 4705 ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); 4706 if (ret) 4707 dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); 4708 4709 kfree(gfx_common_if); 4710 return 0; 4711 } 4712 4713 static int gfx_v11_0_late_init(void *handle) 4714 { 4715 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4716 int r; 4717 4718 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4719 if (r) 4720 return r; 4721 4722 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4723 if (r) 4724 return r; 4725 4726 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { 4727 r = gfx_v11_0_ras_late_init(handle); 4728 if (r) 4729 return r; 4730 } 4731 4732 return 0; 4733 } 4734 4735 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 4736 { 4737 uint32_t rlc_cntl; 4738 4739 /* if RLC is not enabled, do nothing */ 4740 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 4741 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? 
true : false; 4742 } 4743 4744 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) 4745 { 4746 uint32_t data; 4747 unsigned i; 4748 4749 data = RLC_SAFE_MODE__CMD_MASK; 4750 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4751 4752 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 4753 4754 /* wait for RLC_SAFE_MODE */ 4755 for (i = 0; i < adev->usec_timeout; i++) { 4756 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 4757 RLC_SAFE_MODE, CMD)) 4758 break; 4759 udelay(1); 4760 } 4761 } 4762 4763 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev) 4764 { 4765 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 4766 } 4767 4768 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 4769 bool enable) 4770 { 4771 uint32_t def, data; 4772 4773 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 4774 return; 4775 4776 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4777 4778 if (enable) 4779 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 4780 else 4781 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 4782 4783 if (def != data) 4784 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4785 } 4786 4787 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 4788 bool enable) 4789 { 4790 uint32_t def, data; 4791 4792 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 4793 return; 4794 4795 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4796 4797 if (enable) 4798 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 4799 else 4800 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 4801 4802 if (def != data) 4803 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4804 } 4805 4806 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 4807 bool enable) 4808 { 4809 uint32_t def, data; 4810 4811 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 4812 return; 4813 4814 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4815 4816 if (enable) 4817 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 4818 else 4819 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 4820 4821 if (def != data) 4822 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4823 } 4824 4825 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4826 bool enable) 4827 { 4828 uint32_t data, def; 4829 4830 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 4831 return; 4832 4833 /* It is disabled by HW by default */ 4834 if (enable) { 4835 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 4836 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4837 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4838 4839 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4840 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4841 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 4842 4843 if (def != data) 4844 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4845 } 4846 } else { 4847 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 4848 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4849 4850 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4851 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4852 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 4853 4854 if (def != data) 4855 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4856 } 4857 } 4858 } 4859 4860 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4861 bool enable) 4862 
{ 4863 uint32_t def, data; 4864 4865 if (!(adev->cg_flags & 4866 (AMD_CG_SUPPORT_GFX_CGCG | 4867 AMD_CG_SUPPORT_GFX_CGLS | 4868 AMD_CG_SUPPORT_GFX_3D_CGCG | 4869 AMD_CG_SUPPORT_GFX_3D_CGLS))) 4870 return; 4871 4872 if (enable) { 4873 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 4874 4875 /* unset CGCG override */ 4876 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 4877 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4878 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4879 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4880 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 4881 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4882 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4883 4884 /* update CGCG override bits */ 4885 if (def != data) 4886 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 4887 4888 /* enable cgcg FSM(0x0000363F) */ 4889 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 4890 4891 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 4892 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 4893 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4894 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4895 } 4896 4897 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 4898 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 4899 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4900 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4901 } 4902 4903 if (def != data) 4904 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 4905 4906 /* Program RLC_CGCG_CGLS_CTRL_3D */ 4907 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 4908 4909 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 4910 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 4911 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4912 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4913 } 4914 4915 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 4916 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 4917 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4918 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4919 } 4920 4921 if (def != data) 4922 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 4923 4924 /* set IDLE_POLL_COUNT(0x00900100) */ 4925 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 4926 4927 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 4928 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4929 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4930 4931 if (def != data) 4932 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 4933 4934 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4935 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 4936 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 4937 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 4938 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 4939 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 4940 4941 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 4942 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 4943 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 4944 4945 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 4946 if (adev->sdma.num_instances > 1) { 4947 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 4948 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 4949 WREG32_SOC15(GC, 0, 
regSDMA1_RLC_CGCG_CTRL, data); 4950 } 4951 } else { 4952 /* Program RLC_CGCG_CGLS_CTRL */ 4953 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 4954 4955 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 4956 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4957 4958 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4959 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4960 4961 if (def != data) 4962 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 4963 4964 /* Program RLC_CGCG_CGLS_CTRL_3D */ 4965 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 4966 4967 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 4968 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4969 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4970 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4971 4972 if (def != data) 4973 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 4974 4975 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 4976 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 4977 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 4978 4979 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 4980 if (adev->sdma.num_instances > 1) { 4981 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 4982 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 4983 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 4984 } 4985 } 4986 } 4987 4988 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4989 bool enable) 4990 { 4991 amdgpu_gfx_rlc_enter_safe_mode(adev); 4992 4993 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 4994 4995 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 4996 4997 gfx_v11_0_update_repeater_fgcg(adev, enable); 4998 4999 gfx_v11_0_update_sram_fgcg(adev, enable); 5000 5001 gfx_v11_0_update_perf_clk(adev, enable); 5002 5003 if (adev->cg_flags & 5004 (AMD_CG_SUPPORT_GFX_MGCG | 5005 AMD_CG_SUPPORT_GFX_CGLS | 5006 AMD_CG_SUPPORT_GFX_CGCG | 5007 AMD_CG_SUPPORT_GFX_3D_CGCG | 5008 AMD_CG_SUPPORT_GFX_3D_CGLS)) 5009 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5010 5011 amdgpu_gfx_rlc_exit_safe_mode(adev); 5012 5013 return 0; 5014 } 5015 5016 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 5017 { 5018 u32 reg, data; 5019 5020 amdgpu_gfx_off_ctrl(adev, false); 5021 5022 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5023 if (amdgpu_sriov_is_pp_one_vf(adev)) 5024 data = RREG32_NO_KIQ(reg); 5025 else 5026 data = RREG32(reg); 5027 5028 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5029 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5030 5031 if (amdgpu_sriov_is_pp_one_vf(adev)) 5032 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5033 else 5034 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5035 5036 amdgpu_gfx_off_ctrl(adev, true); 5037 } 5038 5039 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5040 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5041 .set_safe_mode = gfx_v11_0_set_safe_mode, 5042 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5043 .init = gfx_v11_0_rlc_init, 5044 .get_csb_size = gfx_v11_0_get_csb_size, 5045 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5046 .resume = gfx_v11_0_rlc_resume, 5047 .stop = gfx_v11_0_rlc_stop, 5048 .reset = gfx_v11_0_rlc_reset, 5049 .start = gfx_v11_0_rlc_start, 5050 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5051 }; 5052 5053 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5054 { 5055 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5056 5057 if (enable && (adev->pg_flags & 
AMD_PG_SUPPORT_GFX_PG)) 5058 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5059 else 5060 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5061 5062 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5063 5064 // Program RLC_PG_DELAY3 for CGPG hysteresis 5065 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5066 switch (adev->ip_versions[GC_HWIP][0]) { 5067 case IP_VERSION(11, 0, 1): 5068 case IP_VERSION(11, 0, 4): 5069 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5070 break; 5071 default: 5072 break; 5073 } 5074 } 5075 } 5076 5077 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5078 { 5079 amdgpu_gfx_rlc_enter_safe_mode(adev); 5080 5081 gfx_v11_cntl_power_gating(adev, enable); 5082 5083 amdgpu_gfx_rlc_exit_safe_mode(adev); 5084 } 5085 5086 static int gfx_v11_0_set_powergating_state(void *handle, 5087 enum amd_powergating_state state) 5088 { 5089 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5090 bool enable = (state == AMD_PG_STATE_GATE); 5091 5092 if (amdgpu_sriov_vf(adev)) 5093 return 0; 5094 5095 switch (adev->ip_versions[GC_HWIP][0]) { 5096 case IP_VERSION(11, 0, 0): 5097 case IP_VERSION(11, 0, 2): 5098 case IP_VERSION(11, 0, 3): 5099 amdgpu_gfx_off_ctrl(adev, enable); 5100 break; 5101 case IP_VERSION(11, 0, 1): 5102 case IP_VERSION(11, 0, 4): 5103 gfx_v11_cntl_pg(adev, enable); 5104 amdgpu_gfx_off_ctrl(adev, enable); 5105 break; 5106 default: 5107 break; 5108 } 5109 5110 return 0; 5111 } 5112 5113 static int gfx_v11_0_set_clockgating_state(void *handle, 5114 enum amd_clockgating_state state) 5115 { 5116 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5117 5118 if (amdgpu_sriov_vf(adev)) 5119 return 0; 5120 5121 switch (adev->ip_versions[GC_HWIP][0]) { 5122 case IP_VERSION(11, 0, 0): 5123 case IP_VERSION(11, 0, 1): 5124 case IP_VERSION(11, 0, 2): 5125 case IP_VERSION(11, 0, 3): 5126 case IP_VERSION(11, 0, 4): 5127 gfx_v11_0_update_gfx_clock_gating(adev, 5128 state == AMD_CG_STATE_GATE); 5129 break; 5130 default: 5131 break; 5132 } 5133 5134 return 0; 5135 } 5136 5137 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags) 5138 { 5139 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5140 int data; 5141 5142 /* AMD_CG_SUPPORT_GFX_MGCG */ 5143 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5144 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5145 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5146 5147 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5148 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5149 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5150 5151 /* AMD_CG_SUPPORT_GFX_FGCG */ 5152 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5153 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5154 5155 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5156 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5157 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5158 5159 /* AMD_CG_SUPPORT_GFX_CGCG */ 5160 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5161 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5162 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5163 5164 /* AMD_CG_SUPPORT_GFX_CGLS */ 5165 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5166 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5167 5168 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5169 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5170 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5171 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5172 5173 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5174 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5175 
*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5176 } 5177 5178 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5179 { 5180 /* gfx11 is 32bit rptr*/ 5181 return *(uint32_t *)ring->rptr_cpu_addr; 5182 } 5183 5184 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5185 { 5186 struct amdgpu_device *adev = ring->adev; 5187 u64 wptr; 5188 5189 /* XXX check if swapping is necessary on BE */ 5190 if (ring->use_doorbell) { 5191 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5192 } else { 5193 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5194 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5195 } 5196 5197 return wptr; 5198 } 5199 5200 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5201 { 5202 struct amdgpu_device *adev = ring->adev; 5203 uint32_t *wptr_saved; 5204 uint32_t *is_queue_unmap; 5205 uint64_t aggregated_db_index; 5206 uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; 5207 uint64_t wptr_tmp; 5208 5209 if (ring->is_mes_queue) { 5210 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); 5211 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + 5212 sizeof(uint32_t)); 5213 aggregated_db_index = 5214 amdgpu_mes_get_aggregated_doorbell_index(adev, 5215 ring->hw_prio); 5216 5217 wptr_tmp = ring->wptr & ring->buf_mask; 5218 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); 5219 *wptr_saved = wptr_tmp; 5220 /* assume doorbell always being used by mes mapped queue */ 5221 if (*is_queue_unmap) { 5222 WDOORBELL64(aggregated_db_index, wptr_tmp); 5223 WDOORBELL64(ring->doorbell_index, wptr_tmp); 5224 } else { 5225 WDOORBELL64(ring->doorbell_index, wptr_tmp); 5226 5227 if (*is_queue_unmap) 5228 WDOORBELL64(aggregated_db_index, wptr_tmp); 5229 } 5230 } else { 5231 if (ring->use_doorbell) { 5232 /* XXX check if swapping is necessary on BE */ 5233 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5234 ring->wptr); 5235 WDOORBELL64(ring->doorbell_index, ring->wptr); 5236 } else { 5237 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5238 lower_32_bits(ring->wptr)); 5239 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5240 upper_32_bits(ring->wptr)); 5241 } 5242 } 5243 } 5244 5245 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5246 { 5247 /* gfx11 hardware is 32bit rptr */ 5248 return *(uint32_t *)ring->rptr_cpu_addr; 5249 } 5250 5251 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5252 { 5253 u64 wptr; 5254 5255 /* XXX check if swapping is necessary on BE */ 5256 if (ring->use_doorbell) 5257 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5258 else 5259 BUG(); 5260 return wptr; 5261 } 5262 5263 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5264 { 5265 struct amdgpu_device *adev = ring->adev; 5266 uint32_t *wptr_saved; 5267 uint32_t *is_queue_unmap; 5268 uint64_t aggregated_db_index; 5269 uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; 5270 uint64_t wptr_tmp; 5271 5272 if (ring->is_mes_queue) { 5273 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); 5274 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + 5275 sizeof(uint32_t)); 5276 aggregated_db_index = 5277 amdgpu_mes_get_aggregated_doorbell_index(adev, 5278 ring->hw_prio); 5279 5280 wptr_tmp = ring->wptr & ring->buf_mask; 5281 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); 5282 *wptr_saved = wptr_tmp; 5283 /* assume doorbell always used by mes mapped queue */ 5284 if (*is_queue_unmap) { 5285 WDOORBELL64(aggregated_db_index, wptr_tmp); 5286 WDOORBELL64(ring->doorbell_index, wptr_tmp); 
5287 } else { 5288 WDOORBELL64(ring->doorbell_index, wptr_tmp); 5289 5290 if (*is_queue_unmap) 5291 WDOORBELL64(aggregated_db_index, wptr_tmp); 5292 } 5293 } else { 5294 /* XXX check if swapping is necessary on BE */ 5295 if (ring->use_doorbell) { 5296 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5297 ring->wptr); 5298 WDOORBELL64(ring->doorbell_index, ring->wptr); 5299 } else { 5300 BUG(); /* only DOORBELL method supported on gfx11 now */ 5301 } 5302 } 5303 } 5304 5305 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5306 { 5307 struct amdgpu_device *adev = ring->adev; 5308 u32 ref_and_mask, reg_mem_engine; 5309 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5310 5311 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5312 switch (ring->me) { 5313 case 1: 5314 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5315 break; 5316 case 2: 5317 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5318 break; 5319 default: 5320 return; 5321 } 5322 reg_mem_engine = 0; 5323 } else { 5324 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5325 reg_mem_engine = 1; /* pfp */ 5326 } 5327 5328 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5329 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5330 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5331 ref_and_mask, ref_and_mask, 0x20); 5332 } 5333 5334 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5335 struct amdgpu_job *job, 5336 struct amdgpu_ib *ib, 5337 uint32_t flags) 5338 { 5339 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5340 u32 header, control = 0; 5341 5342 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5343 5344 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5345 5346 control |= ib->length_dw | (vmid << 24); 5347 5348 if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5349 control |= INDIRECT_BUFFER_PRE_ENB(1); 5350 5351 if (flags & AMDGPU_IB_PREEMPTED) 5352 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5353 5354 if (vmid) 5355 gfx_v11_0_ring_emit_de_meta(ring, 5356 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5357 } 5358 5359 if (ring->is_mes_queue) 5360 /* inherit vmid from mqd */ 5361 control |= 0x400000; 5362 5363 amdgpu_ring_write(ring, header); 5364 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5365 amdgpu_ring_write(ring, 5366 #ifdef __BIG_ENDIAN 5367 (2 << 0) | 5368 #endif 5369 lower_32_bits(ib->gpu_addr)); 5370 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5371 amdgpu_ring_write(ring, control); 5372 } 5373 5374 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5375 struct amdgpu_job *job, 5376 struct amdgpu_ib *ib, 5377 uint32_t flags) 5378 { 5379 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5380 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5381 5382 if (ring->is_mes_queue) 5383 /* inherit vmid from mqd */ 5384 control |= 0x40000000; 5385 5386 /* Currently, there is a high possibility to get wave ID mismatch 5387 * between ME and GDS, leading to a hw deadlock, because ME generates 5388 * different wave IDs than the GDS expects. This situation happens 5389 * randomly when at least 5 compute pipes use GDS ordered append. 5390 * The wave IDs generated by ME are also wrong after suspend/resume. 5391 * Those are probably bugs somewhere else in the kernel driver. 5392 * 5393 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5394 * GDS to 0 for this ring (me/pipe). 
5395 */ 5396 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5397 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5398 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5399 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5400 } 5401 5402 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5403 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5404 amdgpu_ring_write(ring, 5405 #ifdef __BIG_ENDIAN 5406 (2 << 0) | 5407 #endif 5408 lower_32_bits(ib->gpu_addr)); 5409 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5410 amdgpu_ring_write(ring, control); 5411 } 5412 5413 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5414 u64 seq, unsigned flags) 5415 { 5416 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5417 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5418 5419 /* RELEASE_MEM - flush caches, send int */ 5420 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5421 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5422 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5423 PACKET3_RELEASE_MEM_GCR_GL2_INV | 5424 PACKET3_RELEASE_MEM_GCR_GL2_US | 5425 PACKET3_RELEASE_MEM_GCR_GL1_INV | 5426 PACKET3_RELEASE_MEM_GCR_GLV_INV | 5427 PACKET3_RELEASE_MEM_GCR_GLM_INV | 5428 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5429 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5430 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5431 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5432 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5433 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 5434 5435 /* 5436 * the address should be Qword aligned if 64bit write, Dword 5437 * aligned if only send 32bit data low (discard data high) 5438 */ 5439 if (write64bit) 5440 BUG_ON(addr & 0x7); 5441 else 5442 BUG_ON(addr & 0x3); 5443 amdgpu_ring_write(ring, lower_32_bits(addr)); 5444 amdgpu_ring_write(ring, upper_32_bits(addr)); 5445 amdgpu_ring_write(ring, lower_32_bits(seq)); 5446 amdgpu_ring_write(ring, upper_32_bits(seq)); 5447 amdgpu_ring_write(ring, ring->is_mes_queue ? 
5448 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); 5449 } 5450 5451 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5452 { 5453 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5454 uint32_t seq = ring->fence_drv.sync_seq; 5455 uint64_t addr = ring->fence_drv.gpu_addr; 5456 5457 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5458 upper_32_bits(addr), seq, 0xffffffff, 4); 5459 } 5460 5461 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5462 uint16_t pasid, uint32_t flush_type, 5463 bool all_hub, uint8_t dst_sel) 5464 { 5465 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5466 amdgpu_ring_write(ring, 5467 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5468 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5469 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5470 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5471 } 5472 5473 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5474 unsigned vmid, uint64_t pd_addr) 5475 { 5476 if (ring->is_mes_queue) 5477 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); 5478 else 5479 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5480 5481 /* compute doesn't have PFP */ 5482 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5483 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5484 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5485 amdgpu_ring_write(ring, 0x0); 5486 } 5487 } 5488 5489 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5490 u64 seq, unsigned int flags) 5491 { 5492 struct amdgpu_device *adev = ring->adev; 5493 5494 /* we only allocate 32bit for each seq wb address */ 5495 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5496 5497 /* write fence seq to the "addr" */ 5498 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5499 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5500 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5501 amdgpu_ring_write(ring, lower_32_bits(addr)); 5502 amdgpu_ring_write(ring, upper_32_bits(addr)); 5503 amdgpu_ring_write(ring, lower_32_bits(seq)); 5504 5505 if (flags & AMDGPU_FENCE_FLAG_INT) { 5506 /* set register to trigger INT */ 5507 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5508 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5509 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5510 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 5511 amdgpu_ring_write(ring, 0); 5512 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5513 } 5514 } 5515 5516 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, 5517 uint32_t flags) 5518 { 5519 uint32_t dw2 = 0; 5520 5521 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5522 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5523 /* set load_global_config & load_global_uconfig */ 5524 dw2 |= 0x8001; 5525 /* set load_cs_sh_regs */ 5526 dw2 |= 0x01000000; 5527 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5528 dw2 |= 0x10002; 5529 } 5530 5531 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5532 amdgpu_ring_write(ring, dw2); 5533 amdgpu_ring_write(ring, 0); 5534 } 5535 5536 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5537 { 5538 unsigned ret; 5539 5540 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5541 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5542 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5543 amdgpu_ring_write(ring, 0); /* discard following DWs if 
*cond_exec_gpu_addr==0 */ 5544 ret = ring->wptr & ring->buf_mask; 5545 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5546 5547 return ret; 5548 } 5549 5550 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5551 { 5552 unsigned cur; 5553 BUG_ON(offset > ring->buf_mask); 5554 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5555 5556 cur = (ring->wptr - 1) & ring->buf_mask; 5557 if (likely(cur > offset)) 5558 ring->ring[offset] = cur - offset; 5559 else 5560 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; 5561 } 5562 5563 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 5564 { 5565 int i, r = 0; 5566 struct amdgpu_device *adev = ring->adev; 5567 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 5568 struct amdgpu_ring *kiq_ring = &kiq->ring; 5569 unsigned long flags; 5570 5571 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5572 return -EINVAL; 5573 5574 spin_lock_irqsave(&kiq->ring_lock, flags); 5575 5576 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5577 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5578 return -ENOMEM; 5579 } 5580 5581 /* assert preemption condition */ 5582 amdgpu_ring_set_preempt_cond_exec(ring, false); 5583 5584 /* assert IB preemption, emit the trailing fence */ 5585 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5586 ring->trail_fence_gpu_addr, 5587 ++ring->trail_seq); 5588 amdgpu_ring_commit(kiq_ring); 5589 5590 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5591 5592 /* poll the trailing fence */ 5593 for (i = 0; i < adev->usec_timeout; i++) { 5594 if (ring->trail_seq == 5595 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 5596 break; 5597 udelay(1); 5598 } 5599 5600 if (i >= adev->usec_timeout) { 5601 r = -EINVAL; 5602 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 5603 } 5604 5605 /* deassert preemption condition */ 5606 amdgpu_ring_set_preempt_cond_exec(ring, true); 5607 return r; 5608 } 5609 5610 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 5611 { 5612 struct amdgpu_device *adev = ring->adev; 5613 struct v10_de_ib_state de_payload = {0}; 5614 uint64_t offset, gds_addr, de_payload_gpu_addr; 5615 void *de_payload_cpu_addr; 5616 int cnt; 5617 5618 if (ring->is_mes_queue) { 5619 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5620 gfx[0].gfx_meta_data) + 5621 offsetof(struct v10_gfx_meta_data, de_payload); 5622 de_payload_gpu_addr = 5623 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5624 de_payload_cpu_addr = 5625 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5626 5627 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5628 gfx[0].gds_backup) + 5629 offsetof(struct v10_gfx_meta_data, de_payload); 5630 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5631 } else { 5632 offset = offsetof(struct v10_gfx_meta_data, de_payload); 5633 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5634 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5635 5636 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5637 AMDGPU_CSA_SIZE - adev->gds.gds_size, 5638 PAGE_SIZE); 5639 } 5640 5641 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5642 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5643 5644 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5645 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5646 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5647 WRITE_DATA_DST_SEL(8) | 5648 WR_CONFIRM) | 5649 WRITE_DATA_CACHE_POLICY(0)); 5650 amdgpu_ring_write(ring, 
lower_32_bits(de_payload_gpu_addr)); 5651 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5652 5653 if (resume) 5654 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5655 sizeof(de_payload) >> 2); 5656 else 5657 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5658 sizeof(de_payload) >> 2); 5659 } 5660 5661 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5662 bool secure) 5663 { 5664 uint32_t v = secure ? FRAME_TMZ : 0; 5665 5666 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5667 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5668 } 5669 5670 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5671 uint32_t reg_val_offs) 5672 { 5673 struct amdgpu_device *adev = ring->adev; 5674 5675 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5676 amdgpu_ring_write(ring, 0 | /* src: register*/ 5677 (5 << 8) | /* dst: memory */ 5678 (1 << 20)); /* write confirm */ 5679 amdgpu_ring_write(ring, reg); 5680 amdgpu_ring_write(ring, 0); 5681 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5682 reg_val_offs * 4)); 5683 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5684 reg_val_offs * 4)); 5685 } 5686 5687 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5688 uint32_t val) 5689 { 5690 uint32_t cmd = 0; 5691 5692 switch (ring->funcs->type) { 5693 case AMDGPU_RING_TYPE_GFX: 5694 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5695 break; 5696 case AMDGPU_RING_TYPE_KIQ: 5697 cmd = (1 << 16); /* no inc addr */ 5698 break; 5699 default: 5700 cmd = WR_CONFIRM; 5701 break; 5702 } 5703 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5704 amdgpu_ring_write(ring, cmd); 5705 amdgpu_ring_write(ring, reg); 5706 amdgpu_ring_write(ring, 0); 5707 amdgpu_ring_write(ring, val); 5708 } 5709 5710 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5711 uint32_t val, uint32_t mask) 5712 { 5713 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5714 } 5715 5716 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5717 uint32_t reg0, uint32_t reg1, 5718 uint32_t ref, uint32_t mask) 5719 { 5720 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5721 5722 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5723 ref, mask, 0x20); 5724 } 5725 5726 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, 5727 unsigned vmid) 5728 { 5729 struct amdgpu_device *adev = ring->adev; 5730 uint32_t value = 0; 5731 5732 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5733 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5734 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5735 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5736 WREG32_SOC15(GC, 0, regSQ_CMD, value); 5737 } 5738 5739 static void 5740 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5741 uint32_t me, uint32_t pipe, 5742 enum amdgpu_interrupt_state state) 5743 { 5744 uint32_t cp_int_cntl, cp_int_cntl_reg; 5745 5746 if (!me) { 5747 switch (pipe) { 5748 case 0: 5749 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 5750 break; 5751 case 1: 5752 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 5753 break; 5754 default: 5755 DRM_DEBUG("invalid pipe %d\n", pipe); 5756 return; 5757 } 5758 } else { 5759 DRM_DEBUG("invalid me %d\n", me); 5760 return; 5761 } 5762 5763 switch (state) { 5764 case AMDGPU_IRQ_STATE_DISABLE: 5765 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 5766 
cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5767 TIME_STAMP_INT_ENABLE, 0); 5768 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5769 GENERIC0_INT_ENABLE, 0); 5770 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 5771 break; 5772 case AMDGPU_IRQ_STATE_ENABLE: 5773 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 5774 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5775 TIME_STAMP_INT_ENABLE, 1); 5776 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5777 GENERIC0_INT_ENABLE, 1); 5778 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 5779 break; 5780 default: 5781 break; 5782 } 5783 } 5784 5785 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5786 int me, int pipe, 5787 enum amdgpu_interrupt_state state) 5788 { 5789 u32 mec_int_cntl, mec_int_cntl_reg; 5790 5791 /* 5792 * amdgpu controls only the first MEC. That's why this function only 5793 * handles the setting of interrupts for this specific MEC. All other 5794 * pipes' interrupts are set by amdkfd. 5795 */ 5796 5797 if (me == 1) { 5798 switch (pipe) { 5799 case 0: 5800 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 5801 break; 5802 case 1: 5803 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 5804 break; 5805 case 2: 5806 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 5807 break; 5808 case 3: 5809 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 5810 break; 5811 default: 5812 DRM_DEBUG("invalid pipe %d\n", pipe); 5813 return; 5814 } 5815 } else { 5816 DRM_DEBUG("invalid me %d\n", me); 5817 return; 5818 } 5819 5820 switch (state) { 5821 case AMDGPU_IRQ_STATE_DISABLE: 5822 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5823 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5824 TIME_STAMP_INT_ENABLE, 0); 5825 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5826 GENERIC0_INT_ENABLE, 0); 5827 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5828 break; 5829 case AMDGPU_IRQ_STATE_ENABLE: 5830 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5831 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5832 TIME_STAMP_INT_ENABLE, 1); 5833 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5834 GENERIC0_INT_ENABLE, 1); 5835 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5836 break; 5837 default: 5838 break; 5839 } 5840 } 5841 5842 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5843 struct amdgpu_irq_src *src, 5844 unsigned type, 5845 enum amdgpu_interrupt_state state) 5846 { 5847 switch (type) { 5848 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5849 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 5850 break; 5851 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 5852 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 5853 break; 5854 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5855 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5856 break; 5857 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5858 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5859 break; 5860 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5861 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5862 break; 5863 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5864 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5865 break; 5866 default: 5867 break; 5868 } 5869 return 0; 5870 } 5871 5872 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 5873 struct 
amdgpu_irq_src *source, 5874 struct amdgpu_iv_entry *entry) 5875 { 5876 int i; 5877 u8 me_id, pipe_id, queue_id; 5878 struct amdgpu_ring *ring; 5879 uint32_t mes_queue_id = entry->src_data[0]; 5880 5881 DRM_DEBUG("IH: CP EOP\n"); 5882 5883 if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { 5884 struct amdgpu_mes_queue *queue; 5885 5886 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; 5887 5888 spin_lock(&adev->mes.queue_id_lock); 5889 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); 5890 if (queue) { 5891 DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); 5892 amdgpu_fence_process(queue->ring); 5893 } 5894 spin_unlock(&adev->mes.queue_id_lock); 5895 } else { 5896 me_id = (entry->ring_id & 0x0c) >> 2; 5897 pipe_id = (entry->ring_id & 0x03) >> 0; 5898 queue_id = (entry->ring_id & 0x70) >> 4; 5899 5900 switch (me_id) { 5901 case 0: 5902 if (pipe_id == 0) 5903 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5904 else 5905 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 5906 break; 5907 case 1: 5908 case 2: 5909 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5910 ring = &adev->gfx.compute_ring[i]; 5911 /* Per-queue interrupt is supported for MEC starting from VI. 5912 * The interrupt can only be enabled/disabled per pipe instead 5913 * of per queue. 5914 */ 5915 if ((ring->me == me_id) && 5916 (ring->pipe == pipe_id) && 5917 (ring->queue == queue_id)) 5918 amdgpu_fence_process(ring); 5919 } 5920 break; 5921 } 5922 } 5923 5924 return 0; 5925 } 5926 5927 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5928 struct amdgpu_irq_src *source, 5929 unsigned type, 5930 enum amdgpu_interrupt_state state) 5931 { 5932 switch (state) { 5933 case AMDGPU_IRQ_STATE_DISABLE: 5934 case AMDGPU_IRQ_STATE_ENABLE: 5935 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, 5936 PRIV_REG_INT_ENABLE, 5937 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5938 break; 5939 default: 5940 break; 5941 } 5942 5943 return 0; 5944 } 5945 5946 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5947 struct amdgpu_irq_src *source, 5948 unsigned type, 5949 enum amdgpu_interrupt_state state) 5950 { 5951 switch (state) { 5952 case AMDGPU_IRQ_STATE_DISABLE: 5953 case AMDGPU_IRQ_STATE_ENABLE: 5954 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, 5955 PRIV_INSTR_INT_ENABLE, 5956 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5957 break; 5958 default: 5959 break; 5960 } 5961 5962 return 0; 5963 } 5964 5965 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 5966 struct amdgpu_iv_entry *entry) 5967 { 5968 u8 me_id, pipe_id, queue_id; 5969 struct amdgpu_ring *ring; 5970 int i; 5971 5972 me_id = (entry->ring_id & 0x0c) >> 2; 5973 pipe_id = (entry->ring_id & 0x03) >> 0; 5974 queue_id = (entry->ring_id & 0x70) >> 4; 5975 5976 switch (me_id) { 5977 case 0: 5978 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5979 ring = &adev->gfx.gfx_ring[i]; 5980 /* we only enabled 1 gfx queue per pipe for now */ 5981 if (ring->me == me_id && ring->pipe == pipe_id) 5982 drm_sched_fault(&ring->sched); 5983 } 5984 break; 5985 case 1: 5986 case 2: 5987 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5988 ring = &adev->gfx.compute_ring[i]; 5989 if (ring->me == me_id && ring->pipe == pipe_id && 5990 ring->queue == queue_id) 5991 drm_sched_fault(&ring->sched); 5992 } 5993 break; 5994 default: 5995 BUG(); 5996 break; 5997 } 5998 } 5999 6000 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6001 struct amdgpu_irq_src *source, 6002 struct amdgpu_iv_entry *entry) 6003 { 6004 DRM_ERROR("Illegal register access in command stream\n"); 6005 gfx_v11_0_handle_priv_fault(adev, entry); 6006 return 0; 6007 } 6008 6009 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6010 struct amdgpu_irq_src *source, 6011 struct amdgpu_iv_entry *entry) 6012 { 6013 DRM_ERROR("Illegal instruction in command stream\n"); 6014 gfx_v11_0_handle_priv_fault(adev, entry); 6015 return 0; 6016 } 6017 6018 #if 0 6019 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6020 struct amdgpu_irq_src *src, 6021 unsigned int type, 6022 enum amdgpu_interrupt_state state) 6023 { 6024 uint32_t tmp, target; 6025 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 6026 6027 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6028 target += ring->pipe; 6029 6030 switch (type) { 6031 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6032 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6033 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6034 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6035 GENERIC2_INT_ENABLE, 0); 6036 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6037 6038 tmp = RREG32_SOC15_IP(GC, target); 6039 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6040 GENERIC2_INT_ENABLE, 0); 6041 WREG32_SOC15_IP(GC, target, tmp); 6042 } else { 6043 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6044 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6045 GENERIC2_INT_ENABLE, 1); 6046 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6047 6048 tmp = RREG32_SOC15_IP(GC, target); 6049 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6050 GENERIC2_INT_ENABLE, 1); 6051 WREG32_SOC15_IP(GC, target, tmp); 6052 } 6053 break; 6054 default: 6055 BUG(); /* kiq only support GENERIC2_INT now */ 6056 break; 6057 } 6058 return 0; 6059 } 6060 #endif 6061 6062 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6063 { 6064 const unsigned int gcr_cntl = 6065 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6066 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6067 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6068 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6069 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6070 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6071 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6072 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6073 6074 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6075 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 
6076 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6077 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6078 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6079 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6080 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6081 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6082 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6083 } 6084 6085 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 6086 .name = "gfx_v11_0", 6087 .early_init = gfx_v11_0_early_init, 6088 .late_init = gfx_v11_0_late_init, 6089 .sw_init = gfx_v11_0_sw_init, 6090 .sw_fini = gfx_v11_0_sw_fini, 6091 .hw_init = gfx_v11_0_hw_init, 6092 .hw_fini = gfx_v11_0_hw_fini, 6093 .suspend = gfx_v11_0_suspend, 6094 .resume = gfx_v11_0_resume, 6095 .is_idle = gfx_v11_0_is_idle, 6096 .wait_for_idle = gfx_v11_0_wait_for_idle, 6097 .soft_reset = gfx_v11_0_soft_reset, 6098 .check_soft_reset = gfx_v11_0_check_soft_reset, 6099 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 6100 .set_powergating_state = gfx_v11_0_set_powergating_state, 6101 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 6102 }; 6103 6104 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 6105 .type = AMDGPU_RING_TYPE_GFX, 6106 .align_mask = 0xff, 6107 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6108 .support_64bit_ptrs = true, 6109 .secure_submission_supported = true, 6110 .vmhub = AMDGPU_GFXHUB_0, 6111 .get_rptr = gfx_v11_0_ring_get_rptr_gfx, 6112 .get_wptr = gfx_v11_0_ring_get_wptr_gfx, 6113 .set_wptr = gfx_v11_0_ring_set_wptr_gfx, 6114 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6115 5 + /* COND_EXEC */ 6116 7 + /* PIPELINE_SYNC */ 6117 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6118 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6119 2 + /* VM_FLUSH */ 6120 8 + /* FENCE for VM_FLUSH */ 6121 20 + /* GDS switch */ 6122 5 + /* COND_EXEC */ 6123 7 + /* HDP_flush */ 6124 4 + /* VGT_flush */ 6125 31 + /* DE_META */ 6126 3 + /* CNTX_CTRL */ 6127 5 + /* HDP_INVL */ 6128 8 + 8 + /* FENCE x2 */ 6129 8, /* gfx_v11_0_emit_mem_sync */ 6130 .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ 6131 .emit_ib = gfx_v11_0_ring_emit_ib_gfx, 6132 .emit_fence = gfx_v11_0_ring_emit_fence, 6133 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 6134 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 6135 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 6136 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 6137 .test_ring = gfx_v11_0_ring_test_ring, 6138 .test_ib = gfx_v11_0_ring_test_ib, 6139 .insert_nop = amdgpu_ring_insert_nop, 6140 .pad_ib = amdgpu_ring_generic_pad_ib, 6141 .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, 6142 .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, 6143 .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, 6144 .preempt_ib = gfx_v11_0_ring_preempt_ib, 6145 .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, 6146 .emit_wreg = gfx_v11_0_ring_emit_wreg, 6147 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 6148 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 6149 .soft_recovery = gfx_v11_0_ring_soft_recovery, 6150 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 6151 }; 6152 6153 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { 6154 .type = AMDGPU_RING_TYPE_COMPUTE, 6155 .align_mask = 0xff, 6156 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6157 .support_64bit_ptrs = true, 6158 .vmhub = AMDGPU_GFXHUB_0, 6159 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 6160 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 6161 .set_wptr = 
gfx_v11_0_ring_set_wptr_compute, 6162 .emit_frame_size = 6163 20 + /* gfx_v11_0_ring_emit_gds_switch */ 6164 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 6165 5 + /* hdp invalidate */ 6166 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 6167 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6168 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6169 2 + /* gfx_v11_0_ring_emit_vm_flush */ 6170 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ 6171 8, /* gfx_v11_0_emit_mem_sync */ 6172 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 6173 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 6174 .emit_fence = gfx_v11_0_ring_emit_fence, 6175 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 6176 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 6177 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 6178 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 6179 .test_ring = gfx_v11_0_ring_test_ring, 6180 .test_ib = gfx_v11_0_ring_test_ib, 6181 .insert_nop = amdgpu_ring_insert_nop, 6182 .pad_ib = amdgpu_ring_generic_pad_ib, 6183 .emit_wreg = gfx_v11_0_ring_emit_wreg, 6184 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 6185 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 6186 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 6187 }; 6188 6189 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 6190 .type = AMDGPU_RING_TYPE_KIQ, 6191 .align_mask = 0xff, 6192 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6193 .support_64bit_ptrs = true, 6194 .vmhub = AMDGPU_GFXHUB_0, 6195 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 6196 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 6197 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 6198 .emit_frame_size = 6199 20 + /* gfx_v11_0_ring_emit_gds_switch */ 6200 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 6201 5 + /*hdp invalidate */ 6202 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 6203 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6204 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6205 2 + /* gfx_v11_0_ring_emit_vm_flush */ 6206 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6207 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 6208 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 6209 .emit_fence = gfx_v11_0_ring_emit_fence_kiq, 6210 .test_ring = gfx_v11_0_ring_test_ring, 6211 .test_ib = gfx_v11_0_ring_test_ib, 6212 .insert_nop = amdgpu_ring_insert_nop, 6213 .pad_ib = amdgpu_ring_generic_pad_ib, 6214 .emit_rreg = gfx_v11_0_ring_emit_rreg, 6215 .emit_wreg = gfx_v11_0_ring_emit_wreg, 6216 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 6217 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 6218 }; 6219 6220 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) 6221 { 6222 int i; 6223 6224 adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq; 6225 6226 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6227 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; 6228 6229 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6230 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; 6231 } 6232 6233 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { 6234 .set = gfx_v11_0_set_eop_interrupt_state, 6235 .process = gfx_v11_0_eop_irq, 6236 }; 6237 6238 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { 6239 .set = gfx_v11_0_set_priv_reg_fault_state, 6240 .process = gfx_v11_0_priv_reg_irq, 6241 }; 6242 6243 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { 6244 .set = gfx_v11_0_set_priv_inst_fault_state, 6245 .process = gfx_v11_0_priv_inst_irq, 6246 }; 6247 6248 static void 
gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 6249 { 6250 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6251 adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; 6252 6253 adev->gfx.priv_reg_irq.num_types = 1; 6254 adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; 6255 6256 adev->gfx.priv_inst_irq.num_types = 1; 6257 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 6258 } 6259 6260 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) 6261 { 6262 if (adev->flags & AMD_IS_APU) 6263 adev->gfx.imu.mode = MISSION_MODE; 6264 else 6265 adev->gfx.imu.mode = DEBUG_MODE; 6266 6267 adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; 6268 } 6269 6270 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) 6271 { 6272 adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; 6273 } 6274 6275 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) 6276 { 6277 unsigned total_cu = adev->gfx.config.max_cu_per_sh * 6278 adev->gfx.config.max_sh_per_se * 6279 adev->gfx.config.max_shader_engines; 6280 6281 adev->gds.gds_size = 0x1000; 6282 adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; 6283 adev->gds.gws_size = 64; 6284 adev->gds.oa_size = 16; 6285 } 6286 6287 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) 6288 { 6289 /* set gfx eng mqd */ 6290 adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = 6291 sizeof(struct v11_gfx_mqd); 6292 adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = 6293 gfx_v11_0_gfx_mqd_init; 6294 /* set compute eng mqd */ 6295 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 6296 sizeof(struct v11_compute_mqd); 6297 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 6298 gfx_v11_0_compute_mqd_init; 6299 } 6300 6301 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 6302 u32 bitmap) 6303 { 6304 u32 data; 6305 6306 if (!bitmap) 6307 return; 6308 6309 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 6310 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 6311 6312 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 6313 } 6314 6315 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 6316 { 6317 u32 data, wgp_bitmask; 6318 data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); 6319 data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); 6320 6321 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 6322 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 6323 6324 wgp_bitmask = 6325 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); 6326 6327 return (~data) & wgp_bitmask; 6328 } 6329 6330 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) 6331 { 6332 u32 wgp_idx, wgp_active_bitmap; 6333 u32 cu_bitmap_per_wgp, cu_active_bitmap; 6334 6335 wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev); 6336 cu_active_bitmap = 0; 6337 6338 for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { 6339 /* if there is one WGP enabled, it means 2 CUs will be enabled */ 6340 cu_bitmap_per_wgp = 3 << (2 * wgp_idx); 6341 if (wgp_active_bitmap & (1 << wgp_idx)) 6342 cu_active_bitmap |= cu_bitmap_per_wgp; 6343 } 6344 6345 return cu_active_bitmap; 6346 } 6347 6348 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 6349 struct amdgpu_cu_info *cu_info) 6350 { 6351 int i, j, k, counter, active_cu_number = 0; 6352 u32 mask, bitmap; 6353 unsigned disable_masks[8 * 2]; 6354 6355 if (!adev || !cu_info) 6356 return -EINVAL; 6357 6358 amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2); 6359 6360 mutex_lock(&adev->grbm_idx_mutex); 6361 for (i = 
0; i < adev->gfx.config.max_shader_engines; i++) { 6362 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 6363 mask = 1; 6364 counter = 0; 6365 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); 6366 if (i < 8 && j < 2) 6367 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( 6368 adev, disable_masks[i * 2 + j]); 6369 bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev); 6370 6371 /** 6372 * GFX11 could support more than 4 SEs, while the bitmap 6373 * in cu_info struct is 4x4 and ioctl interface struct 6374 * drm_amdgpu_info_device should keep stable. 6375 * So we use last two columns of bitmap to store cu mask for 6376 * SEs 4 to 7, the layout of the bitmap is as below: 6377 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} 6378 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} 6379 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} 6380 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} 6381 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} 6382 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} 6383 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} 6384 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} 6385 */ 6386 cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap; 6387 6388 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 6389 if (bitmap & mask) 6390 counter++; 6391 6392 mask <<= 1; 6393 } 6394 active_cu_number += counter; 6395 } 6396 } 6397 gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6398 mutex_unlock(&adev->grbm_idx_mutex); 6399 6400 cu_info->number = active_cu_number; 6401 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 6402 6403 return 0; 6404 } 6405 6406 const struct amdgpu_ip_block_version gfx_v11_0_ip_block = 6407 { 6408 .type = AMD_IP_BLOCK_TYPE_GFX, 6409 .major = 11, 6410 .minor = 0, 6411 .rev = 0, 6412 .funcs = &gfx_v11_0_ip_funcs, 6413 }; 6414