/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "nv.h"
#include "nvd.h"

#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
#include "nbio_v2_3.h"

/**
 * Navi10 has two graphics rings that share each graphics pipe:
 * 1. Primary ring
 * 2. Async ring
 *
 * During the bring-up phase only the primary ring was used, so the gfx ring
 * count was initially set to 1.
 */
#define GFX10_NUM_GFX_RINGS	2
#define GFX10_MEC_HPD_SIZE	2048

#define F32_CE_PROGRAM_RAM_SIZE		65536
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L

MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
};

static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
{
	/* Pending on emulation bring up */
};

static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);

static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx10_kiq_set_resources,
	.kiq_map_queues = gfx10_kiq_map_queues,
	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
	.kiq_query_status = gfx10_kiq_query_status,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
};

static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
}

static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_0_nv10,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
		break;
	default:
		break;
	}
}

static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}

	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		if (amdgpu_emu_mode == 1)
			DRM_INFO("ring test on %d succeeded in %d msecs\n",
				 ring->idx, i);
		else
			DRM_INFO("ring test on %d succeeded in %d usecs\n",
				 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}

static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}

	WREG32(scratch, 0xCAFEDEAD);

	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 395 scratch, tmp); 396 r = -EINVAL; 397 } 398 err2: 399 amdgpu_ib_free(adev, &ib, NULL); 400 dma_fence_put(f); 401 err1: 402 amdgpu_gfx_scratch_free(adev, scratch); 403 404 return r; 405 } 406 407 static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) 408 { 409 release_firmware(adev->gfx.pfp_fw); 410 adev->gfx.pfp_fw = NULL; 411 release_firmware(adev->gfx.me_fw); 412 adev->gfx.me_fw = NULL; 413 release_firmware(adev->gfx.ce_fw); 414 adev->gfx.ce_fw = NULL; 415 release_firmware(adev->gfx.rlc_fw); 416 adev->gfx.rlc_fw = NULL; 417 release_firmware(adev->gfx.mec_fw); 418 adev->gfx.mec_fw = NULL; 419 release_firmware(adev->gfx.mec2_fw); 420 adev->gfx.mec2_fw = NULL; 421 422 kfree(adev->gfx.rlc.register_list_format); 423 } 424 425 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 426 { 427 const struct rlc_firmware_header_v2_1 *rlc_hdr; 428 429 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 430 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 431 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 432 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 433 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 434 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 435 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 436 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 437 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 438 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 439 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 440 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 441 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 442 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 443 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 444 } 445 446 static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) 447 { 448 switch (adev->asic_type) { 449 case CHIP_NAVI10: 450 if ((adev->gfx.rlc_fw_version < 85) || 451 (adev->pm.fw_version < 0x002A0C00)) 452 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 453 break; 454 default: 455 break; 456 } 457 } 458 459 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) 460 { 461 const char *chip_name; 462 char fw_name[30]; 463 int err; 464 struct amdgpu_firmware_info *info = NULL; 465 const struct common_firmware_header *header = NULL; 466 const struct gfx_firmware_header_v1_0 *cp_hdr; 467 const struct rlc_firmware_header_v2_0 *rlc_hdr; 468 unsigned int *tmp = NULL; 469 unsigned int i = 0; 470 uint16_t version_major; 471 uint16_t version_minor; 472 473 DRM_DEBUG("\n"); 474 475 switch (adev->asic_type) { 476 case CHIP_NAVI10: 477 chip_name = "navi10"; 478 break; 479 default: 480 BUG(); 481 } 482 483 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 484 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 485 if (err) 486 goto out; 487 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 488 if (err) 489 
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v10_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) -
			      le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) -
				      le32_to_cpu(cp_hdr->jt_size) * 4,
				      PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
				      PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx10: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}

	gfx_v10_0_check_gfxoff_flag(adev);

	return err;
}

static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
				     volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}

static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx10_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	return 0;
}

static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			  AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v10_0_me_init(struct amdgpu_device *adev)
{
	int r;

	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);

	r = gfx_v10_0_init_microcode(adev);
	if (r)
		DRM_ERROR("Failed to load gfx firmware!\n");

	return r;
}

static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data = NULL;
	unsigned fw_size;
	u32 *fw = NULL;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v10_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

		r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.mec_fw_obj,
					      &adev->gfx.mec.mec_fw_gpu_addr,
					      (void **)&fw);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
			gfx_v10_0_mec_fini(adev);
			return r;
		}

		memcpy(fw, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		     (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx10 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 2 wave data */
	dst[(*no_fields)++] = 2;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
}

static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}


static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v10_0_select_se_sh,
	.read_wave_data = &gfx_v10_0_read_wave_data,
	.read_wave_sgprs = &gfx_v10_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,
};

static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	adev->gfx.funcs = &gfx_v10_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
}

static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	int r;
	struct amdgpu_ring *ring;
	unsigned int irq_type;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;
	return 0;
}

static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX10_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static int gfx_v10_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id = 0;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 2;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 2;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	default:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	}

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
			      &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v10_0_scratch_init(adev);

	r = gfx_v10_0_me_init(adev);
	if (r)
		return r;

	r = gfx_v10_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v10_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v10_0_gfx_ring_init(adev, ring_id,
							    i, k, j);
				if (r)
					return r;
				ring_id++;
			}
		}
	}

	ring_id = 0;
	/* set up the compute queues - allocate horizontally across pipes */
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
								     j))
					continue;

				r = gfx_v10_0_compute_ring_init(adev, ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
	if (r)
		return r;

	/* allocate visible FB for rlc auto-loading fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
		if (r)
			return r;
	}

	adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;

	gfx_v10_0_gpu_early_init(adev);

	return 0;
}

static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
			      &adev->gfx.pfp.pfp_fw_gpu_addr,
			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
}

static void gfx_v10_0_ce_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj,
			      &adev->gfx.ce.ce_fw_gpu_addr,
			      (void **)&adev->gfx.ce.ce_fw_ptr);
}

static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
			      &adev->gfx.me.me_fw_gpu_addr,
			      (void **)&adev->gfx.me.me_fw_ptr);
}

static int gfx_v10_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v10_0_pfp_fini(adev);
	gfx_v10_0_ce_fini(adev);
	gfx_v10_0_me_fini(adev);
	gfx_v10_0_rlc_fini(adev);
	gfx_v10_0_mec_fini(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);

	gfx_v10_0_free_microcode(adev);

	return 0;
}


static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}

static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
				     instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);

	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
}

static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v10_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev)
{
	uint32_t num_sc;
	uint32_t enabled_rb_per_sh;
	uint32_t active_rb_bitmap;
	uint32_t num_rb_per_sc;
	uint32_t num_packer_per_sc;
	uint32_t pa_sc_tile_steering_override;

	/* init num_sc */
	num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
		adev->gfx.config.num_sc_per_sh;
	/* init num_rb_per_sc */
	active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev);
	enabled_rb_per_sh = hweight32(active_rb_bitmap);
	num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh;
	/* init num_packer_per_sc */
	num_packer_per_sc = adev->gfx.config.num_packer_per_sc;

	pa_sc_tile_steering_override = 0;
	pa_sc_tile_steering_override |=
		(order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK;
	pa_sc_tile_steering_override |=
		(order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK;
	pa_sc_tile_steering_override |=
		(order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) &
		PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK;

	return pa_sc_tile_steering_override;
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)

static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		nv_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
{
	int i, j, k;
	int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
	u32 tmp, wgp_active_bitmap = 0;
	u32 gcrd_targets_disable_tcp = 0;
	u32 utcl_invreq_disable = 0;
	/*
	 * GCRD_TARGETS_DISABLE field contains
	 * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
	 */
	u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		max_wgp_per_sh + /* SQC */
		4); /* GL1C */
	/*
	 * UTCL1_UTCL0_INVREQ_DISABLE field contains
	 * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
	 */
	u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		2 * max_wgp_per_sh + /* SQC */
		4 + /* RMI */
		1); /* SQG */

	if (adev->asic_type == CHIP_NAVI10) {
		mutex_lock(&adev->grbm_idx_mutex);
		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
				wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
				/*
				 * Set corresponding TCP bits for the inactive WGPs in
				 * GCRD_SA_TARGETS_DISABLE
				 */
				gcrd_targets_disable_tcp = 0;
				/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
				utcl_invreq_disable = 0;

				for (k = 0; k < max_wgp_per_sh; k++) {
					if (!(wgp_active_bitmap & (1 << k))) {
						gcrd_targets_disable_tcp |= 3 << (2 * k);
						utcl_invreq_disable |= (3 << (2 * k)) |
							(3 << (2 * (max_wgp_per_sh + k)));
					}
				}

				tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
				/* only override TCP & SQC bits */
				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
				tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
				WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);

				tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
				/* only override TCP bits */
				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
				tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
				WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
			}
		}

		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		mutex_unlock(&adev->grbm_idx_mutex);
	}
}

static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v10_0_tiling_mode_table_init(adev);

	gfx_v10_0_setup_rb(adev);
	gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
	adev->gfx.config.pa_sc_tile_steering_override =
		gfx_v10_0_init_pa_sc_tile_steering_override(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
		nv_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);
			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);
			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
					    (adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
					    (adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
	nv_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v10_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	tmp = REG_SET_FIELD(0, PA_SC_FIFO_SIZE, SC_FRONTEND_PRIM_FIFO_SIZE,
			    adev->gfx.config.sc_prim_fifo_size_frontend);
	tmp = REG_SET_FIELD(tmp, PA_SC_FIFO_SIZE, SC_BACKEND_PRIM_FIFO_SIZE,
			    adev->gfx.config.sc_prim_fifo_size_backend);
	tmp = REG_SET_FIELD(tmp, PA_SC_FIFO_SIZE, SC_HIZ_TILE_FIFO_SIZE,
			    adev->gfx.config.sc_hiz_tile_fifo_size);
	tmp = REG_SET_FIELD(tmp, PA_SC_FIFO_SIZE, SC_EARLYZ_TILE_FIFO_SIZE,
			    adev->gfx.config.sc_earlyz_tile_fifo_size);
	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						bool enable)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
			    enable ? 1 : 0);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
		     adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
		     adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
}

static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
{
	gfx_v10_0_init_csb(adev);

	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);

	/* TODO: init power gating */
	return;
}

void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
}

static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t rlc_pg_cntl;

	rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);

	if (!enable) {
		/* RLC_PG_CNTL[23] = 0 (default)
		 * RLC will wait for handshake acks with SMU
		 * GFXOFF will be enabled
		 * RLC_PG_CNTL[23] = 1
		 * RLC will not issue any message to SMU
		 * hence no handshake between SMU & RLC
		 * GFXOFF will be disabled
		 */
		rlc_pg_cntl |= 0x80000;
	} else
		rlc_pg_cntl &= ~0x80000;
	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
}

static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
{
	/* TODO: enable rlc & smu handshake until smu
	 * and gfxoff feature works as expected */
	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
		gfx_v10_0_rlc_smu_handshake_cntl(adev, false);

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);
}

static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
*adev) 1656 { 1657 uint32_t tmp; 1658 1659 /* enable Save Restore Machine */ 1660 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 1661 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 1662 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 1663 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 1664 } 1665 1666 static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev) 1667 { 1668 const struct rlc_firmware_header_v2_0 *hdr; 1669 const __le32 *fw_data; 1670 unsigned i, fw_size; 1671 1672 if (!adev->gfx.rlc_fw) 1673 return -EINVAL; 1674 1675 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1676 amdgpu_ucode_print_rlc_hdr(&hdr->header); 1677 1678 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1679 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1680 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1681 1682 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 1683 RLCG_UCODE_LOADING_START_ADDRESS); 1684 1685 for (i = 0; i < fw_size; i++) 1686 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, 1687 le32_to_cpup(fw_data++)); 1688 1689 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 1690 1691 return 0; 1692 } 1693 1694 static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) 1695 { 1696 int r; 1697 1698 if (amdgpu_sriov_vf(adev)) 1699 return 0; 1700 1701 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1702 r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); 1703 if (r) 1704 return r; 1705 gfx_v10_0_init_pg(adev); 1706 1707 /* enable RLC SRM */ 1708 gfx_v10_0_rlc_enable_srm(adev); 1709 1710 } else { 1711 adev->gfx.rlc.funcs->stop(adev); 1712 1713 /* disable CG */ 1714 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 1715 1716 /* disable PG */ 1717 WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); 1718 1719 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1720 /* legacy rlc firmware loading */ 1721 r = gfx_v10_0_rlc_load_microcode(adev); 1722 if (r) 1723 return r; 1724 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1725 /* rlc backdoor autoload firmware */ 1726 r = gfx_v10_0_rlc_backdoor_autoload_enable(adev); 1727 if (r) 1728 return r; 1729 } 1730 1731 gfx_v10_0_init_pg(adev); 1732 adev->gfx.rlc.funcs->start(adev); 1733 1734 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1735 r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); 1736 if (r) 1737 return r; 1738 } 1739 } 1740 return 0; 1741 } 1742 1743 static struct { 1744 FIRMWARE_ID id; 1745 unsigned int offset; 1746 unsigned int size; 1747 } rlc_autoload_info[FIRMWARE_ID_MAX]; 1748 1749 static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) 1750 { 1751 int ret; 1752 RLC_TABLE_OF_CONTENT *rlc_toc; 1753 1754 ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE, 1755 AMDGPU_GEM_DOMAIN_GTT, 1756 &adev->gfx.rlc.rlc_toc_bo, 1757 &adev->gfx.rlc.rlc_toc_gpu_addr, 1758 (void **)&adev->gfx.rlc.rlc_toc_buf); 1759 if (ret) { 1760 dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret); 1761 return ret; 1762 } 1763 1764 /* Copy toc from psp sos fw to rlc toc buffer */ 1765 memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size); 1766 1767 rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf; 1768 while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) && 1769 (rlc_toc->id < FIRMWARE_ID_MAX)) { 1770 if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) && 1771 (rlc_toc->id <= FIRMWARE_ID_CP_MES)) { 1772 /* Offset needs 4KB alignment */ 1773 rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE); 1774 } 1775 1776 
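/*
 * Cache this TOC entry in the local rlc_autoload_info[] table, indexed
 * by FIRMWARE_ID, so the later copy and cache-config steps can locate
 * each image inside the shared autoload buffer. The TOC appears to
 * store offset/size in dwords, hence the * 4 conversions to bytes.
 */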
rlc_autoload_info[rlc_toc->id].id = rlc_toc->id; 1777 rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4; 1778 rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4; 1779 1780 rlc_toc++; 1781 }; 1782 1783 return 0; 1784 } 1785 1786 static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev) 1787 { 1788 uint32_t total_size = 0; 1789 FIRMWARE_ID id; 1790 int ret; 1791 1792 ret = gfx_v10_0_parse_rlc_toc(adev); 1793 if (ret) { 1794 dev_err(adev->dev, "failed to parse rlc toc\n"); 1795 return 0; 1796 } 1797 1798 for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++) 1799 total_size += rlc_autoload_info[id].size; 1800 1801 /* In case the offset in rlc toc ucode is aligned */ 1802 if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset) 1803 total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset + 1804 rlc_autoload_info[FIRMWARE_ID_MAX-1].size; 1805 1806 return total_size; 1807 } 1808 1809 static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev) 1810 { 1811 int r; 1812 uint32_t total_size; 1813 1814 total_size = gfx_v10_0_calc_toc_total_size(adev); 1815 1816 r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE, 1817 AMDGPU_GEM_DOMAIN_GTT, 1818 &adev->gfx.rlc.rlc_autoload_bo, 1819 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1820 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1821 if (r) { 1822 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1823 return r; 1824 } 1825 1826 return 0; 1827 } 1828 1829 static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev) 1830 { 1831 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo, 1832 &adev->gfx.rlc.rlc_toc_gpu_addr, 1833 (void **)&adev->gfx.rlc.rlc_toc_buf); 1834 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1835 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1836 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1837 } 1838 1839 static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1840 FIRMWARE_ID id, 1841 const void *fw_data, 1842 uint32_t fw_size) 1843 { 1844 uint32_t toc_offset; 1845 uint32_t toc_fw_size; 1846 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1847 1848 if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX) 1849 return; 1850 1851 toc_offset = rlc_autoload_info[id].offset; 1852 toc_fw_size = rlc_autoload_info[id].size; 1853 1854 if (fw_size == 0) 1855 fw_size = toc_fw_size; 1856 1857 if (fw_size > toc_fw_size) 1858 fw_size = toc_fw_size; 1859 1860 memcpy(ptr + toc_offset, fw_data, fw_size); 1861 1862 if (fw_size < toc_fw_size) 1863 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1864 } 1865 1866 static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) 1867 { 1868 void *data; 1869 uint32_t size; 1870 1871 data = adev->gfx.rlc.rlc_toc_buf; 1872 size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size; 1873 1874 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1875 FIRMWARE_ID_RLC_TOC, 1876 data, size); 1877 } 1878 1879 static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev) 1880 { 1881 const __le32 *fw_data; 1882 uint32_t fw_size; 1883 const struct gfx_firmware_header_v1_0 *cp_hdr; 1884 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1885 1886 /* pfp ucode */ 1887 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1888 adev->gfx.pfp_fw->data; 1889 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1890 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1891 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1892 
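/*
 * Stage each CP/RLC image at the offset the TOC assigned to it;
 * gfx_v10_0_rlc_backdoor_autoload_copy_ucode() clamps the copy to the
 * TOC-reported slot size and zero-fills any remainder of the slot.
 */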
gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1893 FIRMWARE_ID_CP_PFP, 1894 fw_data, fw_size); 1895 1896 /* ce ucode */ 1897 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1898 adev->gfx.ce_fw->data; 1899 fw_data = (const __le32 *)(adev->gfx.ce_fw->data + 1900 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1901 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1902 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1903 FIRMWARE_ID_CP_CE, 1904 fw_data, fw_size); 1905 1906 /* me ucode */ 1907 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1908 adev->gfx.me_fw->data; 1909 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1910 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1911 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1912 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1913 FIRMWARE_ID_CP_ME, 1914 fw_data, fw_size); 1915 1916 /* rlc ucode */ 1917 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1918 adev->gfx.rlc_fw->data; 1919 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1920 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1921 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1922 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1923 FIRMWARE_ID_RLC_G_UCODE, 1924 fw_data, fw_size); 1925 1926 /* mec1 ucode */ 1927 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1928 adev->gfx.mec_fw->data; 1929 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1930 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1931 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1932 cp_hdr->jt_size * 4; 1933 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1934 FIRMWARE_ID_CP_MEC, 1935 fw_data, fw_size); 1936 /* mec2 ucode is not necessary if mec2 ucode is same as mec1 */ 1937 } 1938 1939 /* Temporarily put sdma part here */ 1940 static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev) 1941 { 1942 const __le32 *fw_data; 1943 uint32_t fw_size; 1944 const struct sdma_firmware_header_v1_0 *sdma_hdr; 1945 int i; 1946 1947 for (i = 0; i < adev->sdma.num_instances; i++) { 1948 sdma_hdr = (const struct sdma_firmware_header_v1_0 *) 1949 adev->sdma.instance[i].fw->data; 1950 fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data + 1951 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1952 fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes); 1953 1954 if (i == 0) { 1955 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1956 FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size); 1957 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1958 FIRMWARE_ID_SDMA0_JT, 1959 (uint32_t *)fw_data + 1960 sdma_hdr->jt_offset, 1961 sdma_hdr->jt_size * 4); 1962 } else if (i == 1) { 1963 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1964 FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size); 1965 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1966 FIRMWARE_ID_SDMA1_JT, 1967 (uint32_t *)fw_data + 1968 sdma_hdr->jt_offset, 1969 sdma_hdr->jt_size * 4); 1970 } 1971 } 1972 } 1973 1974 static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1975 { 1976 uint32_t rlc_g_offset, rlc_g_size, tmp; 1977 uint64_t gpu_addr; 1978 1979 gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev); 1980 gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev); 1981 gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev); 1982 1983 rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset; 1984 rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size; 1985 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1986 1987 
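/*
 * Point the RLC bootloader registers at the RLC_G image staged in the
 * autoload buffer; the RLC boot ROM is expected to fetch it from there,
 * which is why the checks below insist that the ROM has halted the RLC
 * and that either COLD_BOOT_EXIT or VDDGFX_EXIT is set in the reset
 * vector.
 */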
WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr)); 1988 WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr)); 1989 WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size); 1990 1991 tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR); 1992 if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK | 1993 RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) { 1994 DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n"); 1995 return -EINVAL; 1996 } 1997 1998 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); 1999 if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) { 2000 DRM_ERROR("RLC ROM should halt itself\n"); 2001 return -EINVAL; 2002 } 2003 2004 return 0; 2005 } 2006 2007 static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev) 2008 { 2009 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2010 uint32_t tmp; 2011 int i; 2012 uint64_t addr; 2013 2014 /* Trigger an invalidation of the L1 instruction caches */ 2015 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); 2016 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2017 WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp); 2018 2019 /* Wait for invalidation complete */ 2020 for (i = 0; i < usec_timeout; i++) { 2021 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); 2022 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2023 INVALIDATE_CACHE_COMPLETE)) 2024 break; 2025 udelay(1); 2026 } 2027 2028 if (i >= usec_timeout) { 2029 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2030 return -EINVAL; 2031 } 2032 2033 /* Program me ucode address into instruction cache address register */ 2034 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2035 rlc_autoload_info[FIRMWARE_ID_CP_ME].offset; 2036 WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO, 2037 lower_32_bits(addr) & 0xFFFFF000); 2038 WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, 2039 upper_32_bits(addr)); 2040 2041 return 0; 2042 } 2043 2044 static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev) 2045 { 2046 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2047 uint32_t tmp; 2048 int i; 2049 uint64_t addr; 2050 2051 /* Trigger an invalidation of the L1 instruction caches */ 2052 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); 2053 tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2054 WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp); 2055 2056 /* Wait for invalidation complete */ 2057 for (i = 0; i < usec_timeout; i++) { 2058 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); 2059 if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL, 2060 INVALIDATE_CACHE_COMPLETE)) 2061 break; 2062 udelay(1); 2063 } 2064 2065 if (i >= usec_timeout) { 2066 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2067 return -EINVAL; 2068 } 2069 2070 /* Program ce ucode address into instruction cache address register */ 2071 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2072 rlc_autoload_info[FIRMWARE_ID_CP_CE].offset; 2073 WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO, 2074 lower_32_bits(addr) & 0xFFFFF000); 2075 WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, 2076 upper_32_bits(addr)); 2077 2078 return 0; 2079 } 2080 2081 static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev) 2082 { 2083 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2084 uint32_t tmp; 2085 int i; 2086 uint64_t addr; 2087 2088 /* Trigger an invalidation of the L1 instruction caches */ 2089 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); 2090 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2091 WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL,
tmp); 2092 2093 /* Wait for invalidation complete */ 2094 for (i = 0; i < usec_timeout; i++) { 2095 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); 2096 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2097 INVALIDATE_CACHE_COMPLETE)) 2098 break; 2099 udelay(1); 2100 } 2101 2102 if (i >= usec_timeout) { 2103 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2104 return -EINVAL; 2105 } 2106 2107 /* Program pfp ucode address into instruction cache address register */ 2108 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2109 rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset; 2110 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO, 2111 lower_32_bits(addr) & 0xFFFFF000); 2112 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, 2113 upper_32_bits(addr)); 2114 2115 return 0; 2116 } 2117 2118 static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev) 2119 { 2120 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2121 uint32_t tmp; 2122 int i; 2123 uint64_t addr; 2124 2125 /* Trigger an invalidation of the L1 instruction caches */ 2126 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); 2127 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2128 WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp); 2129 2130 /* Wait for invalidation complete */ 2131 for (i = 0; i < usec_timeout; i++) { 2132 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); 2133 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2134 INVALIDATE_CACHE_COMPLETE)) 2135 break; 2136 udelay(1); 2137 } 2138 2139 if (i >= usec_timeout) { 2140 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2141 return -EINVAL; 2142 } 2143 2144 /* Program mec1 ucode address into instruction cache address register */ 2145 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2146 rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset; 2147 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2148 lower_32_bits(addr) & 0xFFFFF000); 2149 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2150 upper_32_bits(addr)); 2151 2152 return 0; 2153 } 2154 2155 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 2156 { 2157 uint32_t cp_status; 2158 uint32_t bootload_status; 2159 int i, r; 2160 2161 for (i = 0; i < adev->usec_timeout; i++) { 2162 cp_status = RREG32_SOC15(GC, 0, mmCP_STAT); 2163 bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS); 2164 if ((cp_status == 0) && 2165 (REG_GET_FIELD(bootload_status, 2166 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 2167 break; 2168 } 2169 udelay(1); 2170 } 2171 2172 if (i >= adev->usec_timeout) { 2173 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 2174 return -ETIMEDOUT; 2175 } 2176 2177 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 2178 r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev); 2179 if (r) 2180 return r; 2181 2182 r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev); 2183 if (r) 2184 return r; 2185 2186 r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev); 2187 if (r) 2188 return r; 2189 2190 r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev); 2191 if (r) 2192 return r; 2193 } 2194 2195 return 0; 2196 } 2197 2198 static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2199 { 2200 int i; 2201 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2202 2203 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2204 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2205 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ?
0 : 1); 2206 if (!enable) { 2207 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2208 adev->gfx.gfx_ring[i].sched.ready = false; 2209 } 2210 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); 2211 udelay(50); 2212 } 2213 2214 static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 2215 { 2216 int r; 2217 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2218 const __le32 *fw_data; 2219 unsigned i, fw_size; 2220 uint32_t tmp; 2221 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2222 2223 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2224 adev->gfx.pfp_fw->data; 2225 2226 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2227 2228 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 2229 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2230 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 2231 2232 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 2233 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2234 &adev->gfx.pfp.pfp_fw_obj, 2235 &adev->gfx.pfp.pfp_fw_gpu_addr, 2236 (void **)&adev->gfx.pfp.pfp_fw_ptr); 2237 if (r) { 2238 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 2239 gfx_v10_0_pfp_fini(adev); 2240 return r; 2241 } 2242 2243 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 2244 2245 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 2246 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 2247 2248 /* Trigger an invalidation of the L1 instruction caches */ 2249 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); 2250 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2251 WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp); 2252 2253 /* Wait for invalidation complete */ 2254 for (i = 0; i < usec_timeout; i++) { 2255 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); 2256 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2257 INVALIDATE_CACHE_COMPLETE)) 2258 break; 2259 udelay(1); 2260 } 2261 2262 if (i >= usec_timeout) { 2263 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2264 return -EINVAL; 2265 } 2266 2267 if (amdgpu_emu_mode == 1) 2268 adev->nbio_funcs->hdp_flush(adev, NULL); 2269 2270 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); 2271 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2272 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2273 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2274 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2275 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp); 2276 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO, 2277 adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000); 2278 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, 2279 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 2280 2281 return 0; 2282 } 2283 2284 static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) 2285 { 2286 int r; 2287 const struct gfx_firmware_header_v1_0 *ce_hdr; 2288 const __le32 *fw_data; 2289 unsigned i, fw_size; 2290 uint32_t tmp; 2291 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2292 2293 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2294 adev->gfx.ce_fw->data; 2295 2296 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2297 2298 fw_data = (const __le32 *)(adev->gfx.ce_fw->data + 2299 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2300 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes); 2301 2302 r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes, 2303 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2304 &adev->gfx.ce.ce_fw_obj, 2305 &adev->gfx.ce.ce_fw_gpu_addr, 2306 (void **)&adev->gfx.ce.ce_fw_ptr); 2307 if (r) { 2308 dev_err(adev->dev, "(%d) 
failed to create ce fw bo\n", r); 2309 gfx_v10_0_ce_fini(adev); 2310 return r; 2311 } 2312 2313 memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size); 2314 2315 amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj); 2316 amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj); 2317 2318 /* Trigger an invalidation of the L1 instruction caches */ 2319 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); 2320 tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2321 WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp); 2322 2323 /* Wait for invalidation complete */ 2324 for (i = 0; i < usec_timeout; i++) { 2325 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); 2326 if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL, 2327 INVALIDATE_CACHE_COMPLETE)) 2328 break; 2329 udelay(1); 2330 } 2331 2332 if (i >= usec_timeout) { 2333 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2334 return -EINVAL; 2335 } 2336 2337 if (amdgpu_emu_mode == 1) 2338 adev->nbio_funcs->hdp_flush(adev, NULL); 2339 2340 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); 2341 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); 2342 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0); 2343 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0); 2344 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2345 WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO, 2346 adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000); 2347 WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, 2348 upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr)); 2349 2350 return 0; 2351 } 2352 2353 static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 2354 { 2355 int r; 2356 const struct gfx_firmware_header_v1_0 *me_hdr; 2357 const __le32 *fw_data; 2358 unsigned i, fw_size; 2359 uint32_t tmp; 2360 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2361 2362 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2363 adev->gfx.me_fw->data; 2364 2365 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2366 2367 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 2368 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2369 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 2370 2371 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 2372 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2373 &adev->gfx.me.me_fw_obj, 2374 &adev->gfx.me.me_fw_gpu_addr, 2375 (void **)&adev->gfx.me.me_fw_ptr); 2376 if (r) { 2377 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 2378 gfx_v10_0_me_fini(adev); 2379 return r; 2380 } 2381 2382 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 2383 2384 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 2385 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 2386 2387 /* Trigger an invalidation of the L1 instruction caches */ 2388 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); 2389 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2390 WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp); 2391 2392 /* Wait for invalidation complete */ 2393 for (i = 0; i < usec_timeout; i++) { 2394 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); 2395 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2396 INVALIDATE_CACHE_COMPLETE)) 2397 break; 2398 udelay(1); 2399 } 2400 2401 if (i >= usec_timeout) { 2402 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2403 return -EINVAL; 2404 } 2405 2406 if (amdgpu_emu_mode == 1) 2407 adev->nbio_funcs->hdp_flush(adev, NULL); 2408 2409 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); 2410 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2411 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2412 tmp = 
REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2413 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2414 WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO, 2415 adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000); 2416 WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, 2417 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 2418 2419 return 0; 2420 } 2421 2422 static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2423 { 2424 int r; 2425 2426 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2427 return -EINVAL; 2428 2429 gfx_v10_0_cp_gfx_enable(adev, false); 2430 2431 r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev); 2432 if (r) { 2433 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 2434 return r; 2435 } 2436 2437 r = gfx_v10_0_cp_gfx_load_ce_microcode(adev); 2438 if (r) { 2439 dev_err(adev->dev, "(%d) failed to load ce fw\n", r); 2440 return r; 2441 } 2442 2443 r = gfx_v10_0_cp_gfx_load_me_microcode(adev); 2444 if (r) { 2445 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 2446 return r; 2447 } 2448 2449 return 0; 2450 } 2451 2452 static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) 2453 { 2454 struct amdgpu_ring *ring; 2455 const struct cs_section_def *sect = NULL; 2456 const struct cs_extent_def *ext = NULL; 2457 int r, i; 2458 int ctx_reg_offset; 2459 2460 /* init the CP */ 2461 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, 2462 adev->gfx.config.max_hw_contexts - 1); 2463 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2464 2465 gfx_v10_0_cp_gfx_enable(adev, true); 2466 2467 ring = &adev->gfx.gfx_ring[0]; 2468 r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4); 2469 if (r) { 2470 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2471 return r; 2472 } 2473 2474 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2475 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2476 2477 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2478 amdgpu_ring_write(ring, 0x80000000); 2479 amdgpu_ring_write(ring, 0x80000000); 2480 2481 for (sect = gfx10_cs_data; sect->section != NULL; ++sect) { 2482 for (ext = sect->section; ext->extent != NULL; ++ext) { 2483 if (sect->id == SECT_CONTEXT) { 2484 amdgpu_ring_write(ring, 2485 PACKET3(PACKET3_SET_CONTEXT_REG, 2486 ext->reg_count)); 2487 amdgpu_ring_write(ring, ext->reg_index - 2488 PACKET3_SET_CONTEXT_REG_START); 2489 for (i = 0; i < ext->reg_count; i++) 2490 amdgpu_ring_write(ring, ext->extent[i]); 2491 } 2492 } 2493 } 2494 2495 ctx_reg_offset = 2496 SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 2497 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 2498 amdgpu_ring_write(ring, ctx_reg_offset); 2499 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 2500 2501 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2502 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2503 2504 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2505 amdgpu_ring_write(ring, 0); 2506 2507 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2508 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2509 amdgpu_ring_write(ring, 0x8000); 2510 amdgpu_ring_write(ring, 0x8000); 2511 2512 amdgpu_ring_commit(ring); 2513 2514 /* submit cs packet to copy state 0 to next available state */ 2515 ring = &adev->gfx.gfx_ring[1]; 2516 r = amdgpu_ring_alloc(ring, 2); 2517 if (r) { 2518 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2519 return r; 2520 } 2521 2522 amdgpu_ring_write(ring, 
PACKET3(PACKET3_CLEAR_STATE, 0)); 2523 amdgpu_ring_write(ring, 0); 2524 2525 amdgpu_ring_commit(ring); 2526 2527 return 0; 2528 } 2529 2530 static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 2531 CP_PIPE_ID pipe) 2532 { 2533 u32 tmp; 2534 2535 tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL); 2536 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 2537 2538 WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp); 2539 } 2540 2541 static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 2542 struct amdgpu_ring *ring) 2543 { 2544 u32 tmp; 2545 2546 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2547 if (ring->use_doorbell) { 2548 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2549 DOORBELL_OFFSET, ring->doorbell_index); 2550 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2551 DOORBELL_EN, 1); 2552 } else { 2553 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2554 DOORBELL_EN, 0); 2555 } 2556 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2557 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2558 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2559 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2560 2561 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2562 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2563 } 2564 2565 static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) 2566 { 2567 struct amdgpu_ring *ring; 2568 u32 tmp; 2569 u32 rb_bufsz; 2570 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2571 u32 i; 2572 2573 /* Set the write pointer delay */ 2574 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2575 2576 /* set the RB to use vmid 0 */ 2577 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2578 2579 /* Init gfx ring 0 for pipe 0 */ 2580 mutex_lock(&adev->srbm_mutex); 2581 gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 2582 mutex_unlock(&adev->srbm_mutex); 2583 /* Set ring buffer size */ 2584 ring = &adev->gfx.gfx_ring[0]; 2585 rb_bufsz = order_base_2(ring->ring_size / 8); 2586 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2587 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2588 #ifdef __BIG_ENDIAN 2589 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2590 #endif 2591 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2592 2593 /* Initialize the ring buffer's write pointers */ 2594 ring->wptr = 0; 2595 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2596 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2597 2598 /* set the wb address wether it's enabled or not */ 2599 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2600 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2601 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 2602 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2603 2604 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2605 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, 2606 lower_32_bits(wptr_gpu_addr)); 2607 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, 2608 upper_32_bits(wptr_gpu_addr)); 2609 2610 mdelay(1); 2611 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2612 2613 rb_addr = ring->gpu_addr >> 8; 2614 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2615 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2616 2617 WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); 2618 2619 gfx_v10_0_cp_gfx_set_doorbell(adev, ring); 2620 2621 /* Init gfx ring 1 for pipe 1 */ 2622 mutex_lock(&adev->srbm_mutex); 2623 gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 2624 mutex_unlock(&adev->srbm_mutex); 2625 ring = &adev->gfx.gfx_ring[1]; 2626 rb_bufsz = 
order_base_2(ring->ring_size / 8); 2627 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 2628 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 2629 #ifdef __BIG_ENDIAN 2630 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, BUF_SWAP, 1); 2631 #endif 2632 WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); 2633 /* Initialize the ring buffer's write pointers */ 2634 ring->wptr = 0; 2635 WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); 2636 WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 2637 /* Set the wb address wether it's enabled or not */ 2638 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2639 WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 2640 WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 2641 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2642 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2643 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, 2644 lower_32_bits(wptr_gpu_addr)); 2645 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, 2646 upper_32_bits(wptr_gpu_addr)); 2647 2648 mdelay(1); 2649 WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); 2650 2651 rb_addr = ring->gpu_addr >> 8; 2652 WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); 2653 WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 2654 WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); 2655 2656 gfx_v10_0_cp_gfx_set_doorbell(adev, ring); 2657 2658 /* Switch to pipe 0 */ 2659 mutex_lock(&adev->srbm_mutex); 2660 gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 2661 mutex_unlock(&adev->srbm_mutex); 2662 2663 /* start the ring */ 2664 gfx_v10_0_cp_gfx_start(adev); 2665 2666 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2667 ring = &adev->gfx.gfx_ring[i]; 2668 ring->sched.ready = true; 2669 } 2670 2671 return 0; 2672 } 2673 2674 static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2675 { 2676 int i; 2677 2678 if (enable) { 2679 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); 2680 } else { 2681 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 2682 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | 2683 CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2684 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2685 adev->gfx.compute_ring[i].sched.ready = false; 2686 adev->gfx.kiq.ring.sched.ready = false; 2687 } 2688 udelay(50); 2689 } 2690 2691 static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2692 { 2693 const struct gfx_firmware_header_v1_0 *mec_hdr; 2694 const __le32 *fw_data; 2695 unsigned i; 2696 u32 tmp; 2697 u32 usec_timeout = 50000; /* Wait for 50 ms */ 2698 2699 if (!adev->gfx.mec_fw) 2700 return -EINVAL; 2701 2702 gfx_v10_0_cp_compute_enable(adev, false); 2703 2704 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2705 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2706 2707 fw_data = (const __le32 *) 2708 (adev->gfx.mec_fw->data + 2709 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2710 2711 /* Trigger an invalidation of the L1 instruction caches */ 2712 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); 2713 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2714 WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp); 2715 2716 /* Wait for invalidation complete */ 2717 for (i = 0; i < usec_timeout; i++) { 2718 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); 2719 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2720 INVALIDATE_CACHE_COMPLETE)) 2721 break; 2722 udelay(1); 2723 } 2724 2725 if (i >= usec_timeout) { 2726 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2727 return -EINVAL; 2728 } 2729 2730 if 
(amdgpu_emu_mode == 1) 2731 adev->nbio_funcs->hdp_flush(adev, NULL); 2732 2733 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); 2734 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2735 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2736 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2737 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2738 2739 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr & 2740 0xFFFFF000); 2741 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2742 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2743 2744 /* MEC1 */ 2745 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0); 2746 2747 for (i = 0; i < mec_hdr->jt_size; i++) 2748 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2749 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2750 2751 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 2752 2753 /* 2754 * TODO: Loading MEC2 firmware is only necessary if MEC2 should run 2755 * different microcode than MEC1. 2756 */ 2757 2758 return 0; 2759 } 2760 2761 static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) 2762 { 2763 uint32_t tmp; 2764 struct amdgpu_device *adev = ring->adev; 2765 2766 /* tell RLC which is KIQ queue */ 2767 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2768 tmp &= 0xffffff00; 2769 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2770 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2771 tmp |= 0x80; 2772 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2773 } 2774 2775 static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) 2776 { 2777 struct amdgpu_device *adev = ring->adev; 2778 struct v10_gfx_mqd *mqd = ring->mqd_ptr; 2779 uint64_t hqd_gpu_addr, wb_gpu_addr; 2780 uint32_t tmp; 2781 uint32_t rb_bufsz; 2782 2783 /* set up gfx hqd wptr */ 2784 mqd->cp_gfx_hqd_wptr = 0; 2785 mqd->cp_gfx_hqd_wptr_hi = 0; 2786 2787 /* set the pointer to the MQD */ 2788 mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc; 2789 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2790 2791 /* set up mqd control */ 2792 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL); 2793 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 2794 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 2795 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 2796 mqd->cp_gfx_mqd_control = tmp; 2797 2798 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 2799 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID); 2800 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 2801 mqd->cp_gfx_hqd_vmid = 0; 2802 2803 /* set up default queue priority level 2804 * 0x0 = low priority, 0x1 = high priority */ 2805 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); 2806 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); 2807 mqd->cp_gfx_hqd_queue_priority = tmp; 2808 2809 /* set up time quantum */ 2810 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM); 2811 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 2812 mqd->cp_gfx_hqd_quantum = tmp; 2813 2814 /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ 2815 hqd_gpu_addr = ring->gpu_addr >> 8; 2816 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 2817 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 2818 2819 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 2820 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2821 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 2822 mqd->cp_gfx_hqd_rptr_addr_hi = 2823 upper_32_bits(wb_gpu_addr) & 0xffff; 2824 2825 /* set up rb_wptr_poll addr */ 2826 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2827 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2828 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2829 2830 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 2831 rb_bufsz = order_base_2(ring->ring_size / 4) - 1; 2832 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL); 2833 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 2834 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 2835 #ifdef __BIG_ENDIAN 2836 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 2837 #endif 2838 mqd->cp_gfx_hqd_cntl = tmp; 2839 2840 /* set up cp_doorbell_control */ 2841 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2842 if (ring->use_doorbell) { 2843 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2844 DOORBELL_OFFSET, ring->doorbell_index); 2845 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2846 DOORBELL_EN, 1); 2847 } else 2848 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2849 DOORBELL_EN, 0); 2850 mqd->cp_rb_doorbell_control = tmp; 2851 2852 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2853 ring->wptr = 0; 2854 mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); 2855 2856 /* active the queue */ 2857 mqd->cp_gfx_hqd_active = 1; 2858 2859 return 0; 2860 } 2861 2862 #ifdef BRING_UP_DEBUG 2863 static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring) 2864 { 2865 struct amdgpu_device *adev = ring->adev; 2866 struct v10_gfx_mqd *mqd = ring->mqd_ptr; 2867 2868 /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ 2869 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); 2870 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); 2871 2872 /* set GFX_MQD_BASE */ 2873 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); 2874 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 2875 2876 /* set GFX_MQD_CONTROL */ 2877 WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); 2878 2879 /* set GFX_HQD_VMID to 0 */ 2880 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); 2881 2882 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY, 2883 mqd->cp_gfx_hqd_queue_priority); 2884 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); 2885 2886 /* set GFX_HQD_BASE, similar as CP_RB_BASE */ 2887 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); 2888 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); 2889 2890 /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ 2891 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); 2892 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); 2893 2894 /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */ 2895 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); 2896 2897 /* set RB_WPTR_POLL_ADDR */ 2898 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); 2899 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); 2900 2901 /* set RB_DOORBELL_CONTROL */ 
2902 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); 2903 2904 /* active the queue */ 2905 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); 2906 2907 return 0; 2908 } 2909 #endif 2910 2911 static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) 2912 { 2913 struct amdgpu_device *adev = ring->adev; 2914 struct v10_gfx_mqd *mqd = ring->mqd_ptr; 2915 2916 if (!adev->in_gpu_reset && !adev->in_suspend) { 2917 memset((void *)mqd, 0, sizeof(*mqd)); 2918 mutex_lock(&adev->srbm_mutex); 2919 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2920 gfx_v10_0_gfx_mqd_init(ring); 2921 #ifdef BRING_UP_DEBUG 2922 gfx_v10_0_gfx_queue_init_register(ring); 2923 #endif 2924 nv_grbm_select(adev, 0, 0, 0, 0); 2925 mutex_unlock(&adev->srbm_mutex); 2926 if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) 2927 memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd)); 2928 } else if (adev->in_gpu_reset) { 2929 /* reset mqd with the backup copy */ 2930 if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) 2931 memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); 2932 /* reset the ring */ 2933 ring->wptr = 0; 2934 amdgpu_ring_clear_ring(ring); 2935 #ifdef BRING_UP_DEBUG 2936 mutex_lock(&adev->srbm_mutex); 2937 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2938 gfx_v10_0_gfx_queue_init_register(ring); 2939 nv_grbm_select(adev, 0, 0, 0, 0); 2940 mutex_unlock(&adev->srbm_mutex); 2941 #endif 2942 } else { 2943 amdgpu_ring_clear_ring(ring); 2944 } 2945 2946 return 0; 2947 } 2948 2949 #ifndef BRING_UP_DEBUG 2950 static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev) 2951 { 2952 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 2953 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2954 int r, i; 2955 2956 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 2957 return -EINVAL; 2958 2959 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * 2960 adev->gfx.num_gfx_rings); 2961 if (r) { 2962 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2963 return r; 2964 } 2965 2966 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2967 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); 2968 2969 r = amdgpu_ring_test_ring(kiq_ring); 2970 if (r) { 2971 DRM_ERROR("kfq enable failed\n"); 2972 kiq_ring->sched.ready = false; 2973 } 2974 return r; 2975 } 2976 #endif 2977 2978 static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 2979 { 2980 int r, i; 2981 struct amdgpu_ring *ring; 2982 2983 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2984 ring = &adev->gfx.gfx_ring[i]; 2985 2986 r = amdgpu_bo_reserve(ring->mqd_obj, false); 2987 if (unlikely(r != 0)) 2988 goto done; 2989 2990 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 2991 if (!r) { 2992 r = gfx_v10_0_gfx_init_queue(ring); 2993 amdgpu_bo_kunmap(ring->mqd_obj); 2994 ring->mqd_ptr = NULL; 2995 } 2996 amdgpu_bo_unreserve(ring->mqd_obj); 2997 if (r) 2998 goto done; 2999 } 3000 #ifndef BRING_UP_DEBUG 3001 r = gfx_v10_0_kiq_enable_kgq(adev); 3002 if (r) 3003 goto done; 3004 #endif 3005 r = gfx_v10_0_cp_gfx_start(adev); 3006 if (r) 3007 goto done; 3008 3009 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3010 ring = &adev->gfx.gfx_ring[i]; 3011 ring->sched.ready = true; 3012 } 3013 done: 3014 return r; 3015 } 3016 3017 static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) 3018 { 3019 struct amdgpu_device *adev = ring->adev; 3020 struct v10_compute_mqd *mqd = ring->mqd_ptr; 3021 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3022 uint32_t tmp; 
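/*
 * Build the compute MQD in host memory first; for the KIQ these values
 * are committed to the CP_HQD_* registers by
 * gfx_v10_0_kiq_init_register(), while regular KCQs hand the MQD to the
 * CP when the queue is mapped. The all-ones static thread mgmt masks
 * below leave every CU enabled.
 */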
3023 3024 mqd->header = 0xC0310800; 3025 mqd->compute_pipelinestat_enable = 0x00000001; 3026 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3027 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3028 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3029 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3030 mqd->compute_misc_reserved = 0x00000003; 3031 3032 eop_base_addr = ring->eop_gpu_addr >> 8; 3033 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3034 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3035 3036 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3037 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3038 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3039 (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1)); 3040 3041 mqd->cp_hqd_eop_control = tmp; 3042 3043 /* enable doorbell? */ 3044 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3045 3046 if (ring->use_doorbell) { 3047 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3048 DOORBELL_OFFSET, ring->doorbell_index); 3049 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3050 DOORBELL_EN, 1); 3051 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3052 DOORBELL_SOURCE, 0); 3053 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3054 DOORBELL_HIT, 0); 3055 } else { 3056 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3057 DOORBELL_EN, 0); 3058 } 3059 3060 mqd->cp_hqd_pq_doorbell_control = tmp; 3061 3062 /* disable the queue if it's active */ 3063 ring->wptr = 0; 3064 mqd->cp_hqd_dequeue_request = 0; 3065 mqd->cp_hqd_pq_rptr = 0; 3066 mqd->cp_hqd_pq_wptr_lo = 0; 3067 mqd->cp_hqd_pq_wptr_hi = 0; 3068 3069 /* set the pointer to the MQD */ 3070 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3071 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3072 3073 /* set MQD vmid to 0 */ 3074 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3075 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3076 mqd->cp_mqd_control = tmp; 3077 3078 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3079 hqd_gpu_addr = ring->gpu_addr >> 8; 3080 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3081 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3082 3083 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3084 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3085 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3086 (order_base_2(ring->ring_size / 4) - 1)); 3087 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3088 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3089 #ifdef __BIG_ENDIAN 3090 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3091 #endif 3092 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3093 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 3094 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3095 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3096 mqd->cp_hqd_pq_control = tmp; 3097 3098 /* set the wb address whether it's enabled or not */ 3099 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3100 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3101 mqd->cp_hqd_pq_rptr_report_addr_hi = 3102 upper_32_bits(wb_gpu_addr) & 0xffff; 3103 3104 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3105 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3106 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3107 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3108 3109 tmp = 0; 3110 /* enable 
the doorbell if requested */ 3111 if (ring->use_doorbell) { 3112 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3113 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3114 DOORBELL_OFFSET, ring->doorbell_index); 3115 3116 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3117 DOORBELL_EN, 1); 3118 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3119 DOORBELL_SOURCE, 0); 3120 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3121 DOORBELL_HIT, 0); 3122 } 3123 3124 mqd->cp_hqd_pq_doorbell_control = tmp; 3125 3126 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3127 ring->wptr = 0; 3128 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3129 3130 /* set the vmid for the queue */ 3131 mqd->cp_hqd_vmid = 0; 3132 3133 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3134 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3135 mqd->cp_hqd_persistent_state = tmp; 3136 3137 /* set MIN_IB_AVAIL_SIZE */ 3138 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3139 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3140 mqd->cp_hqd_ib_control = tmp; 3141 3142 /* activate the queue */ 3143 mqd->cp_hqd_active = 1; 3144 3145 return 0; 3146 } 3147 3148 static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) 3149 { 3150 struct amdgpu_device *adev = ring->adev; 3151 struct v10_compute_mqd *mqd = ring->mqd_ptr; 3152 int j; 3153 3154 /* disable wptr polling */ 3155 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3156 3157 /* write the EOP addr */ 3158 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3159 mqd->cp_hqd_eop_base_addr_lo); 3160 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3161 mqd->cp_hqd_eop_base_addr_hi); 3162 3163 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3164 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, 3165 mqd->cp_hqd_eop_control); 3166 3167 /* enable doorbell? 
*/ 3168 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3169 mqd->cp_hqd_pq_doorbell_control); 3170 3171 /* disable the queue if it's active */ 3172 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3173 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3174 for (j = 0; j < adev->usec_timeout; j++) { 3175 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3176 break; 3177 udelay(1); 3178 } 3179 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3180 mqd->cp_hqd_dequeue_request); 3181 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 3182 mqd->cp_hqd_pq_rptr); 3183 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3184 mqd->cp_hqd_pq_wptr_lo); 3185 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3186 mqd->cp_hqd_pq_wptr_hi); 3187 } 3188 3189 /* set the pointer to the MQD */ 3190 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, 3191 mqd->cp_mqd_base_addr_lo); 3192 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3193 mqd->cp_mqd_base_addr_hi); 3194 3195 /* set MQD vmid to 0 */ 3196 WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 3197 mqd->cp_mqd_control); 3198 3199 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3200 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, 3201 mqd->cp_hqd_pq_base_lo); 3202 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, 3203 mqd->cp_hqd_pq_base_hi); 3204 3205 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3206 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, 3207 mqd->cp_hqd_pq_control); 3208 3209 /* set the wb address whether it's enabled or not */ 3210 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3211 mqd->cp_hqd_pq_rptr_report_addr_lo); 3212 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3213 mqd->cp_hqd_pq_rptr_report_addr_hi); 3214 3215 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3216 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3217 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3218 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3219 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3220 3221 /* enable the doorbell if requested */ 3222 if (ring->use_doorbell) { 3223 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3224 (adev->doorbell_index.kiq * 2) << 2); 3225 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3226 (adev->doorbell_index.userqueue_end * 2) << 2); 3227 } 3228 3229 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3230 mqd->cp_hqd_pq_doorbell_control); 3231 3232 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3233 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3234 mqd->cp_hqd_pq_wptr_lo); 3235 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3236 mqd->cp_hqd_pq_wptr_hi); 3237 3238 /* set the vmid for the queue */ 3239 WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3240 3241 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3242 mqd->cp_hqd_persistent_state); 3243 3244 /* activate the queue */ 3245 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 3246 mqd->cp_hqd_active); 3247 3248 if (ring->use_doorbell) 3249 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3250 3251 return 0; 3252 } 3253 3254 static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) 3255 { 3256 struct amdgpu_device *adev = ring->adev; 3257 struct v10_compute_mqd *mqd = ring->mqd_ptr; 3258 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3259 3260 gfx_v10_0_kiq_setting(ring); 3261 3262 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3263 /* reset MQD to a clean status */ 3264 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3265 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 3266 3267 /* reset ring buffer */ 3268 ring->wptr = 0; 3269 amdgpu_ring_clear_ring(ring); 3270 3271 mutex_lock(&adev->srbm_mutex); 3272 
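/*
 * With srbm_mutex held, select this ring's me/pipe/queue so the
 * CP_HQD_* writes in gfx_v10_0_kiq_init_register() land on the KIQ's
 * queue slot; the selection is restored to (0, 0, 0) afterwards.
 */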
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3273 gfx_v10_0_kiq_init_register(ring); 3274 nv_grbm_select(adev, 0, 0, 0, 0); 3275 mutex_unlock(&adev->srbm_mutex); 3276 } else { 3277 memset((void *)mqd, 0, sizeof(*mqd)); 3278 mutex_lock(&adev->srbm_mutex); 3279 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3280 gfx_v10_0_compute_mqd_init(ring); 3281 gfx_v10_0_kiq_init_register(ring); 3282 nv_grbm_select(adev, 0, 0, 0, 0); 3283 mutex_unlock(&adev->srbm_mutex); 3284 3285 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3286 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 3287 } 3288 3289 return 0; 3290 } 3291 3292 static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) 3293 { 3294 struct amdgpu_device *adev = ring->adev; 3295 struct v10_compute_mqd *mqd = ring->mqd_ptr; 3296 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3297 3298 if (!adev->in_gpu_reset && !adev->in_suspend) { 3299 memset((void *)mqd, 0, sizeof(*mqd)); 3300 mutex_lock(&adev->srbm_mutex); 3301 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3302 gfx_v10_0_compute_mqd_init(ring); 3303 nv_grbm_select(adev, 0, 0, 0, 0); 3304 mutex_unlock(&adev->srbm_mutex); 3305 3306 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3307 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 3308 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3309 /* reset MQD to a clean status */ 3310 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3311 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 3312 3313 /* reset ring buffer */ 3314 ring->wptr = 0; 3315 amdgpu_ring_clear_ring(ring); 3316 } else { 3317 amdgpu_ring_clear_ring(ring); 3318 } 3319 3320 return 0; 3321 } 3322 3323 static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) 3324 { 3325 struct amdgpu_ring *ring; 3326 int r; 3327 3328 ring = &adev->gfx.kiq.ring; 3329 3330 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3331 if (unlikely(r != 0)) 3332 return r; 3333 3334 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3335 if (unlikely(r != 0)) 3336 return r; 3337 3338 gfx_v10_0_kiq_init_queue(ring); 3339 amdgpu_bo_kunmap(ring->mqd_obj); 3340 ring->mqd_ptr = NULL; 3341 amdgpu_bo_unreserve(ring->mqd_obj); 3342 ring->sched.ready = true; 3343 return 0; 3344 } 3345 3346 static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) 3347 { 3348 struct amdgpu_ring *ring = NULL; 3349 int r = 0, i; 3350 3351 gfx_v10_0_cp_compute_enable(adev, true); 3352 3353 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3354 ring = &adev->gfx.compute_ring[i]; 3355 3356 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3357 if (unlikely(r != 0)) 3358 goto done; 3359 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3360 if (!r) { 3361 r = gfx_v10_0_kcq_init_queue(ring); 3362 amdgpu_bo_kunmap(ring->mqd_obj); 3363 ring->mqd_ptr = NULL; 3364 } 3365 amdgpu_bo_unreserve(ring->mqd_obj); 3366 if (r) 3367 goto done; 3368 } 3369 3370 r = amdgpu_gfx_enable_kcq(adev); 3371 done: 3372 return r; 3373 } 3374 3375 static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) 3376 { 3377 int r, i; 3378 struct amdgpu_ring *ring; 3379 3380 if (!(adev->flags & AMD_IS_APU)) 3381 gfx_v10_0_enable_gui_idle_interrupt(adev, false); 3382 3383 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 3384 /* legacy firmware loading */ 3385 r = gfx_v10_0_cp_gfx_load_microcode(adev); 3386 if (r) 3387 return r; 3388 3389 r = gfx_v10_0_cp_compute_load_microcode(adev); 3390 if (r) 3391 return r; 3392 } 3393 3394 r = gfx_v10_0_kiq_resume(adev); 3395 if (r) 3396 return r; 3397 3398 
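/*
 * KIQ is up; now restore the compute queues. gfx_v10_0_kcq_resume()
 * relies on amdgpu_gfx_enable_kcq(), which presumably maps the KCQs
 * via KIQ packets rather than by direct register writes.
 */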
r = gfx_v10_0_kcq_resume(adev); 3399 if (r) 3400 return r; 3401 3402 if (!amdgpu_async_gfx_ring) { 3403 r = gfx_v10_0_cp_gfx_resume(adev); 3404 if (r) 3405 return r; 3406 } else { 3407 r = gfx_v10_0_cp_async_gfx_ring_resume(adev); 3408 if (r) 3409 return r; 3410 } 3411 3412 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3413 ring = &adev->gfx.gfx_ring[i]; 3414 DRM_INFO("gfx %d ring me %d pipe %d q %d\n", 3415 i, ring->me, ring->pipe, ring->queue); 3416 r = amdgpu_ring_test_ring(ring); 3417 if (r) { 3418 ring->sched.ready = false; 3419 return r; 3420 } 3421 } 3422 3423 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3424 ring = &adev->gfx.compute_ring[i]; 3425 ring->sched.ready = true; 3426 DRM_INFO("compute ring %d mec %d pipe %d q %d\n", 3427 i, ring->me, ring->pipe, ring->queue); 3428 r = amdgpu_ring_test_ring(ring); 3429 if (r) 3430 ring->sched.ready = false; 3431 } 3432 3433 return 0; 3434 } 3435 3436 static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable) 3437 { 3438 gfx_v10_0_cp_gfx_enable(adev, enable); 3439 gfx_v10_0_cp_compute_enable(adev, enable); 3440 } 3441 3442 static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) 3443 { 3444 uint32_t data, pattern = 0xDEADBEEF; 3445 3446 /* check if mmVGT_ESGS_RING_SIZE_UMD 3447 * has been remapped to mmVGT_ESGS_RING_SIZE */ 3448 data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); 3449 3450 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); 3451 3452 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); 3453 3454 if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) { 3455 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); 3456 return true; 3457 } else { 3458 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); 3459 return false; 3460 } 3461 } 3462 3463 static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) 3464 { 3465 uint32_t data; 3466 3467 /* initialize cam_index to 0 3468 * index will auto-inc after each data writting */ 3469 WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); 3470 3471 /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ 3472 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << 3473 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3474 (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) << 3475 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3476 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3477 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3478 3479 /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */ 3480 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) << 3481 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3482 (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) << 3483 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3484 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3485 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3486 3487 /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */ 3488 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) << 3489 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3490 (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) << 3491 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3492 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3493 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3494 3495 /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */ 3496 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) << 3497 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3498 (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) << 3499 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3500 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3501 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3502 3503 /* mmVGT_ESGS_RING_SIZE_UMD -> 
mmVGT_ESGS_RING_SIZE */ 3504 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) << 3505 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3506 (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) << 3507 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3508 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3509 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3510 3511 /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */ 3512 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) << 3513 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3514 (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) << 3515 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3516 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3517 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3518 3519 /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */ 3520 data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) << 3521 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3522 (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) << 3523 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3524 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3525 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3526 } 3527 3528 static int gfx_v10_0_hw_init(void *handle) 3529 { 3530 int r; 3531 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3532 3533 r = gfx_v10_0_csb_vram_pin(adev); 3534 if (r) 3535 return r; 3536 3537 if (!amdgpu_emu_mode) 3538 gfx_v10_0_init_golden_registers(adev); 3539 3540 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 3541 /** 3542 * For gfx 10, rlc firmware loading relies on smu firmware is 3543 * loaded firstly, so in direct type, it has to load smc ucode 3544 * here before rlc. 3545 */ 3546 r = smu_load_microcode(&adev->smu); 3547 if (r) 3548 return r; 3549 3550 r = smu_check_fw_status(&adev->smu); 3551 if (r) { 3552 pr_err("SMC firmware status is not correct\n"); 3553 return r; 3554 } 3555 } 3556 3557 /* if GRBM CAM not remapped, set up the remapping */ 3558 if (!gfx_v10_0_check_grbm_cam_remapping(adev)) 3559 gfx_v10_0_setup_grbm_cam_remapping(adev); 3560 3561 gfx_v10_0_constants_init(adev); 3562 3563 r = gfx_v10_0_rlc_resume(adev); 3564 if (r) 3565 return r; 3566 3567 /* 3568 * init golden registers and rlc resume may override some registers, 3569 * reconfig them here 3570 */ 3571 gfx_v10_0_tcp_harvest(adev); 3572 3573 r = gfx_v10_0_cp_resume(adev); 3574 if (r) 3575 return r; 3576 3577 return r; 3578 } 3579 3580 #ifndef BRING_UP_DEBUG 3581 static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) 3582 { 3583 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 3584 struct amdgpu_ring *kiq_ring = &kiq->ring; 3585 int i; 3586 3587 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 3588 return -EINVAL; 3589 3590 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * 3591 adev->gfx.num_gfx_rings)) 3592 return -ENOMEM; 3593 3594 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3595 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], 3596 PREEMPT_QUEUES, 0, 0); 3597 3598 return amdgpu_ring_test_ring(kiq_ring); 3599 } 3600 #endif 3601 3602 static int gfx_v10_0_hw_fini(void *handle) 3603 { 3604 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3605 int r; 3606 3607 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3608 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3609 #ifndef BRING_UP_DEBUG 3610 if (amdgpu_async_gfx_ring) { 3611 r = gfx_v10_0_kiq_disable_kgq(adev); 3612 if (r) 3613 DRM_ERROR("KGQ disable failed\n"); 3614 } 3615 #endif 3616 if (amdgpu_gfx_disable_kcq(adev)) 3617 DRM_ERROR("KCQ disable failed\n"); 3618 if (amdgpu_sriov_vf(adev)) { 3619 pr_debug("For SRIOV client, shouldn't do 
anything.\n");
		return 0;
	}
	gfx_v10_0_cp_enable(adev, false);
	gfx_v10_0_enable_gui_idle_interrupt(adev, false);
	gfx_v10_0_csb_vram_unpin(adev);

	return 0;
}

static int gfx_v10_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->in_suspend = true;
	return gfx_v10_0_hw_fini(adev);
}

static int gfx_v10_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = gfx_v10_0_hw_init(adev);
	adev->in_suspend = false;
	return r;
}

static bool gfx_v10_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
			  GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v10_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read GRBM_STATUS */
		tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
			GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v10_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
		   GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
		   GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP,
						1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX,
						1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP,
						1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC,
						1);

	if (grbm_soft_reset) {
		/* stop the rlc */
		gfx_v10_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v10_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v10_0_cp_compute_enable(adev, false);

		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}

static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

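	/*
	 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running GPU
	 * clock into the LSB/MSB register pair read below; gpu_clock_mutex
	 * keeps a concurrent capture from tearing the 64-bit value.
	 */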
mutex_lock(&adev->gfx.gpu_clock_mutex); 3746 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3747 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3748 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3749 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3750 return clock; 3751 } 3752 3753 static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3754 uint32_t vmid, 3755 uint32_t gds_base, uint32_t gds_size, 3756 uint32_t gws_base, uint32_t gws_size, 3757 uint32_t oa_base, uint32_t oa_size) 3758 { 3759 struct amdgpu_device *adev = ring->adev; 3760 3761 /* GDS Base */ 3762 gfx_v10_0_write_data_to_reg(ring, 0, false, 3763 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3764 gds_base); 3765 3766 /* GDS Size */ 3767 gfx_v10_0_write_data_to_reg(ring, 0, false, 3768 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3769 gds_size); 3770 3771 /* GWS */ 3772 gfx_v10_0_write_data_to_reg(ring, 0, false, 3773 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3774 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3775 3776 /* OA */ 3777 gfx_v10_0_write_data_to_reg(ring, 0, false, 3778 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3779 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3780 } 3781 3782 static int gfx_v10_0_early_init(void *handle) 3783 { 3784 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3785 3786 adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS; 3787 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3788 3789 gfx_v10_0_set_kiq_pm4_funcs(adev); 3790 gfx_v10_0_set_ring_funcs(adev); 3791 gfx_v10_0_set_irq_funcs(adev); 3792 gfx_v10_0_set_gds_init(adev); 3793 gfx_v10_0_set_rlc_funcs(adev); 3794 3795 return 0; 3796 } 3797 3798 static int gfx_v10_0_late_init(void *handle) 3799 { 3800 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3801 int r; 3802 3803 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3804 if (r) 3805 return r; 3806 3807 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3808 if (r) 3809 return r; 3810 3811 return 0; 3812 } 3813 3814 static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev) 3815 { 3816 uint32_t rlc_cntl; 3817 3818 /* if RLC is not enabled, do nothing */ 3819 rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3820 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? 
true : false; 3821 } 3822 3823 static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) 3824 { 3825 uint32_t data; 3826 unsigned i; 3827 3828 data = RLC_SAFE_MODE__CMD_MASK; 3829 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3830 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3831 3832 /* wait for RLC_SAFE_MODE */ 3833 for (i = 0; i < adev->usec_timeout; i++) { 3834 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3835 break; 3836 udelay(1); 3837 } 3838 } 3839 3840 static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) 3841 { 3842 uint32_t data; 3843 3844 data = RLC_SAFE_MODE__CMD_MASK; 3845 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3846 } 3847 3848 static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 3849 bool enable) 3850 { 3851 uint32_t data, def; 3852 3853 /* It is disabled by HW by default */ 3854 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 3855 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 3856 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3857 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 3858 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 3859 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 3860 3861 /* only for Vega10 & Raven1 */ 3862 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 3863 3864 if (def != data) 3865 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3866 3867 /* MGLS is a global flag to control all MGLS in GFX */ 3868 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 3869 /* 2 - RLC memory Light sleep */ 3870 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 3871 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 3872 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 3873 if (def != data) 3874 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 3875 } 3876 /* 3 - CP memory Light sleep */ 3877 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 3878 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 3879 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 3880 if (def != data) 3881 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 3882 } 3883 } 3884 } else { 3885 /* 1 - MGCG_OVERRIDE */ 3886 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3887 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 3888 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 3889 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 3890 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 3891 if (def != data) 3892 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3893 3894 /* 2 - disable MGLS in RLC */ 3895 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 3896 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 3897 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 3898 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 3899 } 3900 3901 /* 3 - disable MGLS in CP */ 3902 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 3903 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 3904 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 3905 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 3906 } 3907 } 3908 } 3909 3910 static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, 3911 bool enable) 3912 { 3913 uint32_t data, def; 3914 3915 /* Enable 3D CGCG/CGLS */ 3916 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 3917 /* write cmd to clear cgcg/cgls ov */ 3918 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3919 /* unset CGCG override */ 3920 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 3921 /* update CGCG and CGLS override 
bits */ 3922 if (def != data) 3923 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3924 /* enable 3Dcgcg FSM(0x0000363f) */ 3925 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 3926 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 3927 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 3928 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 3929 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 3930 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 3931 if (def != data) 3932 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 3933 3934 /* set IDLE_POLL_COUNT(0x00900100) */ 3935 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 3936 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 3937 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 3938 if (def != data) 3939 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 3940 } else { 3941 /* Disable CGCG/CGLS */ 3942 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 3943 /* disable cgcg, cgls should be disabled */ 3944 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 3945 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 3946 /* disable cgcg and cgls in FSM */ 3947 if (def != data) 3948 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 3949 } 3950 } 3951 3952 static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 3953 bool enable) 3954 { 3955 uint32_t def, data; 3956 3957 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 3958 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3959 /* unset CGCG override */ 3960 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 3961 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 3962 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 3963 else 3964 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 3965 /* update CGCG and CGLS override bits */ 3966 if (def != data) 3967 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3968 3969 /* enable cgcg FSM(0x0000363F) */ 3970 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 3971 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 3972 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 3973 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 3974 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 3975 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 3976 if (def != data) 3977 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 3978 3979 /* set IDLE_POLL_COUNT(0x00900100) */ 3980 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 3981 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 3982 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 3983 if (def != data) 3984 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 3985 } else { 3986 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 3987 /* reset CGCG/CGLS bits */ 3988 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 3989 /* disable cgcg and cgls in FSM */ 3990 if (def != data) 3991 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 3992 } 3993 } 3994 3995 static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, 3996 bool enable) 3997 { 3998 amdgpu_gfx_rlc_enter_safe_mode(adev); 3999 4000 if (enable) { 4001 /* CGCG/CGLS should be enabled after MGCG/MGLS 4002 * === MGCG + MGLS === 4003 */ 4004 gfx_v10_0_update_medium_grain_clock_gating(adev, enable); 4005 /* === CGCG /CGLS for GFX 3D Only === */ 4006 gfx_v10_0_update_3d_clock_gating(adev, enable); 4007 /* === CGCG + CGLS === */ 4008 gfx_v10_0_update_coarse_grain_clock_gating(adev, 
enable); 4009 } else { 4010 /* CGCG/CGLS should be disabled before MGCG/MGLS 4011 * === CGCG + CGLS === 4012 */ 4013 gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); 4014 /* === CGCG /CGLS for GFX 3D Only === */ 4015 gfx_v10_0_update_3d_clock_gating(adev, enable); 4016 /* === MGCG + MGLS === */ 4017 gfx_v10_0_update_medium_grain_clock_gating(adev, enable); 4018 } 4019 4020 if (adev->cg_flags & 4021 (AMD_CG_SUPPORT_GFX_MGCG | 4022 AMD_CG_SUPPORT_GFX_CGLS | 4023 AMD_CG_SUPPORT_GFX_CGCG | 4024 AMD_CG_SUPPORT_GFX_CGLS | 4025 AMD_CG_SUPPORT_GFX_3D_CGCG | 4026 AMD_CG_SUPPORT_GFX_3D_CGLS)) 4027 gfx_v10_0_enable_gui_idle_interrupt(adev, enable); 4028 4029 amdgpu_gfx_rlc_exit_safe_mode(adev); 4030 4031 return 0; 4032 } 4033 4034 static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { 4035 .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, 4036 .set_safe_mode = gfx_v10_0_set_safe_mode, 4037 .unset_safe_mode = gfx_v10_0_unset_safe_mode, 4038 .init = gfx_v10_0_rlc_init, 4039 .get_csb_size = gfx_v10_0_get_csb_size, 4040 .get_csb_buffer = gfx_v10_0_get_csb_buffer, 4041 .resume = gfx_v10_0_rlc_resume, 4042 .stop = gfx_v10_0_rlc_stop, 4043 .reset = gfx_v10_0_rlc_reset, 4044 .start = gfx_v10_0_rlc_start 4045 }; 4046 4047 static int gfx_v10_0_set_powergating_state(void *handle, 4048 enum amd_powergating_state state) 4049 { 4050 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4051 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4052 switch (adev->asic_type) { 4053 case CHIP_NAVI10: 4054 if (!enable) { 4055 amdgpu_gfx_off_ctrl(adev, false); 4056 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4057 } else 4058 amdgpu_gfx_off_ctrl(adev, true); 4059 break; 4060 default: 4061 break; 4062 } 4063 return 0; 4064 } 4065 4066 static int gfx_v10_0_set_clockgating_state(void *handle, 4067 enum amd_clockgating_state state) 4068 { 4069 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4070 4071 switch (adev->asic_type) { 4072 case CHIP_NAVI10: 4073 gfx_v10_0_update_gfx_clock_gating(adev, 4074 state == AMD_CG_STATE_GATE ? 
true : false); 4075 break; 4076 default: 4077 break; 4078 } 4079 return 0; 4080 } 4081 4082 static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) 4083 { 4084 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4085 int data; 4086 4087 /* AMD_CG_SUPPORT_GFX_MGCG */ 4088 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4089 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4090 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4091 4092 /* AMD_CG_SUPPORT_GFX_CGCG */ 4093 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4094 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4095 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4096 4097 /* AMD_CG_SUPPORT_GFX_CGLS */ 4098 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4099 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4100 4101 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4102 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4103 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4104 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4105 4106 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4107 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4108 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4109 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4110 4111 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4112 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4113 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4114 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4115 4116 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4117 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4118 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4119 } 4120 4121 static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4122 { 4123 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/ 4124 } 4125 4126 static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4127 { 4128 struct amdgpu_device *adev = ring->adev; 4129 u64 wptr; 4130 4131 /* XXX check if swapping is necessary on BE */ 4132 if (ring->use_doorbell) { 4133 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4134 } else { 4135 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4136 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4137 } 4138 4139 return wptr; 4140 } 4141 4142 static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4143 { 4144 struct amdgpu_device *adev = ring->adev; 4145 4146 if (ring->use_doorbell) { 4147 /* XXX check if swapping is necessary on BE */ 4148 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4149 WDOORBELL64(ring->doorbell_index, ring->wptr); 4150 } else { 4151 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4152 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4153 } 4154 } 4155 4156 static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4157 { 4158 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */ 4159 } 4160 4161 static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4162 { 4163 u64 wptr; 4164 4165 /* XXX check if swapping is necessary on BE */ 4166 if (ring->use_doorbell) 4167 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4168 else 4169 BUG(); 4170 return wptr; 4171 } 4172 4173 static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4174 { 4175 struct amdgpu_device *adev = ring->adev; 4176 4177 /* XXX check if swapping is necessary on BE */ 4178 if (ring->use_doorbell) { 4179 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4180 WDOORBELL64(ring->doorbell_index, ring->wptr); 4181 } else { 4182 BUG(); /* only 
DOORBELL method supported on gfx10 now */ 4183 } 4184 } 4185 4186 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4187 { 4188 struct amdgpu_device *adev = ring->adev; 4189 u32 ref_and_mask, reg_mem_engine; 4190 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4191 4192 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4193 switch (ring->me) { 4194 case 1: 4195 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4196 break; 4197 case 2: 4198 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4199 break; 4200 default: 4201 return; 4202 } 4203 reg_mem_engine = 0; 4204 } else { 4205 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4206 reg_mem_engine = 1; /* pfp */ 4207 } 4208 4209 gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4210 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4211 adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4212 ref_and_mask, ref_and_mask, 0x20); 4213 } 4214 4215 static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4216 struct amdgpu_job *job, 4217 struct amdgpu_ib *ib, 4218 uint32_t flags) 4219 { 4220 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4221 u32 header, control = 0; 4222 4223 if (ib->flags & AMDGPU_IB_FLAG_CE) 4224 header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2); 4225 else 4226 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4227 4228 control |= ib->length_dw | (vmid << 24); 4229 4230 if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4231 control |= INDIRECT_BUFFER_PRE_ENB(1); 4232 4233 if (flags & AMDGPU_IB_PREEMPTED) 4234 control |= INDIRECT_BUFFER_PRE_RESUME(1); 4235 4236 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4237 gfx_v10_0_ring_emit_de_meta(ring, 4238 flags & AMDGPU_IB_PREEMPTED ? true : false); 4239 } 4240 4241 amdgpu_ring_write(ring, header); 4242 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4243 amdgpu_ring_write(ring, 4244 #ifdef __BIG_ENDIAN 4245 (2 << 0) | 4246 #endif 4247 lower_32_bits(ib->gpu_addr)); 4248 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4249 amdgpu_ring_write(ring, control); 4250 } 4251 4252 static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4253 struct amdgpu_job *job, 4254 struct amdgpu_ib *ib, 4255 uint32_t flags) 4256 { 4257 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4258 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4259 4260 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4261 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4262 amdgpu_ring_write(ring, 4263 #ifdef __BIG_ENDIAN 4264 (2 << 0) | 4265 #endif 4266 lower_32_bits(ib->gpu_addr)); 4267 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4268 amdgpu_ring_write(ring, control); 4269 } 4270 4271 static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4272 u64 seq, unsigned flags) 4273 { 4274 struct amdgpu_device *adev = ring->adev; 4275 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4276 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4277 4278 /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ 4279 if (adev->pdev->device == 0x50) 4280 int_sel = false; 4281 4282 /* RELEASE_MEM - flush caches, send int */ 4283 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4284 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 4285 PACKET3_RELEASE_MEM_GCR_GL2_WB | 4286 PACKET3_RELEASE_MEM_GCR_GL2_INV | 4287 PACKET3_RELEASE_MEM_GCR_GL2_US | 4288 PACKET3_RELEASE_MEM_GCR_GL1_INV | 4289 PACKET3_RELEASE_MEM_GCR_GLV_INV | 4290 PACKET3_RELEASE_MEM_GCR_GLM_INV | 4291 PACKET3_RELEASE_MEM_GCR_GLM_WB | 4292 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 4293 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4294 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 4295 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 4296 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 4297 4298 /* 4299 * the address should be Qword aligned if 64bit write, Dword 4300 * aligned if only send 32bit data low (discard data high) 4301 */ 4302 if (write64bit) 4303 BUG_ON(addr & 0x7); 4304 else 4305 BUG_ON(addr & 0x3); 4306 amdgpu_ring_write(ring, lower_32_bits(addr)); 4307 amdgpu_ring_write(ring, upper_32_bits(addr)); 4308 amdgpu_ring_write(ring, lower_32_bits(seq)); 4309 amdgpu_ring_write(ring, upper_32_bits(seq)); 4310 amdgpu_ring_write(ring, 0); 4311 } 4312 4313 static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4314 { 4315 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4316 uint32_t seq = ring->fence_drv.sync_seq; 4317 uint64_t addr = ring->fence_drv.gpu_addr; 4318 4319 gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 4320 upper_32_bits(addr), seq, 0xffffffff, 4); 4321 } 4322 4323 static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4324 unsigned vmid, uint64_t pd_addr) 4325 { 4326 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4327 4328 /* compute doesn't have PFP */ 4329 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4330 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4331 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4332 amdgpu_ring_write(ring, 0x0); 4333 } 4334 } 4335 4336 static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4337 u64 seq, unsigned int flags) 4338 { 4339 struct amdgpu_device *adev = ring->adev; 4340 4341 /* we only allocate 32bit for each seq wb address */ 4342 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4343 4344 /* write fence seq to the "addr" */ 4345 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4346 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4347 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4348 amdgpu_ring_write(ring, lower_32_bits(addr)); 4349 amdgpu_ring_write(ring, upper_32_bits(addr)); 4350 amdgpu_ring_write(ring, lower_32_bits(seq)); 4351 4352 if (flags & AMDGPU_FENCE_FLAG_INT) { 4353 /* set register to trigger INT */ 4354 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4355 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4356 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4357 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4358 amdgpu_ring_write(ring, 0); 4359 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4360 } 4361 } 4362 4363 static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring) 4364 { 4365 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4366 amdgpu_ring_write(ring, 0); 4367 } 4368 4369 static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4370 { 4371 uint32_t dw2 = 0; 4372 4373 if (amdgpu_mcbp) 4374 
gfx_v10_0_ring_emit_ce_meta(ring, 4375 flags & AMDGPU_IB_PREEMPTED ? true : false); 4376 4377 gfx_v10_0_ring_emit_tmz(ring, true); 4378 4379 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4380 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4381 /* set load_global_config & load_global_uconfig */ 4382 dw2 |= 0x8001; 4383 /* set load_cs_sh_regs */ 4384 dw2 |= 0x01000000; 4385 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4386 dw2 |= 0x10002; 4387 4388 /* set load_ce_ram if preamble presented */ 4389 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4390 dw2 |= 0x10000000; 4391 } else { 4392 /* still load_ce_ram if this is the first time preamble presented 4393 * although there is no context switch happens. 4394 */ 4395 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4396 dw2 |= 0x10000000; 4397 } 4398 4399 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4400 amdgpu_ring_write(ring, dw2); 4401 amdgpu_ring_write(ring, 0); 4402 } 4403 4404 static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4405 { 4406 unsigned ret; 4407 4408 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4409 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4410 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4411 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4412 ret = ring->wptr & ring->buf_mask; 4413 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4414 4415 return ret; 4416 } 4417 4418 static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4419 { 4420 unsigned cur; 4421 BUG_ON(offset > ring->buf_mask); 4422 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4423 4424 cur = (ring->wptr - 1) & ring->buf_mask; 4425 if (likely(cur > offset)) 4426 ring->ring[offset] = cur - offset; 4427 else 4428 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; 4429 } 4430 4431 static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) 4432 { 4433 int i, r = 0; 4434 struct amdgpu_device *adev = ring->adev; 4435 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4436 struct amdgpu_ring *kiq_ring = &kiq->ring; 4437 4438 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 4439 return -EINVAL; 4440 4441 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) 4442 return -ENOMEM; 4443 4444 /* assert preemption condition */ 4445 amdgpu_ring_set_preempt_cond_exec(ring, false); 4446 4447 /* assert IB preemption, emit the trailing fence */ 4448 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 4449 ring->trail_fence_gpu_addr, 4450 ++ring->trail_seq); 4451 amdgpu_ring_commit(kiq_ring); 4452 4453 /* poll the trailing fence */ 4454 for (i = 0; i < adev->usec_timeout; i++) { 4455 if (ring->trail_seq == 4456 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 4457 break; 4458 DRM_UDELAY(1); 4459 } 4460 4461 if (i >= adev->usec_timeout) { 4462 r = -EINVAL; 4463 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 4464 } 4465 4466 /* deassert preemption condition */ 4467 amdgpu_ring_set_preempt_cond_exec(ring, true); 4468 return r; 4469 } 4470 4471 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 4472 { 4473 struct amdgpu_device *adev = ring->adev; 4474 struct v10_ce_ib_state ce_payload = {0}; 4475 uint64_t csa_addr; 4476 int cnt; 4477 4478 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4479 csa_addr = amdgpu_csa_vaddr(ring->adev); 4480 4481 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4482 amdgpu_ring_write(ring, 
(WRITE_DATA_ENGINE_SEL(2) | 4483 WRITE_DATA_DST_SEL(8) | 4484 WR_CONFIRM) | 4485 WRITE_DATA_CACHE_POLICY(0)); 4486 amdgpu_ring_write(ring, lower_32_bits(csa_addr + 4487 offsetof(struct v10_gfx_meta_data, ce_payload))); 4488 amdgpu_ring_write(ring, upper_32_bits(csa_addr + 4489 offsetof(struct v10_gfx_meta_data, ce_payload))); 4490 4491 if (resume) 4492 amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + 4493 offsetof(struct v10_gfx_meta_data, 4494 ce_payload), 4495 sizeof(ce_payload) >> 2); 4496 else 4497 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 4498 sizeof(ce_payload) >> 2); 4499 } 4500 4501 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 4502 { 4503 struct amdgpu_device *adev = ring->adev; 4504 struct v10_de_ib_state de_payload = {0}; 4505 uint64_t csa_addr, gds_addr; 4506 int cnt; 4507 4508 csa_addr = amdgpu_csa_vaddr(ring->adev); 4509 gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size, 4510 PAGE_SIZE); 4511 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 4512 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4513 4514 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4515 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4516 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4517 WRITE_DATA_DST_SEL(8) | 4518 WR_CONFIRM) | 4519 WRITE_DATA_CACHE_POLICY(0)); 4520 amdgpu_ring_write(ring, lower_32_bits(csa_addr + 4521 offsetof(struct v10_gfx_meta_data, de_payload))); 4522 amdgpu_ring_write(ring, upper_32_bits(csa_addr + 4523 offsetof(struct v10_gfx_meta_data, de_payload))); 4524 4525 if (resume) 4526 amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + 4527 offsetof(struct v10_gfx_meta_data, 4528 de_payload), 4529 sizeof(de_payload) >> 2); 4530 else 4531 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 4532 sizeof(de_payload) >> 2); 4533 } 4534 4535 static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4536 { 4537 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4538 amdgpu_ring_write(ring, FRAME_CMD(start ? 
0 : 1)); /* frame_end */
}

static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
			  (5 << 8) |	/* dst: memory */
			  (1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val)
{
	uint32_t cmd = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = (1 << 16); /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}

static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
				      uint32_t me, uint32_t pipe,
				      enum amdgpu_interrupt_state state)
{
	uint32_t cp_int_cntl, cp_int_cntl_reg;

	if (!me) {
		switch (pipe) {
		case 0:
			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
			break;
		case 1:
			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		cp_int_cntl = RREG32(cp_int_cntl_reg);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    TIME_STAMP_INT_ENABLE, 0);
		WREG32(cp_int_cntl_reg, cp_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		cp_int_cntl = RREG32(cp_int_cntl_reg);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    TIME_STAMP_INT_ENABLE, 1);
		WREG32(cp_int_cntl_reg, cp_int_cntl);
		break;
	default:
		break;
	}
}

static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						       int me, int pipe,
						       enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
4637 */ 4638 4639 if (me == 1) { 4640 switch (pipe) { 4641 case 0: 4642 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4643 break; 4644 case 1: 4645 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4646 break; 4647 case 2: 4648 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4649 break; 4650 case 3: 4651 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4652 break; 4653 default: 4654 DRM_DEBUG("invalid pipe %d\n", pipe); 4655 return; 4656 } 4657 } else { 4658 DRM_DEBUG("invalid me %d\n", me); 4659 return; 4660 } 4661 4662 switch (state) { 4663 case AMDGPU_IRQ_STATE_DISABLE: 4664 mec_int_cntl = RREG32(mec_int_cntl_reg); 4665 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4666 TIME_STAMP_INT_ENABLE, 0); 4667 WREG32(mec_int_cntl_reg, mec_int_cntl); 4668 break; 4669 case AMDGPU_IRQ_STATE_ENABLE: 4670 mec_int_cntl = RREG32(mec_int_cntl_reg); 4671 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4672 TIME_STAMP_INT_ENABLE, 1); 4673 WREG32(mec_int_cntl_reg, mec_int_cntl); 4674 break; 4675 default: 4676 break; 4677 } 4678 } 4679 4680 static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev, 4681 struct amdgpu_irq_src *src, 4682 unsigned type, 4683 enum amdgpu_interrupt_state state) 4684 { 4685 switch (type) { 4686 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 4687 gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 4688 break; 4689 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 4690 gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 4691 break; 4692 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 4693 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 4694 break; 4695 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 4696 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 4697 break; 4698 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 4699 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 4700 break; 4701 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 4702 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 4703 break; 4704 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 4705 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 4706 break; 4707 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 4708 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 4709 break; 4710 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 4711 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 4712 break; 4713 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 4714 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 4715 break; 4716 default: 4717 break; 4718 } 4719 return 0; 4720 } 4721 4722 static int gfx_v10_0_eop_irq(struct amdgpu_device *adev, 4723 struct amdgpu_irq_src *source, 4724 struct amdgpu_iv_entry *entry) 4725 { 4726 int i; 4727 u8 me_id, pipe_id, queue_id; 4728 struct amdgpu_ring *ring; 4729 4730 DRM_DEBUG("IH: CP EOP\n"); 4731 me_id = (entry->ring_id & 0x0c) >> 2; 4732 pipe_id = (entry->ring_id & 0x03) >> 0; 4733 queue_id = (entry->ring_id & 0x70) >> 4; 4734 4735 switch (me_id) { 4736 case 0: 4737 if (pipe_id == 0) 4738 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 4739 else 4740 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 4741 break; 4742 case 1: 4743 case 2: 4744 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4745 ring = &adev->gfx.compute_ring[i]; 4746 /* Per-queue interrupt is supported for MEC starting from VI. 4747 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
4748 */ 4749 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 4750 amdgpu_fence_process(ring); 4751 } 4752 break; 4753 } 4754 return 0; 4755 } 4756 4757 static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4758 struct amdgpu_irq_src *source, 4759 unsigned type, 4760 enum amdgpu_interrupt_state state) 4761 { 4762 switch (state) { 4763 case AMDGPU_IRQ_STATE_DISABLE: 4764 case AMDGPU_IRQ_STATE_ENABLE: 4765 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4766 PRIV_REG_INT_ENABLE, 4767 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4768 break; 4769 default: 4770 break; 4771 } 4772 4773 return 0; 4774 } 4775 4776 static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4777 struct amdgpu_irq_src *source, 4778 unsigned type, 4779 enum amdgpu_interrupt_state state) 4780 { 4781 switch (state) { 4782 case AMDGPU_IRQ_STATE_DISABLE: 4783 case AMDGPU_IRQ_STATE_ENABLE: 4784 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4785 PRIV_INSTR_INT_ENABLE, 4786 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4787 default: 4788 break; 4789 } 4790 4791 return 0; 4792 } 4793 4794 static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, 4795 struct amdgpu_iv_entry *entry) 4796 { 4797 u8 me_id, pipe_id, queue_id; 4798 struct amdgpu_ring *ring; 4799 int i; 4800 4801 me_id = (entry->ring_id & 0x0c) >> 2; 4802 pipe_id = (entry->ring_id & 0x03) >> 0; 4803 queue_id = (entry->ring_id & 0x70) >> 4; 4804 4805 switch (me_id) { 4806 case 0: 4807 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4808 ring = &adev->gfx.gfx_ring[i]; 4809 /* we only enabled 1 gfx queue per pipe for now */ 4810 if (ring->me == me_id && ring->pipe == pipe_id) 4811 drm_sched_fault(&ring->sched); 4812 } 4813 break; 4814 case 1: 4815 case 2: 4816 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4817 ring = &adev->gfx.compute_ring[i]; 4818 if (ring->me == me_id && ring->pipe == pipe_id && 4819 ring->queue == queue_id) 4820 drm_sched_fault(&ring->sched); 4821 } 4822 break; 4823 default: 4824 BUG(); 4825 } 4826 } 4827 4828 static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, 4829 struct amdgpu_irq_src *source, 4830 struct amdgpu_iv_entry *entry) 4831 { 4832 DRM_ERROR("Illegal register access in command stream\n"); 4833 gfx_v10_0_handle_priv_fault(adev, entry); 4834 return 0; 4835 } 4836 4837 static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, 4838 struct amdgpu_irq_src *source, 4839 struct amdgpu_iv_entry *entry) 4840 { 4841 DRM_ERROR("Illegal instruction in command stream\n"); 4842 gfx_v10_0_handle_priv_fault(adev, entry); 4843 return 0; 4844 } 4845 4846 static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 4847 struct amdgpu_irq_src *src, 4848 unsigned int type, 4849 enum amdgpu_interrupt_state state) 4850 { 4851 uint32_t tmp, target; 4852 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 4853 4854 if (ring->me == 1) 4855 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4856 else 4857 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); 4858 target += ring->pipe; 4859 4860 switch (type) { 4861 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 4862 if (state == AMDGPU_IRQ_STATE_DISABLE) { 4863 tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 4864 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 4865 GENERIC2_INT_ENABLE, 0); 4866 WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 4867 4868 tmp = RREG32(target); 4869 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 4870 GENERIC2_INT_ENABLE, 0); 4871 WREG32(target, tmp); 4872 } else { 4873 tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 4874 
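			/*
			 * Enable path: mirror the disable path above by setting
			 * GENERIC2_INT_ENABLE both globally in CPC_INT_CNTL and
			 * in the per-pipe CP_ME*_PIPE*_INT_CNTL register
			 * selected via 'target'.
			 */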
tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 4875 GENERIC2_INT_ENABLE, 1); 4876 WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 4877 4878 tmp = RREG32(target); 4879 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 4880 GENERIC2_INT_ENABLE, 1); 4881 WREG32(target, tmp); 4882 } 4883 break; 4884 default: 4885 BUG(); /* kiq only support GENERIC2_INT now */ 4886 break; 4887 } 4888 return 0; 4889 } 4890 4891 static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev, 4892 struct amdgpu_irq_src *source, 4893 struct amdgpu_iv_entry *entry) 4894 { 4895 u8 me_id, pipe_id, queue_id; 4896 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 4897 4898 me_id = (entry->ring_id & 0x0c) >> 2; 4899 pipe_id = (entry->ring_id & 0x03) >> 0; 4900 queue_id = (entry->ring_id & 0x70) >> 4; 4901 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 4902 me_id, pipe_id, queue_id); 4903 4904 amdgpu_fence_process(ring); 4905 return 0; 4906 } 4907 4908 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { 4909 .name = "gfx_v10_0", 4910 .early_init = gfx_v10_0_early_init, 4911 .late_init = gfx_v10_0_late_init, 4912 .sw_init = gfx_v10_0_sw_init, 4913 .sw_fini = gfx_v10_0_sw_fini, 4914 .hw_init = gfx_v10_0_hw_init, 4915 .hw_fini = gfx_v10_0_hw_fini, 4916 .suspend = gfx_v10_0_suspend, 4917 .resume = gfx_v10_0_resume, 4918 .is_idle = gfx_v10_0_is_idle, 4919 .wait_for_idle = gfx_v10_0_wait_for_idle, 4920 .soft_reset = gfx_v10_0_soft_reset, 4921 .set_clockgating_state = gfx_v10_0_set_clockgating_state, 4922 .set_powergating_state = gfx_v10_0_set_powergating_state, 4923 .get_clockgating_state = gfx_v10_0_get_clockgating_state, 4924 }; 4925 4926 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 4927 .type = AMDGPU_RING_TYPE_GFX, 4928 .align_mask = 0xff, 4929 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 4930 .support_64bit_ptrs = true, 4931 .vmhub = AMDGPU_GFXHUB, 4932 .get_rptr = gfx_v10_0_ring_get_rptr_gfx, 4933 .get_wptr = gfx_v10_0_ring_get_wptr_gfx, 4934 .set_wptr = gfx_v10_0_ring_set_wptr_gfx, 4935 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 4936 5 + /* COND_EXEC */ 4937 7 + /* PIPELINE_SYNC */ 4938 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 4939 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 4940 2 + /* VM_FLUSH */ 4941 8 + /* FENCE for VM_FLUSH */ 4942 20 + /* GDS switch */ 4943 4 + /* double SWITCH_BUFFER, 4944 * the first COND_EXEC jump to the place 4945 * just prior to this double SWITCH_BUFFER 4946 */ 4947 5 + /* COND_EXEC */ 4948 7 + /* HDP_flush */ 4949 4 + /* VGT_flush */ 4950 14 + /* CE_META */ 4951 31 + /* DE_META */ 4952 3 + /* CNTX_CTRL */ 4953 5 + /* HDP_INVL */ 4954 8 + 8 + /* FENCE x2 */ 4955 2, /* SWITCH_BUFFER */ 4956 .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */ 4957 .emit_ib = gfx_v10_0_ring_emit_ib_gfx, 4958 .emit_fence = gfx_v10_0_ring_emit_fence, 4959 .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, 4960 .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, 4961 .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, 4962 .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, 4963 .test_ring = gfx_v10_0_ring_test_ring, 4964 .test_ib = gfx_v10_0_ring_test_ib, 4965 .insert_nop = amdgpu_ring_insert_nop, 4966 .pad_ib = amdgpu_ring_generic_pad_ib, 4967 .emit_switch_buffer = gfx_v10_0_ring_emit_sb, 4968 .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, 4969 .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, 4970 .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, 4971 .preempt_ib = gfx_v10_0_ring_preempt_ib, 4972 .emit_tmz = gfx_v10_0_ring_emit_tmz, 4973 .emit_wreg = gfx_v10_0_ring_emit_wreg, 4974 
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 4975 }; 4976 4977 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { 4978 .type = AMDGPU_RING_TYPE_COMPUTE, 4979 .align_mask = 0xff, 4980 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 4981 .support_64bit_ptrs = true, 4982 .vmhub = AMDGPU_GFXHUB, 4983 .get_rptr = gfx_v10_0_ring_get_rptr_compute, 4984 .get_wptr = gfx_v10_0_ring_get_wptr_compute, 4985 .set_wptr = gfx_v10_0_ring_set_wptr_compute, 4986 .emit_frame_size = 4987 20 + /* gfx_v10_0_ring_emit_gds_switch */ 4988 7 + /* gfx_v10_0_ring_emit_hdp_flush */ 4989 5 + /* hdp invalidate */ 4990 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ 4991 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 4992 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 4993 2 + /* gfx_v10_0_ring_emit_vm_flush */ 4994 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ 4995 .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_compute */ 4996 .emit_ib = gfx_v10_0_ring_emit_ib_compute, 4997 .emit_fence = gfx_v10_0_ring_emit_fence, 4998 .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, 4999 .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, 5000 .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, 5001 .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, 5002 .test_ring = gfx_v10_0_ring_test_ring, 5003 .test_ib = gfx_v10_0_ring_test_ib, 5004 .insert_nop = amdgpu_ring_insert_nop, 5005 .pad_ib = amdgpu_ring_generic_pad_ib, 5006 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5007 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5008 }; 5009 5010 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 5011 .type = AMDGPU_RING_TYPE_KIQ, 5012 .align_mask = 0xff, 5013 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 5014 .support_64bit_ptrs = true, 5015 .vmhub = AMDGPU_GFXHUB, 5016 .get_rptr = gfx_v10_0_ring_get_rptr_compute, 5017 .get_wptr = gfx_v10_0_ring_get_wptr_compute, 5018 .set_wptr = gfx_v10_0_ring_set_wptr_compute, 5019 .emit_frame_size = 5020 20 + /* gfx_v10_0_ring_emit_gds_switch */ 5021 7 + /* gfx_v10_0_ring_emit_hdp_flush */ 5022 5 + /*hdp invalidate */ 5023 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ 5024 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 5025 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 5026 2 + /* gfx_v10_0_ring_emit_vm_flush */ 5027 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 5028 .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_compute */ 5029 .emit_ib = gfx_v10_0_ring_emit_ib_compute, 5030 .emit_fence = gfx_v10_0_ring_emit_fence_kiq, 5031 .test_ring = gfx_v10_0_ring_test_ring, 5032 .test_ib = gfx_v10_0_ring_test_ib, 5033 .insert_nop = amdgpu_ring_insert_nop, 5034 .pad_ib = amdgpu_ring_generic_pad_ib, 5035 .emit_rreg = gfx_v10_0_ring_emit_rreg, 5036 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5037 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5038 }; 5039 5040 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 5041 { 5042 int i; 5043 5044 adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq; 5045 5046 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 5047 adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx; 5048 5049 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5050 adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute; 5051 } 5052 5053 static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = { 5054 .set = gfx_v10_0_set_eop_interrupt_state, 5055 .process = gfx_v10_0_eop_irq, 5056 }; 5057 5058 static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { 5059 .set = gfx_v10_0_set_priv_reg_fault_state, 5060 .process = gfx_v10_0_priv_reg_irq, 5061 }; 5062 5063 static 
const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = { 5064 .set = gfx_v10_0_set_priv_inst_fault_state, 5065 .process = gfx_v10_0_priv_inst_irq, 5066 }; 5067 5068 static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = { 5069 .set = gfx_v10_0_kiq_set_interrupt_state, 5070 .process = gfx_v10_0_kiq_irq, 5071 }; 5072 5073 static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) 5074 { 5075 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 5076 adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs; 5077 5078 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 5079 adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs; 5080 5081 adev->gfx.priv_reg_irq.num_types = 1; 5082 adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; 5083 5084 adev->gfx.priv_inst_irq.num_types = 1; 5085 adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs; 5086 } 5087 5088 static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) 5089 { 5090 switch (adev->asic_type) { 5091 case CHIP_NAVI10: 5092 adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; 5093 break; 5094 default: 5095 break; 5096 } 5097 } 5098 5099 static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) 5100 { 5101 /* init asic gds info */ 5102 switch (adev->asic_type) { 5103 case CHIP_NAVI10: 5104 adev->gds.gds_size = 0x10000; 5105 break; 5106 default: 5107 adev->gds.gds_size = 0x10000; 5108 break; 5109 } 5110 5111 adev->gds.gws_size = 64; 5112 adev->gds.oa_size = 16; 5113 } 5114 5115 static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 5116 u32 bitmap) 5117 { 5118 u32 data; 5119 5120 if (!bitmap) 5121 return; 5122 5123 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 5124 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 5125 5126 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 5127 } 5128 5129 static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 5130 { 5131 u32 data, wgp_bitmask; 5132 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 5133 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 5134 5135 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 5136 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 5137 5138 wgp_bitmask = 5139 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); 5140 5141 return (~data) & wgp_bitmask; 5142 } 5143 5144 static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) 5145 { 5146 u32 wgp_idx, wgp_active_bitmap; 5147 u32 cu_bitmap_per_wgp, cu_active_bitmap; 5148 5149 wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); 5150 cu_active_bitmap = 0; 5151 5152 for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { 5153 /* if there is one WGP enabled, it means 2 CUs will be enabled */ 5154 cu_bitmap_per_wgp = 3 << (2 * wgp_idx); 5155 if (wgp_active_bitmap & (1 << wgp_idx)) 5156 cu_active_bitmap |= cu_bitmap_per_wgp; 5157 } 5158 5159 return cu_active_bitmap; 5160 } 5161 5162 static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, 5163 struct amdgpu_cu_info *cu_info) 5164 { 5165 int i, j, k, counter, active_cu_number = 0; 5166 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 5167 unsigned disable_masks[4 * 2]; 5168 5169 if (!adev || !cu_info) 5170 return -EINVAL; 5171 5172 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 5173 5174 mutex_lock(&adev->grbm_idx_mutex); 5175 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 5176 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 5177 mask = 1; 5178 ao_bitmap = 0; 5179 counter = 
0; 5180 gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); 5181 if (i < 4 && j < 2) 5182 gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( 5183 adev, disable_masks[i * 2 + j]); 5184 bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev); 5185 cu_info->bitmap[i][j] = bitmap; 5186 5187 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 5188 if (bitmap & mask) { 5189 if (counter < adev->gfx.config.max_cu_per_sh) 5190 ao_bitmap |= mask; 5191 counter++; 5192 } 5193 mask <<= 1; 5194 } 5195 active_cu_number += counter; 5196 if (i < 2 && j < 2) 5197 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 5198 cu_info->ao_cu_bitmap[i][j] = ao_bitmap; 5199 } 5200 } 5201 gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5202 mutex_unlock(&adev->grbm_idx_mutex); 5203 5204 cu_info->number = active_cu_number; 5205 cu_info->ao_cu_mask = ao_cu_mask; 5206 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 5207 5208 return 0; 5209 } 5210 5211 const struct amdgpu_ip_block_version gfx_v10_0_ip_block = 5212 { 5213 .type = AMD_IP_BLOCK_TYPE_GFX, 5214 .major = 10, 5215 .minor = 0, 5216 .rev = 0, 5217 .funcs = &gfx_v10_0_ip_funcs, 5218 }; 5219
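/*
 * Usage sketch (assumption, for reference only): the SoC-level code for this
 * ASIC family (e.g. nv.c) is expected to register this IP block during early
 * init, roughly as:
 *
 *	r = amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
 *	if (r)
 *		return r;
 *
 * after which the gfx_v10_0_ip_funcs callbacks above are driven by the common
 * amdgpu IP block sequencing (hw_init/hw_fini/suspend/resume).
 */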