/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"

#include "vega10/soc15ip.h"
#include "vega10/GC/gc_9_0_offset.h"
#include "vega10/GC/gc_9_0_sh_mask.h"
#include "vega10/vega10_enum.h"
#include "vega10/HDP/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_COMPUTE_RINGS 8
#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)},
	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE),
	 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)}
};

static const u32 golden_settings_gc_9_0[] =
{
	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff
};

static const u32 golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107,
	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042,
	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800,
	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x00000007
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_gc_9_0,
						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0));
		amdgpu_program_register_sequence(adev,
						 golden_settings_gc_9_0_vg10,
						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	default:
		break;
	}
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			 adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
	if (adev->gfx.mec.mec_fw_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
		adev->gfx.mec.mec_fw_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	if (adev->gfx.mec.mec_fw_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     mec_hdr->header.ucode_size_bytes,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.mec_fw_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.mec_fw_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) map firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}
	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);


	return 0;
}

static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->queue = 0;
	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}
static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}

/* create MQD for each compute queue */
static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* TODO: prepare MQD backup */
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* TODO: prepare MQD backup */
		}
	}

	return 0;
}

static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
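		/*
		 * Each MQD (memory queue descriptor, struct v9_mqd) was
		 * allocated with amdgpu_bo_create_kernel() in
		 * gfx_v9_0_compute_mqd_sw_init(); free the BO, its GPU
		 * address and the CPU mapping together.
		 */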
		ring = &adev->gfx.compute_ring[i];
		amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		     (address << SQ_IND_INDEX__INDEX__SHIFT) |
		     (SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
		     (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		     (SQ_IND_INDEX__FORCE_READ_MASK) |
		     (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}


static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
};

static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		adev->gfx.config.gs_vgt_table_depth = 32;
		adev->gfx.config.gs_prim_buffer_depth = 1792;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));
}

static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}

static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
	       sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}

static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base;
	adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}

static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data;
	u32 size;
	u32 base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size */
	data = 0;
	size = adev->gfx.ngg.buf[NGG_PRIM].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size);

	size = adev->gfx.ngg.buf[NGG_POS].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);

	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = 0;
	size = adev->gfx.ngg.buf[NGG_CNTL].size / 256;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size);

	size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024;
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);

	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[0].mem_size,
				   (adev->gds.mem.total_size +
				    adev->gfx.ngg.gds_reserve_size) >>
				   AMDGPU_GDS_SHIFT);

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);


	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[0].mem_size, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v9_0_scratch_init(adev);

	r = gfx_v9_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v9_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
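		/*
		 * Compute ring i is mapped onto MEC1 as pipe (i / 8),
		 * queue (i % 8); each ring gets its own doorbell and a
		 * MEC_HPD_SIZE slot in the HPD EOP buffer allocated by
		 * gfx_v9_0_mec_init().
		 */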
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v9_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = gfx_v9_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v9_0_gpu_early_init(adev);

	r = gfx_v9_0_ngg_init(adev);
	if (r)
		return r;

	return 0;
}


static int gfx_v9_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_compute_mqd_sw_fini(adev);
		gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
		gfx_v9_0_kiq_fini(adev);
	}

	gfx_v9_0_mec_fini(adev);
	gfx_v9_0_ngg_fini(adev);

	return 0;
}


static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}

static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
{
	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (se_num == 0xffffffff) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	} else if (sh_num == 0xffffffff) {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	} else {
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
}

static u32 gfx_v9_0_create_bitmask(u32 bit_width)
{
	return (u32)((1ULL << bit_width) - 1);
}

static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v9_0_tiling_mode_table_init(adev);

	gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
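		/*
		 * Default setup for VMIDs 0-15: unaligned access allowed in
		 * SH_MEM_CONFIG and SH_MEM_BASES cleared; the compute VMIDs
		 * (8-15) get their private apertures afterwards in
		 * gfx_v9_0_init_compute_vmid().
		 */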
		tmp = 0;
		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v9_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
		     (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		     (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		     (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		     (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}

static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

	if (enable)
		return;

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}

void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);

	gfx_v9_0_enable_gui_idle_interrupt(adev, false);

	gfx_v9_0_wait_for_rlc_serdes(adev);
}

static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo does enable the cp interrupt only after cp init */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v9_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);

#ifdef AMDGPU_RLC_DEBUG_RETRY
	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
	if (rlc_ucode_ver == 0x108) {
		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
				rlc_ucode_ver, adev->gfx.rlc_fw_version);
		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
		 * default is 0x9C4 to create a 100us interval */
		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 (256) */
		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
	}
#endif
}

static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
			RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	gfx_v9_0_rlc_stop(adev);

	/* disable CG */
	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);

	gfx_v9_0_rlc_reset(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy rlc firmware loading */
		r = gfx_v9_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v9_0_rlc_start(adev);

	return 0;
}

static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);

	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
	if (!enable) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
	udelay(50);
}

static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v9_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
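	/*
	 * With the CP about to be un-halted, the clear-state preamble built
	 * from gfx9_cs_data is written to the gfx ring so context register
	 * state starts from known values; the ring allocation below is sized
	 * from gfx_v9_0_get_csb_size().
	 */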

	gfx_v9_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);


	/* start the ring */
	gfx_v9_0_cp_gfx_start(adev);
	ring->ready = true;

	return 0;
}

static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
	} else {
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
{
	gfx_v9_0_cp_compute_enable(adev, true);

	return 0;
}

static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i;
	u32 tmp;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v9_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	tmp = 0;
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);

	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));

	/* MEC1 */
	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
		     mec_hdr->jt_offset);
	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
		     adev->gfx.mec_fw_version);
	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */

	return 0;
}

static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, true);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
		}
	}
}

static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);

static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int i, r;
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		if (gfx_v9_0_init_queue(ring))
			dev_warn(adev->dev, "compute queue %d init failed!\n", i);
	}

	r = gfx_v9_0_cp_compute_start(adev);
	if (r)
		return r;

	return 0;
}

/* KIQ functions */
static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}

static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
{
	amdgpu_ring_alloc(ring, 8);
	/* set resources */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(ring, 0);	/* queue mask hi */
	amdgpu_ring_write(ring, 0);	/* gws mask lo */
	amdgpu_ring_write(ring, 0);	/* gws mask hi */
	amdgpu_ring_write(ring, 0);	/* oac mask */
	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
	amdgpu_ring_commit(ring);
	udelay(50);
}

static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  (0 << 4) | /* Queue_Sel */
			  (0 << 8) | /* VMID */
			  (ring->queue << 13) |
			  (ring->pipe << 16) |
			  ((ring->me == 1 ?
0 : 1) << 18) | 1870 (0 << 21) | /*queue_type: normal compute queue */ 1871 (1 << 24) | /* alloc format: all_on_one_pipe */ 1872 (0 << 26) | /* engine_sel: compute */ 1873 (1 << 29)); /* num_queues: must be 1 */ 1874 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2)); 1875 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 1876 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 1877 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 1878 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 1879 amdgpu_ring_commit(kiq_ring); 1880 udelay(50); 1881 } 1882 1883 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 1884 { 1885 struct amdgpu_device *adev = ring->adev; 1886 struct v9_mqd *mqd = ring->mqd_ptr; 1887 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 1888 uint32_t tmp; 1889 1890 mqd->header = 0xC0310800; 1891 mqd->compute_pipelinestat_enable = 0x00000001; 1892 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 1893 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 1894 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 1895 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 1896 mqd->compute_misc_reserved = 0x00000003; 1897 1898 eop_base_addr = ring->eop_gpu_addr >> 8; 1899 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 1900 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 1901 1902 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 1903 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 1904 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 1905 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 1906 1907 mqd->cp_hqd_eop_control = tmp; 1908 1909 /* enable doorbell? */ 1910 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 1911 1912 if (ring->use_doorbell) { 1913 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1914 DOORBELL_OFFSET, ring->doorbell_index); 1915 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1916 DOORBELL_EN, 1); 1917 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1918 DOORBELL_SOURCE, 0); 1919 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1920 DOORBELL_HIT, 0); 1921 } 1922 else 1923 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1924 DOORBELL_EN, 0); 1925 1926 mqd->cp_hqd_pq_doorbell_control = tmp; 1927 1928 /* disable the queue if it's active */ 1929 ring->wptr = 0; 1930 mqd->cp_hqd_dequeue_request = 0; 1931 mqd->cp_hqd_pq_rptr = 0; 1932 mqd->cp_hqd_pq_wptr_lo = 0; 1933 mqd->cp_hqd_pq_wptr_hi = 0; 1934 1935 /* set the pointer to the MQD */ 1936 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 1937 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 1938 1939 /* set MQD vmid to 0 */ 1940 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 1941 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 1942 mqd->cp_mqd_control = tmp; 1943 1944 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 1945 hqd_gpu_addr = ring->gpu_addr >> 8; 1946 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 1947 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 1948 1949 /* set up the HQD, this is similar to CP_RB0_CNTL */ 1950 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 1951 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 1952 (order_base_2(ring->ring_size / 4) - 1)); 1953 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 1954 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 1955 #ifdef __BIG_ENDIAN 1956 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 1957 #endif 1958 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 1959 tmp = REG_SET_FIELD(tmp, 
CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 1960 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 1961 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 1962 mqd->cp_hqd_pq_control = tmp; 1963 1964 /* set the wb address whether it's enabled or not */ 1965 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 1966 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 1967 mqd->cp_hqd_pq_rptr_report_addr_hi = 1968 upper_32_bits(wb_gpu_addr) & 0xffff; 1969 1970 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 1971 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 1972 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 1973 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 1974 1975 tmp = 0; 1976 /* enable the doorbell if requested */ 1977 if (ring->use_doorbell) { 1978 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 1979 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1980 DOORBELL_OFFSET, ring->doorbell_index); 1981 1982 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1983 DOORBELL_EN, 1); 1984 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1985 DOORBELL_SOURCE, 0); 1986 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 1987 DOORBELL_HIT, 0); 1988 } 1989 1990 mqd->cp_hqd_pq_doorbell_control = tmp; 1991 1992 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 1993 ring->wptr = 0; 1994 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 1995 1996 /* set the vmid for the queue */ 1997 mqd->cp_hqd_vmid = 0; 1998 1999 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2000 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2001 mqd->cp_hqd_persistent_state = tmp; 2002 2003 /* set MIN_IB_AVAIL_SIZE */ 2004 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2005 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2006 mqd->cp_hqd_ib_control = tmp; 2007 2008 /* activate the queue */ 2009 mqd->cp_hqd_active = 1; 2010 2011 return 0; 2012 } 2013 2014 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2015 { 2016 struct amdgpu_device *adev = ring->adev; 2017 struct v9_mqd *mqd = ring->mqd_ptr; 2018 int j; 2019 2020 /* disable wptr polling */ 2021 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2022 2023 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2024 mqd->cp_hqd_eop_base_addr_lo); 2025 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2026 mqd->cp_hqd_eop_base_addr_hi); 2027 2028 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2029 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, 2030 mqd->cp_hqd_eop_control); 2031 2032 /* enable doorbell? 
*/ 2033 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2034 mqd->cp_hqd_pq_doorbell_control); 2035 2036 /* disable the queue if it's active */ 2037 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 2038 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 2039 for (j = 0; j < adev->usec_timeout; j++) { 2040 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 2041 break; 2042 udelay(1); 2043 } 2044 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 2045 mqd->cp_hqd_dequeue_request); 2046 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 2047 mqd->cp_hqd_pq_rptr); 2048 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 2049 mqd->cp_hqd_pq_wptr_lo); 2050 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 2051 mqd->cp_hqd_pq_wptr_hi); 2052 } 2053 2054 /* set the pointer to the MQD */ 2055 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, 2056 mqd->cp_mqd_base_addr_lo); 2057 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, 2058 mqd->cp_mqd_base_addr_hi); 2059 2060 /* set MQD vmid to 0 */ 2061 WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 2062 mqd->cp_mqd_control); 2063 2064 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2065 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, 2066 mqd->cp_hqd_pq_base_lo); 2067 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, 2068 mqd->cp_hqd_pq_base_hi); 2069 2070 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2071 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, 2072 mqd->cp_hqd_pq_control); 2073 2074 /* set the wb address whether it's enabled or not */ 2075 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 2076 mqd->cp_hqd_pq_rptr_report_addr_lo); 2077 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 2078 mqd->cp_hqd_pq_rptr_report_addr_hi); 2079 2080 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2081 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 2082 mqd->cp_hqd_pq_wptr_poll_addr_lo); 2083 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 2084 mqd->cp_hqd_pq_wptr_poll_addr_hi); 2085 2086 /* enable the doorbell if requested */ 2087 if (ring->use_doorbell) { 2088 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 2089 (AMDGPU_DOORBELL64_KIQ *2) << 2); 2090 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 2091 (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2); 2092 } 2093 2094 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2095 mqd->cp_hqd_pq_doorbell_control); 2096 2097 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2098 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 2099 mqd->cp_hqd_pq_wptr_lo); 2100 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 2101 mqd->cp_hqd_pq_wptr_hi); 2102 2103 /* set the vmid for the queue */ 2104 WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 2105 2106 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 2107 mqd->cp_hqd_persistent_state); 2108 2109 /* activate the queue */ 2110 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 2111 mqd->cp_hqd_active); 2112 2113 if (ring->use_doorbell) 2114 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 2115 2116 return 0; 2117 } 2118 2119 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 2120 { 2121 struct amdgpu_device *adev = ring->adev; 2122 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 2123 struct v9_mqd *mqd = ring->mqd_ptr; 2124 bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); 2125 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 2126 2127 if (is_kiq) { 2128 gfx_v9_0_kiq_setting(&kiq->ring); 2129 } else { 2130 mqd_idx = ring - &adev->gfx.compute_ring[0]; 2131 } 2132 2133 if (!adev->gfx.in_reset) { 2134 memset((void *)mqd, 0, sizeof(*mqd)); 2135 mutex_lock(&adev->srbm_mutex); 2136 soc15_grbm_select(adev, ring->me, 
ring->pipe, ring->queue, 0); 2137 gfx_v9_0_mqd_init(ring); 2138 if (is_kiq) 2139 gfx_v9_0_kiq_init_register(ring); 2140 soc15_grbm_select(adev, 0, 0, 0, 0); 2141 mutex_unlock(&adev->srbm_mutex); 2142 2143 } else { /* for GPU_RESET case */ 2144 /* reset MQD to a clean status */ 2145 2146 /* reset ring buffer */ 2147 ring->wptr = 0; 2148 2149 if (is_kiq) { 2150 mutex_lock(&adev->srbm_mutex); 2151 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2152 gfx_v9_0_kiq_init_register(ring); 2153 soc15_grbm_select(adev, 0, 0, 0, 0); 2154 mutex_unlock(&adev->srbm_mutex); 2155 } 2156 } 2157 2158 if (is_kiq) 2159 gfx_v9_0_kiq_enable(ring); 2160 else 2161 gfx_v9_0_map_queue_enable(&kiq->ring, ring); 2162 2163 return 0; 2164 } 2165 2166 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 2167 { 2168 struct amdgpu_ring *ring = NULL; 2169 int r = 0, i; 2170 2171 gfx_v9_0_cp_compute_enable(adev, true); 2172 2173 ring = &adev->gfx.kiq.ring; 2174 2175 r = amdgpu_bo_reserve(ring->mqd_obj, false); 2176 if (unlikely(r != 0)) 2177 goto done; 2178 2179 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 2180 if (!r) { 2181 r = gfx_v9_0_kiq_init_queue(ring); 2182 amdgpu_bo_kunmap(ring->mqd_obj); 2183 ring->mqd_ptr = NULL; 2184 } 2185 amdgpu_bo_unreserve(ring->mqd_obj); 2186 if (r) 2187 goto done; 2188 2189 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2190 ring = &adev->gfx.compute_ring[i]; 2191 2192 r = amdgpu_bo_reserve(ring->mqd_obj, false); 2193 if (unlikely(r != 0)) 2194 goto done; 2195 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 2196 if (!r) { 2197 r = gfx_v9_0_kiq_init_queue(ring); 2198 amdgpu_bo_kunmap(ring->mqd_obj); 2199 ring->mqd_ptr = NULL; 2200 } 2201 amdgpu_bo_unreserve(ring->mqd_obj); 2202 if (r) 2203 goto done; 2204 } 2205 2206 done: 2207 return r; 2208 } 2209 2210 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 2211 { 2212 int r,i; 2213 struct amdgpu_ring *ring; 2214 2215 if (!(adev->flags & AMD_IS_APU)) 2216 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2217 2218 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2219 /* legacy firmware loading */ 2220 r = gfx_v9_0_cp_gfx_load_microcode(adev); 2221 if (r) 2222 return r; 2223 2224 r = gfx_v9_0_cp_compute_load_microcode(adev); 2225 if (r) 2226 return r; 2227 } 2228 2229 r = gfx_v9_0_cp_gfx_resume(adev); 2230 if (r) 2231 return r; 2232 2233 if (amdgpu_sriov_vf(adev)) 2234 r = gfx_v9_0_kiq_resume(adev); 2235 else 2236 r = gfx_v9_0_cp_compute_resume(adev); 2237 if (r) 2238 return r; 2239 2240 ring = &adev->gfx.gfx_ring[0]; 2241 r = amdgpu_ring_test_ring(ring); 2242 if (r) { 2243 ring->ready = false; 2244 return r; 2245 } 2246 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2247 ring = &adev->gfx.compute_ring[i]; 2248 2249 ring->ready = true; 2250 r = amdgpu_ring_test_ring(ring); 2251 if (r) 2252 ring->ready = false; 2253 } 2254 2255 if (amdgpu_sriov_vf(adev)) { 2256 ring = &adev->gfx.kiq.ring; 2257 ring->ready = true; 2258 r = amdgpu_ring_test_ring(ring); 2259 if (r) 2260 ring->ready = false; 2261 } 2262 2263 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2264 2265 return 0; 2266 } 2267 2268 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 2269 { 2270 gfx_v9_0_cp_gfx_enable(adev, enable); 2271 gfx_v9_0_cp_compute_enable(adev, enable); 2272 } 2273 2274 static int gfx_v9_0_hw_init(void *handle) 2275 { 2276 int r; 2277 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2278 2279 gfx_v9_0_init_golden_registers(adev); 2280 2281 gfx_v9_0_gpu_init(adev); 
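	/* Bring-up order: RLC first, then the CP (microcode load when not
	 * using PSP loading, plus gfx, compute and, under SR-IOV, KIQ ring
	 * resume), and NGG last once the rings have been brought up and
	 * ring-tested.
	 */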
2282 2283 r = gfx_v9_0_rlc_resume(adev); 2284 if (r) 2285 return r; 2286 2287 r = gfx_v9_0_cp_resume(adev); 2288 if (r) 2289 return r; 2290 2291 r = gfx_v9_0_ngg_en(adev); 2292 if (r) 2293 return r; 2294 2295 return r; 2296 } 2297 2298 static int gfx_v9_0_hw_fini(void *handle) 2299 { 2300 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2301 2302 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 2303 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 2304 if (amdgpu_sriov_vf(adev)) { 2305 pr_debug("For SRIOV client, shouldn't do anything.\n"); 2306 return 0; 2307 } 2308 gfx_v9_0_cp_enable(adev, false); 2309 gfx_v9_0_rlc_stop(adev); 2310 gfx_v9_0_cp_compute_fini(adev); 2311 2312 return 0; 2313 } 2314 2315 static int gfx_v9_0_suspend(void *handle) 2316 { 2317 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2318 2319 return gfx_v9_0_hw_fini(adev); 2320 } 2321 2322 static int gfx_v9_0_resume(void *handle) 2323 { 2324 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2325 2326 return gfx_v9_0_hw_init(adev); 2327 } 2328 2329 static bool gfx_v9_0_is_idle(void *handle) 2330 { 2331 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2332 2333 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 2334 GRBM_STATUS, GUI_ACTIVE)) 2335 return false; 2336 else 2337 return true; 2338 } 2339 2340 static int gfx_v9_0_wait_for_idle(void *handle) 2341 { 2342 unsigned i; 2343 u32 tmp; 2344 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2345 2346 for (i = 0; i < adev->usec_timeout; i++) { 2347 /* read MC_STATUS */ 2348 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) & 2349 GRBM_STATUS__GUI_ACTIVE_MASK; 2350 2351 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 2352 return 0; 2353 udelay(1); 2354 } 2355 return -ETIMEDOUT; 2356 } 2357 2358 static int gfx_v9_0_soft_reset(void *handle) 2359 { 2360 u32 grbm_soft_reset = 0; 2361 u32 tmp; 2362 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2363 2364 /* GRBM_STATUS */ 2365 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 2366 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 2367 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 2368 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 2369 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 2370 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 2371 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 2372 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2373 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 2374 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2375 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 2376 } 2377 2378 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 2379 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2380 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 2381 } 2382 2383 /* GRBM_STATUS2 */ 2384 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 2385 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 2386 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2387 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2388 2389 2390 if (grbm_soft_reset) { 2391 /* stop the rlc */ 2392 gfx_v9_0_rlc_stop(adev); 2393 2394 /* Disable GFX parsing/prefetching */ 2395 gfx_v9_0_cp_gfx_enable(adev, false); 2396 2397 /* Disable MEC parsing/prefetching */ 2398 gfx_v9_0_cp_compute_enable(adev, false); 2399 2400 if (grbm_soft_reset) { 2401 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 2402 tmp |= grbm_soft_reset; 2403 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 2404 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 2405 tmp = 
RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}

static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].mem_base,
				   gds_base);

	/* GDS Size */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].mem_size,
				   gds_size);

	/* GWS */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].gws,
				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   amdgpu_gds_reg_offset[vmid].oa,
				   (1 << (oa_size + oa_base)) - (1 << oa_base));
}

static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	return 0;
}

static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	uint32_t rlc_setting, data;
	unsigned i;

	if (adev->gfx.rlc.in_safe_mode)
		return;

	/* if RLC is not enabled, do nothing */
	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		data = RLC_SAFE_MODE__CMD_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);

		/* wait for the RLC to ack RLC_SAFE_MODE by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE),
					   RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

static void gfx_v9_0_exit_rlc_safe_mode(struct
amdgpu_device *adev) 2529 { 2530 uint32_t rlc_setting, data; 2531 2532 if (!adev->gfx.rlc.in_safe_mode) 2533 return; 2534 2535 /* if RLC is not enabled, do nothing */ 2536 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 2537 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 2538 return; 2539 2540 if (adev->cg_flags & 2541 (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 2542 /* 2543 * Try to exit safe mode only if it is already in safe 2544 * mode. 2545 */ 2546 data = RLC_SAFE_MODE__CMD_MASK; 2547 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 2548 adev->gfx.rlc.in_safe_mode = false; 2549 } 2550 } 2551 2552 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 2553 bool enable) 2554 { 2555 uint32_t data, def; 2556 2557 /* It is disabled by HW by default */ 2558 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 2559 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 2560 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2561 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | 2562 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 2563 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 2564 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 2565 2566 /* only for Vega10 & Raven1 */ 2567 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 2568 2569 if (def != data) 2570 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2571 2572 /* MGLS is a global flag to control all MGLS in GFX */ 2573 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 2574 /* 2 - RLC memory Light sleep */ 2575 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 2576 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 2577 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2578 if (def != data) 2579 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 2580 } 2581 /* 3 - CP memory Light sleep */ 2582 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 2583 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 2584 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2585 if (def != data) 2586 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 2587 } 2588 } 2589 } else { 2590 /* 1 - MGCG_OVERRIDE */ 2591 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2592 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | 2593 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 2594 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 2595 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 2596 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 2597 if (def != data) 2598 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2599 2600 /* 2 - disable MGLS in RLC */ 2601 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 2602 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 2603 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 2604 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 2605 } 2606 2607 /* 3 - disable MGLS in CP */ 2608 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 2609 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 2610 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 2611 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 2612 } 2613 } 2614 } 2615 2616 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 2617 bool enable) 2618 { 2619 uint32_t data, def; 2620 2621 adev->gfx.rlc.funcs->enter_safe_mode(adev); 2622 2623 /* Enable 3D CGCG/CGLS */ 2624 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 2625 /* write cmd to clear cgcg/cgls ov */ 2626 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2627 /* unset CGCG override */ 2628 data &= 
~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 2629 /* update CGCG and CGLS override bits */ 2630 if (def != data) 2631 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2632 /* enable 3Dcgcg FSM(0x0020003f) */ 2633 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 2634 data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 2635 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 2636 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 2637 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 2638 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 2639 if (def != data) 2640 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 2641 2642 /* set IDLE_POLL_COUNT(0x00900100) */ 2643 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 2644 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 2645 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2646 if (def != data) 2647 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 2648 } else { 2649 /* Disable CGCG/CGLS */ 2650 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 2651 /* disable cgcg, cgls should be disabled */ 2652 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 2653 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 2654 /* disable cgcg and cgls in FSM */ 2655 if (def != data) 2656 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 2657 } 2658 2659 adev->gfx.rlc.funcs->exit_safe_mode(adev); 2660 } 2661 2662 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 2663 bool enable) 2664 { 2665 uint32_t def, data; 2666 2667 adev->gfx.rlc.funcs->enter_safe_mode(adev); 2668 2669 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 2670 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 2671 /* unset CGCG override */ 2672 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 2673 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 2674 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 2675 else 2676 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 2677 /* update CGCG and CGLS override bits */ 2678 if (def != data) 2679 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 2680 2681 /* enable cgcg FSM(0x0020003F) */ 2682 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 2683 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 2684 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 2685 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 2686 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 2687 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 2688 if (def != data) 2689 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 2690 2691 /* set IDLE_POLL_COUNT(0x00900100) */ 2692 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 2693 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 2694 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2695 if (def != data) 2696 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 2697 } else { 2698 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 2699 /* reset CGCG/CGLS bits */ 2700 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 2701 /* disable cgcg and cgls in FSM */ 2702 if (def != data) 2703 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 2704 } 2705 2706 adev->gfx.rlc.funcs->exit_safe_mode(adev); 2707 } 2708 2709 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 2710 bool enable) 2711 { 2712 if (enable) { 2713 /* CGCG/CGLS should be enabled after MGCG/MGLS 2714 * === MGCG + MGLS === 2715 */ 2716 
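		/* The disable path below reverses this order: coarse grain
		 * gating is torn down before medium grain gating.
		 */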
gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  CGCG + CGLS === */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS
		 * ===  CGCG + CGLS ===
		 */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  MGCG + MGLS === */
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}

static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
	.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
};

static int gfx_v9_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}

static int gfx_v9_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		gfx_v9_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	default:
		break;
	}
	return 0;
}

static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}

static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
}

static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
	} else {
		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
	}

	return wptr;
}

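/* With a doorbell, the write pointer is mirrored into the writeback slot and
 * then kicked via WDOORBELL64; without one it is written to CP_RB0_WPTR/_HI
 * over MMIO.  gfx9 uses 64-bit ring pointers, hence the split hi/lo writes.
 */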
static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 2828 { 2829 struct amdgpu_device *adev = ring->adev; 2830 2831 if (ring->use_doorbell) { 2832 /* XXX check if swapping is necessary on BE */ 2833 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 2834 WDOORBELL64(ring->doorbell_index, ring->wptr); 2835 } else { 2836 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2837 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2838 } 2839 } 2840 2841 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 2842 { 2843 u32 ref_and_mask, reg_mem_engine; 2844 struct nbio_hdp_flush_reg *nbio_hf_reg; 2845 2846 if (ring->adev->asic_type == CHIP_VEGA10) 2847 nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; 2848 2849 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 2850 switch (ring->me) { 2851 case 1: 2852 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 2853 break; 2854 case 2: 2855 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 2856 break; 2857 default: 2858 return; 2859 } 2860 reg_mem_engine = 0; 2861 } else { 2862 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 2863 reg_mem_engine = 1; /* pfp */ 2864 } 2865 2866 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 2867 nbio_hf_reg->hdp_flush_req_offset, 2868 nbio_hf_reg->hdp_flush_done_offset, 2869 ref_and_mask, ref_and_mask, 0x20); 2870 } 2871 2872 static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 2873 { 2874 gfx_v9_0_write_data_to_reg(ring, 0, true, 2875 SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1); 2876 } 2877 2878 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 2879 struct amdgpu_ib *ib, 2880 unsigned vm_id, bool ctx_switch) 2881 { 2882 u32 header, control = 0; 2883 2884 if (ib->flags & AMDGPU_IB_FLAG_CE) 2885 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 2886 else 2887 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 2888 2889 control |= ib->length_dw | (vm_id << 24); 2890 2891 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) 2892 control |= INDIRECT_BUFFER_PRE_ENB(1); 2893 2894 amdgpu_ring_write(ring, header); 2895 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 2896 amdgpu_ring_write(ring, 2897 #ifdef __BIG_ENDIAN 2898 (2 << 0) | 2899 #endif 2900 lower_32_bits(ib->gpu_addr)); 2901 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 2902 amdgpu_ring_write(ring, control); 2903 } 2904 2905 #define INDIRECT_BUFFER_VALID (1 << 23) 2906 2907 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 2908 struct amdgpu_ib *ib, 2909 unsigned vm_id, bool ctx_switch) 2910 { 2911 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); 2912 2913 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 2914 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 2915 amdgpu_ring_write(ring, 2916 #ifdef __BIG_ENDIAN 2917 (2 << 0) | 2918 #endif 2919 lower_32_bits(ib->gpu_addr)); 2920 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 2921 amdgpu_ring_write(ring, control); 2922 } 2923 2924 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 2925 u64 seq, unsigned flags) 2926 { 2927 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 2928 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 2929 2930 /* RELEASE_MEM - flush caches, send int */ 2931 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 2932 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 2933 EOP_TC_ACTION_EN | 2934 EOP_TC_WB_ACTION_EN | 2935 EOP_TC_MD_ACTION_EN | 2936 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 
2937 EVENT_INDEX(5))); 2938 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 2939 2940 /* 2941 * the address should be Qword aligned if 64bit write, Dword 2942 * aligned if only send 32bit data low (discard data high) 2943 */ 2944 if (write64bit) 2945 BUG_ON(addr & 0x7); 2946 else 2947 BUG_ON(addr & 0x3); 2948 amdgpu_ring_write(ring, lower_32_bits(addr)); 2949 amdgpu_ring_write(ring, upper_32_bits(addr)); 2950 amdgpu_ring_write(ring, lower_32_bits(seq)); 2951 amdgpu_ring_write(ring, upper_32_bits(seq)); 2952 amdgpu_ring_write(ring, 0); 2953 } 2954 2955 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 2956 { 2957 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 2958 uint32_t seq = ring->fence_drv.sync_seq; 2959 uint64_t addr = ring->fence_drv.gpu_addr; 2960 2961 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 2962 lower_32_bits(addr), upper_32_bits(addr), 2963 seq, 0xffffffff, 4); 2964 } 2965 2966 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 2967 unsigned vm_id, uint64_t pd_addr) 2968 { 2969 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 2970 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 2971 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 2972 unsigned eng = ring->vm_inv_eng; 2973 2974 pd_addr = pd_addr | 0x1; /* valid bit */ 2975 /* now only use physical base address of PDE and valid */ 2976 BUG_ON(pd_addr & 0xFFFF00000000003EULL); 2977 2978 gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2979 hub->ctx0_ptb_addr_lo32 + (2 * vm_id), 2980 lower_32_bits(pd_addr)); 2981 2982 gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2983 hub->ctx0_ptb_addr_hi32 + (2 * vm_id), 2984 upper_32_bits(pd_addr)); 2985 2986 gfx_v9_0_write_data_to_reg(ring, usepfp, true, 2987 hub->vm_inv_eng0_req + eng, req); 2988 2989 /* wait for the invalidate to complete */ 2990 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + 2991 eng, 0, 1 << vm_id, 1 << vm_id, 0x20); 2992 2993 /* compute doesn't have PFP */ 2994 if (usepfp) { 2995 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 2996 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 2997 amdgpu_ring_write(ring, 0x0); 2998 } 2999 } 3000 3001 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 3002 { 3003 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 3004 } 3005 3006 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 3007 { 3008 u64 wptr; 3009 3010 /* XXX check if swapping is necessary on BE */ 3011 if (ring->use_doorbell) 3012 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 3013 else 3014 BUG(); 3015 return wptr; 3016 } 3017 3018 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 3019 { 3020 struct amdgpu_device *adev = ring->adev; 3021 3022 /* XXX check if swapping is necessary on BE */ 3023 if (ring->use_doorbell) { 3024 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 3025 WDOORBELL64(ring->doorbell_index, ring->wptr); 3026 } else{ 3027 BUG(); /* only DOORBELL method supported on gfx9 now */ 3028 } 3029 } 3030 3031 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 3032 u64 seq, unsigned int flags) 3033 { 3034 /* we only allocate 32bit for each seq wb address */ 3035 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 3036 3037 /* write fence seq to the "addr" */ 3038 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3039 amdgpu_ring_write(ring, 
(WRITE_DATA_ENGINE_SEL(0) | 3040 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 3041 amdgpu_ring_write(ring, lower_32_bits(addr)); 3042 amdgpu_ring_write(ring, upper_32_bits(addr)); 3043 amdgpu_ring_write(ring, lower_32_bits(seq)); 3044 3045 if (flags & AMDGPU_FENCE_FLAG_INT) { 3046 /* set register to trigger INT */ 3047 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3048 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 3049 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 3050 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 3051 amdgpu_ring_write(ring, 0); 3052 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 3053 } 3054 } 3055 3056 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 3057 { 3058 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 3059 amdgpu_ring_write(ring, 0); 3060 } 3061 3062 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 3063 { 3064 static struct v9_ce_ib_state ce_payload = {0}; 3065 uint64_t csa_addr; 3066 int cnt; 3067 3068 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 3069 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; 3070 3071 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 3072 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 3073 WRITE_DATA_DST_SEL(8) | 3074 WR_CONFIRM) | 3075 WRITE_DATA_CACHE_POLICY(0)); 3076 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 3077 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 3078 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 3079 } 3080 3081 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 3082 { 3083 static struct v9_de_ib_state de_payload = {0}; 3084 uint64_t csa_addr, gds_addr; 3085 int cnt; 3086 3087 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; 3088 gds_addr = csa_addr + 4096; 3089 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 3090 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 3091 3092 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 3093 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 3094 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 3095 WRITE_DATA_DST_SEL(8) | 3096 WR_CONFIRM) | 3097 WRITE_DATA_CACHE_POLICY(0)); 3098 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 3099 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 3100 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 3101 } 3102 3103 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 3104 { 3105 uint32_t dw2 = 0; 3106 3107 if (amdgpu_sriov_vf(ring->adev)) 3108 gfx_v9_0_ring_emit_ce_meta(ring); 3109 3110 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 3111 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 3112 /* set load_global_config & load_global_uconfig */ 3113 dw2 |= 0x8001; 3114 /* set load_cs_sh_regs */ 3115 dw2 |= 0x01000000; 3116 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 3117 dw2 |= 0x10002; 3118 3119 /* set load_ce_ram if preamble presented */ 3120 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 3121 dw2 |= 0x10000000; 3122 } else { 3123 /* still load_ce_ram if this is the first time preamble presented 3124 * although there is no context switch happens. 
3125 */ 3126 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 3127 dw2 |= 0x10000000; 3128 } 3129 3130 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3131 amdgpu_ring_write(ring, dw2); 3132 amdgpu_ring_write(ring, 0); 3133 3134 if (amdgpu_sriov_vf(ring->adev)) 3135 gfx_v9_0_ring_emit_de_meta(ring); 3136 } 3137 3138 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 3139 { 3140 unsigned ret; 3141 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 3142 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 3143 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 3144 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 3145 ret = ring->wptr & ring->buf_mask; 3146 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 3147 return ret; 3148 } 3149 3150 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 3151 { 3152 unsigned cur; 3153 BUG_ON(offset > ring->buf_mask); 3154 BUG_ON(ring->ring[offset] != 0x55aa55aa); 3155 3156 cur = (ring->wptr & ring->buf_mask) - 1; 3157 if (likely(cur > offset)) 3158 ring->ring[offset] = cur - offset; 3159 else 3160 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 3161 } 3162 3163 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 3164 { 3165 struct amdgpu_device *adev = ring->adev; 3166 3167 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 3168 amdgpu_ring_write(ring, 0 | /* src: register*/ 3169 (5 << 8) | /* dst: memory */ 3170 (1 << 20)); /* write confirm */ 3171 amdgpu_ring_write(ring, reg); 3172 amdgpu_ring_write(ring, 0); 3173 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 3174 adev->virt.reg_val_offs * 4)); 3175 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 3176 adev->virt.reg_val_offs * 4)); 3177 } 3178 3179 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 3180 uint32_t val) 3181 { 3182 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3183 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ 3184 amdgpu_ring_write(ring, reg); 3185 amdgpu_ring_write(ring, 0); 3186 amdgpu_ring_write(ring, val); 3187 } 3188 3189 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 3190 enum amdgpu_interrupt_state state) 3191 { 3192 switch (state) { 3193 case AMDGPU_IRQ_STATE_DISABLE: 3194 case AMDGPU_IRQ_STATE_ENABLE: 3195 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 3196 TIME_STAMP_INT_ENABLE, 3197 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 3198 break; 3199 default: 3200 break; 3201 } 3202 } 3203 3204 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 3205 int me, int pipe, 3206 enum amdgpu_interrupt_state state) 3207 { 3208 u32 mec_int_cntl, mec_int_cntl_reg; 3209 3210 /* 3211 * amdgpu controls only pipe 0 of MEC1. That's why this function only 3212 * handles the setting of interrupts for this specific pipe. All other 3213 * pipes' interrupts are set by amdkfd. 
3214 */ 3215 3216 if (me == 1) { 3217 switch (pipe) { 3218 case 0: 3219 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 3220 break; 3221 default: 3222 DRM_DEBUG("invalid pipe %d\n", pipe); 3223 return; 3224 } 3225 } else { 3226 DRM_DEBUG("invalid me %d\n", me); 3227 return; 3228 } 3229 3230 switch (state) { 3231 case AMDGPU_IRQ_STATE_DISABLE: 3232 mec_int_cntl = RREG32(mec_int_cntl_reg); 3233 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 3234 TIME_STAMP_INT_ENABLE, 0); 3235 WREG32(mec_int_cntl_reg, mec_int_cntl); 3236 break; 3237 case AMDGPU_IRQ_STATE_ENABLE: 3238 mec_int_cntl = RREG32(mec_int_cntl_reg); 3239 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 3240 TIME_STAMP_INT_ENABLE, 1); 3241 WREG32(mec_int_cntl_reg, mec_int_cntl); 3242 break; 3243 default: 3244 break; 3245 } 3246 } 3247 3248 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 3249 struct amdgpu_irq_src *source, 3250 unsigned type, 3251 enum amdgpu_interrupt_state state) 3252 { 3253 switch (state) { 3254 case AMDGPU_IRQ_STATE_DISABLE: 3255 case AMDGPU_IRQ_STATE_ENABLE: 3256 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 3257 PRIV_REG_INT_ENABLE, 3258 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 3259 break; 3260 default: 3261 break; 3262 } 3263 3264 return 0; 3265 } 3266 3267 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 3268 struct amdgpu_irq_src *source, 3269 unsigned type, 3270 enum amdgpu_interrupt_state state) 3271 { 3272 switch (state) { 3273 case AMDGPU_IRQ_STATE_DISABLE: 3274 case AMDGPU_IRQ_STATE_ENABLE: 3275 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 3276 PRIV_INSTR_INT_ENABLE, 3277 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 3278 default: 3279 break; 3280 } 3281 3282 return 0; 3283 } 3284 3285 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 3286 struct amdgpu_irq_src *src, 3287 unsigned type, 3288 enum amdgpu_interrupt_state state) 3289 { 3290 switch (type) { 3291 case AMDGPU_CP_IRQ_GFX_EOP: 3292 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 3293 break; 3294 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 3295 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 3296 break; 3297 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 3298 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 3299 break; 3300 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 3301 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 3302 break; 3303 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 3304 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 3305 break; 3306 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 3307 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 3308 break; 3309 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 3310 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 3311 break; 3312 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 3313 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 3314 break; 3315 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 3316 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 3317 break; 3318 default: 3319 break; 3320 } 3321 return 0; 3322 } 3323 3324 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 3325 struct amdgpu_irq_src *source, 3326 struct amdgpu_iv_entry *entry) 3327 { 3328 int i; 3329 u8 me_id, pipe_id, queue_id; 3330 struct amdgpu_ring *ring; 3331 3332 DRM_DEBUG("IH: CP EOP\n"); 3333 me_id = (entry->ring_id & 0x0c) >> 2; 3334 pipe_id = (entry->ring_id & 0x03) >> 0; 3335 queue_id = (entry->ring_id & 0x70) >> 4; 3336 
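	/* ring_id encodes the source: bits [3:2] select the ME, bits [1:0]
	 * the pipe and bits [6:4] the queue.  ME 0 is the gfx ring; ME 1/2
	 * are matched against the compute rings below.
	 */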
3337 switch (me_id) { 3338 case 0: 3339 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 3340 break; 3341 case 1: 3342 case 2: 3343 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3344 ring = &adev->gfx.compute_ring[i]; 3345 /* Per-queue interrupt is supported for MEC starting from VI. 3346 * The interrupt can only be enabled/disabled per pipe instead of per queue. 3347 */ 3348 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 3349 amdgpu_fence_process(ring); 3350 } 3351 break; 3352 } 3353 return 0; 3354 } 3355 3356 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 3357 struct amdgpu_irq_src *source, 3358 struct amdgpu_iv_entry *entry) 3359 { 3360 DRM_ERROR("Illegal register access in command stream\n"); 3361 schedule_work(&adev->reset_work); 3362 return 0; 3363 } 3364 3365 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 3366 struct amdgpu_irq_src *source, 3367 struct amdgpu_iv_entry *entry) 3368 { 3369 DRM_ERROR("Illegal instruction in command stream\n"); 3370 schedule_work(&adev->reset_work); 3371 return 0; 3372 } 3373 3374 static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 3375 struct amdgpu_irq_src *src, 3376 unsigned int type, 3377 enum amdgpu_interrupt_state state) 3378 { 3379 uint32_t tmp, target; 3380 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 3381 3382 if (ring->me == 1) 3383 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 3384 else 3385 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); 3386 target += ring->pipe; 3387 3388 switch (type) { 3389 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 3390 if (state == AMDGPU_IRQ_STATE_DISABLE) { 3391 tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 3392 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 3393 GENERIC2_INT_ENABLE, 0); 3394 WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 3395 3396 tmp = RREG32(target); 3397 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 3398 GENERIC2_INT_ENABLE, 0); 3399 WREG32(target, tmp); 3400 } else { 3401 tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 3402 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 3403 GENERIC2_INT_ENABLE, 1); 3404 WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 3405 3406 tmp = RREG32(target); 3407 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 3408 GENERIC2_INT_ENABLE, 1); 3409 WREG32(target, tmp); 3410 } 3411 break; 3412 default: 3413 BUG(); /* kiq only support GENERIC2_INT now */ 3414 break; 3415 } 3416 return 0; 3417 } 3418 3419 static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev, 3420 struct amdgpu_irq_src *source, 3421 struct amdgpu_iv_entry *entry) 3422 { 3423 u8 me_id, pipe_id, queue_id; 3424 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 3425 3426 me_id = (entry->ring_id & 0x0c) >> 2; 3427 pipe_id = (entry->ring_id & 0x03) >> 0; 3428 queue_id = (entry->ring_id & 0x70) >> 4; 3429 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 3430 me_id, pipe_id, queue_id); 3431 3432 amdgpu_fence_process(ring); 3433 return 0; 3434 } 3435 3436 const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 3437 .name = "gfx_v9_0", 3438 .early_init = gfx_v9_0_early_init, 3439 .late_init = gfx_v9_0_late_init, 3440 .sw_init = gfx_v9_0_sw_init, 3441 .sw_fini = gfx_v9_0_sw_fini, 3442 .hw_init = gfx_v9_0_hw_init, 3443 .hw_fini = gfx_v9_0_hw_fini, 3444 .suspend = gfx_v9_0_suspend, 3445 .resume = gfx_v9_0_resume, 3446 .is_idle = gfx_v9_0_is_idle, 3447 .wait_for_idle = gfx_v9_0_wait_for_idle, 3448 .soft_reset = gfx_v9_0_soft_reset, 3449 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 3450 .set_powergating_state = 
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 + /* COND_EXEC */
		7 + /* PIPELINE_SYNC */
		24 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		24 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		24 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
	.set = gfx_v9_0_kiq_set_interrupt_state,
	.process = gfx_v9_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

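/*
 * The helper below derives the per-SH active-CU bitmap: the fuse
 * (CC_GC_SHADER_ARRAY_CONFIG) and user (GC_USER_SHADER_ARRAY_CONFIG)
 * inactive-CU fields are OR'ed together, inverted and masked down to
 * max_cu_per_sh bits.  For instance, with 16 CUs per SH and an
 * inactive mask of 0x0003, the returned bitmap would be 0xfffc.
 */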
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	memset(cu_info, 0, sizeof(*cu_info));

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;

	return 0;
}

static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
{
	int r, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct v9_mqd *mqd;
	struct amdgpu_device *adev;

	adev = ring->adev;
	if (ring->mqd_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     sizeof(struct v9_mqd),
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
				     NULL, &ring->mqd_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0)) {
		gfx_v9_0_cp_compute_fini(adev);
		return r;
	}

	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &mqd_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
		gfx_v9_0_cp_compute_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
	if (r) {
		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
		gfx_v9_0_cp_compute_fini(adev);
		return r;
	}

	/* init the mqd struct */
	memset(buf, 0, sizeof(struct v9_mqd));

	mqd = (struct v9_mqd *)buf;
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, ring->me,
			  ring->pipe,
			  ring->queue, 0);
	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* write the EOP addr */
	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
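	/*
	 * The EOP buffer address is programmed in 256-byte units (hence
	 * the >> 8 below) before its low/high halves are written to the
	 * registers and mirrored into the MQD.
	 */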
	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
	eop_gpu_addr >>= 8;

	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(MEC_HPD_SIZE / 4) - 1));
	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);

	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
	if (use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);

	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
	mqd->cp_hqd_pq_control = tmp;

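	/*
	 * QUEUE_SIZE above appears to follow the same log2 encoding as
	 * EOP_SIZE (2^(value + 1) dwords); e.g. a 64KB ring (16384 dwords)
	 * would be programmed as order_base_2(16384) - 1 = 13.
	 */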
	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
			     (AMDGPU_DOORBELL64_KIQ * 2) << 2);
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
			     (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
		mqd->cp_hqd_pq_doorbell_control = tmp;
	} else {
		mqd->cp_hqd_pq_doorbell_control = 0;
	}
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);

	if (use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};