1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/firmware.h> 24 #include <drm/drmP.h> 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "soc15.h" 28 #include "soc15d.h" 29 30 #include "vega10/soc15ip.h" 31 #include "vega10/GC/gc_9_0_offset.h" 32 #include "vega10/GC/gc_9_0_sh_mask.h" 33 #include "vega10/vega10_enum.h" 34 #include "vega10/HDP/hdp_4_0_offset.h" 35 36 #include "soc15_common.h" 37 #include "clearstate_gfx9.h" 38 #include "v9_structs.h" 39 40 #define GFX9_NUM_GFX_RINGS 1 41 #define GFX9_MEC_HPD_SIZE 2048 42 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 43 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 44 #define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 45 46 #define mmPWR_MISC_CNTL_STATUS 0x0183 47 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 48 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 49 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 50 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L 51 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L 52 53 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); 54 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); 55 MODULE_FIRMWARE("amdgpu/vega10_me.bin"); 56 MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); 57 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 58 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 59 60 MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 61 MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 62 MODULE_FIRMWARE("amdgpu/raven_me.bin"); 63 MODULE_FIRMWARE("amdgpu/raven_mec.bin"); 64 MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); 65 MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 66 67 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 68 { 69 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 70 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)}, 71 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), 72 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)}, 73 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), 74 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)}, 75 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), 76 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)}, 77 {SOC15_REG_OFFSET(GC, 0, 
mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), 78 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)}, 79 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), 80 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)}, 81 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), 82 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)}, 83 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), 84 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)}, 85 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), 86 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)}, 87 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), 88 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)}, 89 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), 90 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)}, 91 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), 92 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)}, 93 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), 94 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, 95 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), 96 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)}, 97 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), 98 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)}, 99 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), 100 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)} 101 }; 102 103 static const u32 golden_settings_gc_9_0[] = 104 { 105 SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, 106 SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, 107 SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, 108 SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, 109 SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, 110 SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, 111 SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, 112 SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, 113 SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, 114 SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, 115 SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, 116 SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, 117 SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, 118 SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, 119 SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), 0x00001000, 0x00001000, 120 SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107, 121 SOC15_REG_OFFSET(GC, 0, mmSQC_CONFIG), 0x03000000, 0x020a2000, 122 SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, 123 SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 
0xffffffff, 0x4a2c0e68, 124 SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, 125 SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, 126 SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff, 127 SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 128 }; 129 130 static const u32 golden_settings_gc_9_0_vg10[] = 131 { 132 SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107, 133 SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, 134 SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042, 135 SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, 136 SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, 137 SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, 138 SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800 139 }; 140 141 static const u32 golden_settings_gc_9_1[] = 142 { 143 SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104, 144 SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, 145 SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, 146 SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, 147 SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, 148 SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, 149 SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, 150 SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, 151 SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, 152 SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, 153 SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, 154 SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, 155 SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, 156 SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, 157 SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, 158 SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, 159 SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000, 160 SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120, 161 SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, 162 SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff, 163 SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 164 }; 165 166 static const u32 golden_settings_gc_9_1_rv1[] = 167 { 168 SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, 169 SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042, 170 SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042, 171 SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000, 172 SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000, 173 SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, 174 SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800 175 }; 176 177 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 178 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 179 180 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 181 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 182 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 183 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 184 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 185 struct amdgpu_cu_info 
*cu_info); 186 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 187 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); 188 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 189 190 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 191 { 192 switch (adev->asic_type) { 193 case CHIP_VEGA10: 194 amdgpu_program_register_sequence(adev, 195 golden_settings_gc_9_0, 196 (const u32)ARRAY_SIZE(golden_settings_gc_9_0)); 197 amdgpu_program_register_sequence(adev, 198 golden_settings_gc_9_0_vg10, 199 (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 200 break; 201 case CHIP_RAVEN: 202 amdgpu_program_register_sequence(adev, 203 golden_settings_gc_9_1, 204 (const u32)ARRAY_SIZE(golden_settings_gc_9_1)); 205 amdgpu_program_register_sequence(adev, 206 golden_settings_gc_9_1_rv1, 207 (const u32)ARRAY_SIZE(golden_settings_gc_9_1_rv1)); 208 break; 209 default: 210 break; 211 } 212 } 213 214 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) 215 { 216 adev->gfx.scratch.num_reg = 8; 217 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 218 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 219 } 220 221 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 222 bool wc, uint32_t reg, uint32_t val) 223 { 224 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 225 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 226 WRITE_DATA_DST_SEL(0) | 227 (wc ? WR_CONFIRM : 0)); 228 amdgpu_ring_write(ring, reg); 229 amdgpu_ring_write(ring, 0); 230 amdgpu_ring_write(ring, val); 231 } 232 233 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 234 int mem_space, int opt, uint32_t addr0, 235 uint32_t addr1, uint32_t ref, uint32_t mask, 236 uint32_t inv) 237 { 238 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 239 amdgpu_ring_write(ring, 240 /* memory (1) or register (0) */ 241 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 242 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 243 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 244 WAIT_REG_MEM_ENGINE(eng_sel))); 245 246 if (mem_space) 247 BUG_ON(addr0 & 0x3); /* Dword align */ 248 amdgpu_ring_write(ring, addr0); 249 amdgpu_ring_write(ring, addr1); 250 amdgpu_ring_write(ring, ref); 251 amdgpu_ring_write(ring, mask); 252 amdgpu_ring_write(ring, inv); /* poll interval */ 253 } 254 255 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 256 { 257 struct amdgpu_device *adev = ring->adev; 258 uint32_t scratch; 259 uint32_t tmp = 0; 260 unsigned i; 261 int r; 262 263 r = amdgpu_gfx_scratch_get(adev, &scratch); 264 if (r) { 265 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 266 return r; 267 } 268 WREG32(scratch, 0xCAFEDEAD); 269 r = amdgpu_ring_alloc(ring, 3); 270 if (r) { 271 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 272 ring->idx, r); 273 amdgpu_gfx_scratch_free(adev, scratch); 274 return r; 275 } 276 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 277 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 278 amdgpu_ring_write(ring, 0xDEADBEEF); 279 amdgpu_ring_commit(ring); 280 281 for (i = 0; i < adev->usec_timeout; i++) { 282 tmp = RREG32(scratch); 283 if (tmp == 0xDEADBEEF) 284 break; 285 DRM_UDELAY(1); 286 } 287 if (i < adev->usec_timeout) { 288 DRM_INFO("ring test on %d succeeded in %d usecs\n", 289 ring->idx, i); 290 } else { 291 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 292 
ring->idx, scratch, tmp); 293 r = -EINVAL; 294 } 295 amdgpu_gfx_scratch_free(adev, scratch); 296 return r; 297 } 298 299 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 300 { 301 struct amdgpu_device *adev = ring->adev; 302 struct amdgpu_ib ib; 303 struct dma_fence *f = NULL; 304 uint32_t scratch; 305 uint32_t tmp = 0; 306 long r; 307 308 r = amdgpu_gfx_scratch_get(adev, &scratch); 309 if (r) { 310 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); 311 return r; 312 } 313 WREG32(scratch, 0xCAFEDEAD); 314 memset(&ib, 0, sizeof(ib)); 315 r = amdgpu_ib_get(adev, NULL, 256, &ib); 316 if (r) { 317 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 318 goto err1; 319 } 320 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 321 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 322 ib.ptr[2] = 0xDEADBEEF; 323 ib.length_dw = 3; 324 325 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 326 if (r) 327 goto err2; 328 329 r = dma_fence_wait_timeout(f, false, timeout); 330 if (r == 0) { 331 DRM_ERROR("amdgpu: IB test timed out.\n"); 332 r = -ETIMEDOUT; 333 goto err2; 334 } else if (r < 0) { 335 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 336 goto err2; 337 } 338 tmp = RREG32(scratch); 339 if (tmp == 0xDEADBEEF) { 340 DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 341 r = 0; 342 } else { 343 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 344 scratch, tmp); 345 r = -EINVAL; 346 } 347 err2: 348 amdgpu_ib_free(adev, &ib, NULL); 349 dma_fence_put(f); 350 err1: 351 amdgpu_gfx_scratch_free(adev, scratch); 352 return r; 353 } 354 355 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 356 { 357 const char *chip_name; 358 char fw_name[30]; 359 int err; 360 struct amdgpu_firmware_info *info = NULL; 361 const struct common_firmware_header *header = NULL; 362 const struct gfx_firmware_header_v1_0 *cp_hdr; 363 const struct rlc_firmware_header_v2_0 *rlc_hdr; 364 unsigned int *tmp = NULL; 365 unsigned int i = 0; 366 367 DRM_DEBUG("\n"); 368 369 switch (adev->asic_type) { 370 case CHIP_VEGA10: 371 chip_name = "vega10"; 372 break; 373 case CHIP_RAVEN: 374 chip_name = "raven"; 375 break; 376 default: 377 BUG(); 378 } 379 380 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 381 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 382 if (err) 383 goto out; 384 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 385 if (err) 386 goto out; 387 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 388 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 389 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 390 391 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 392 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 393 if (err) 394 goto out; 395 err = amdgpu_ucode_validate(adev->gfx.me_fw); 396 if (err) 397 goto out; 398 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 399 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 400 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 401 402 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 403 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 404 if (err) 405 goto out; 406 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 407 if (err) 408 goto out; 409 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 410 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 
411 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 412 413 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 414 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 415 if (err) 416 goto out; 417 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 418 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 419 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 420 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 421 adev->gfx.rlc.save_and_restore_offset = 422 le32_to_cpu(rlc_hdr->save_and_restore_offset); 423 adev->gfx.rlc.clear_state_descriptor_offset = 424 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 425 adev->gfx.rlc.avail_scratch_ram_locations = 426 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 427 adev->gfx.rlc.reg_restore_list_size = 428 le32_to_cpu(rlc_hdr->reg_restore_list_size); 429 adev->gfx.rlc.reg_list_format_start = 430 le32_to_cpu(rlc_hdr->reg_list_format_start); 431 adev->gfx.rlc.reg_list_format_separate_start = 432 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 433 adev->gfx.rlc.starting_offsets_start = 434 le32_to_cpu(rlc_hdr->starting_offsets_start); 435 adev->gfx.rlc.reg_list_format_size_bytes = 436 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 437 adev->gfx.rlc.reg_list_size_bytes = 438 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 439 adev->gfx.rlc.register_list_format = 440 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 441 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 442 if (!adev->gfx.rlc.register_list_format) { 443 err = -ENOMEM; 444 goto out; 445 } 446 447 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 448 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 449 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 450 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 451 452 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 453 454 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 455 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 456 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 457 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 458 459 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 460 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 461 if (err) 462 goto out; 463 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 464 if (err) 465 goto out; 466 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 467 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 468 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 469 470 471 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 472 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 473 if (!err) { 474 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 475 if (err) 476 goto out; 477 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 478 adev->gfx.mec2_fw->data; 479 adev->gfx.mec2_fw_version = 480 le32_to_cpu(cp_hdr->header.ucode_version); 481 adev->gfx.mec2_feature_version = 482 le32_to_cpu(cp_hdr->ucode_feature_version); 483 } else { 484 err = 0; 485 adev->gfx.mec2_fw = NULL; 486 } 487 488 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 489 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 490 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 491 info->fw = adev->gfx.pfp_fw; 492 header = (const struct common_firmware_header *)info->fw->data; 493 adev->firmware.fw_size += 494 
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 495 496 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 497 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 498 info->fw = adev->gfx.me_fw; 499 header = (const struct common_firmware_header *)info->fw->data; 500 adev->firmware.fw_size += 501 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 502 503 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 504 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 505 info->fw = adev->gfx.ce_fw; 506 header = (const struct common_firmware_header *)info->fw->data; 507 adev->firmware.fw_size += 508 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 509 510 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 511 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 512 info->fw = adev->gfx.rlc_fw; 513 header = (const struct common_firmware_header *)info->fw->data; 514 adev->firmware.fw_size += 515 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 516 517 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 518 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 519 info->fw = adev->gfx.mec_fw; 520 header = (const struct common_firmware_header *)info->fw->data; 521 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 522 adev->firmware.fw_size += 523 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 524 525 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 526 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 527 info->fw = adev->gfx.mec_fw; 528 adev->firmware.fw_size += 529 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 530 531 if (adev->gfx.mec2_fw) { 532 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 533 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 534 info->fw = adev->gfx.mec2_fw; 535 header = (const struct common_firmware_header *)info->fw->data; 536 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 537 adev->firmware.fw_size += 538 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 539 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 540 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 541 info->fw = adev->gfx.mec2_fw; 542 adev->firmware.fw_size += 543 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 544 } 545 546 } 547 548 out: 549 if (err) { 550 dev_err(adev->dev, 551 "gfx9: Failed to load firmware \"%s\"\n", 552 fw_name); 553 release_firmware(adev->gfx.pfp_fw); 554 adev->gfx.pfp_fw = NULL; 555 release_firmware(adev->gfx.me_fw); 556 adev->gfx.me_fw = NULL; 557 release_firmware(adev->gfx.ce_fw); 558 adev->gfx.ce_fw = NULL; 559 release_firmware(adev->gfx.rlc_fw); 560 adev->gfx.rlc_fw = NULL; 561 release_firmware(adev->gfx.mec_fw); 562 adev->gfx.mec_fw = NULL; 563 release_firmware(adev->gfx.mec2_fw); 564 adev->gfx.mec2_fw = NULL; 565 } 566 return err; 567 } 568 569 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 570 { 571 u32 count = 0; 572 const struct cs_section_def *sect = NULL; 573 const struct cs_extent_def *ext = NULL; 574 575 /* begin clear state */ 576 count += 2; 577 /* context control state */ 578 count += 3; 579 580 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 581 for (ext = sect->section; ext->extent != NULL; ++ext) { 582 if (sect->id == SECT_CONTEXT) 583 count += 2 + ext->reg_count; 584 else 585 return 0; 586 } 587 } 588 589 /* end clear state */ 590 count += 2; 591 /* clear state */ 592 count += 2; 593 594 return count; 595 } 596 597 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 598 volatile u32 *buffer) 599 { 600 u32 
count = 0, i; 601 const struct cs_section_def *sect = NULL; 602 const struct cs_extent_def *ext = NULL; 603 604 if (adev->gfx.rlc.cs_data == NULL) 605 return; 606 if (buffer == NULL) 607 return; 608 609 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 610 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 611 612 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 613 buffer[count++] = cpu_to_le32(0x80000000); 614 buffer[count++] = cpu_to_le32(0x80000000); 615 616 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 617 for (ext = sect->section; ext->extent != NULL; ++ext) { 618 if (sect->id == SECT_CONTEXT) { 619 buffer[count++] = 620 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 621 buffer[count++] = cpu_to_le32(ext->reg_index - 622 PACKET3_SET_CONTEXT_REG_START); 623 for (i = 0; i < ext->reg_count; i++) 624 buffer[count++] = cpu_to_le32(ext->extent[i]); 625 } else { 626 return; 627 } 628 } 629 } 630 631 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 632 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 633 634 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 635 buffer[count++] = cpu_to_le32(0); 636 } 637 638 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 639 { 640 uint32_t data; 641 642 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 643 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 644 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 645 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 646 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 647 648 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 649 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 650 651 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 652 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 653 654 mutex_lock(&adev->grbm_idx_mutex); 655 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 656 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 657 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 658 659 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 660 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 661 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 662 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 663 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 664 665 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 666 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 667 data &= 0x0000FFFF; 668 data |= 0x00C00000; 669 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 670 671 /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */ 672 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF); 673 674 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 675 * but used for RLC_LB_CNTL configuration */ 676 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 677 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 678 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 679 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 680 mutex_unlock(&adev->grbm_idx_mutex); 681 } 682 683 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 684 { 685 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 686 } 687 688 static void rv_init_cp_jump_table(struct amdgpu_device *adev) 689 { 690 const __le32 *fw_data; 691 volatile u32 *dst_ptr; 692 int me, i, max_me = 5; 693 u32 bo_offset = 0; 694 u32 table_offset, table_size; 695 696 /* write the cp table buffer */ 697 dst_ptr = adev->gfx.rlc.cp_table_ptr; 698 for (me = 0; me < max_me; me++) { 699 if (me == 0) { 700 const struct gfx_firmware_header_v1_0 *hdr = 701 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 702 fw_data = (const __le32 *) 703 (adev->gfx.ce_fw->data + 704 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 705 table_offset = le32_to_cpu(hdr->jt_offset); 706 table_size = le32_to_cpu(hdr->jt_size); 707 } else if (me == 1) { 708 const struct gfx_firmware_header_v1_0 *hdr = 709 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 710 fw_data = (const __le32 *) 711 (adev->gfx.pfp_fw->data + 712 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 713 table_offset = le32_to_cpu(hdr->jt_offset); 714 table_size = le32_to_cpu(hdr->jt_size); 715 } else if (me == 2) { 716 const struct gfx_firmware_header_v1_0 *hdr = 717 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 718 fw_data = (const __le32 *) 719 (adev->gfx.me_fw->data + 720 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 721 table_offset = le32_to_cpu(hdr->jt_offset); 722 table_size = le32_to_cpu(hdr->jt_size); 723 } else if (me == 3) { 724 const struct gfx_firmware_header_v1_0 *hdr = 725 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 726 fw_data = (const __le32 *) 727 (adev->gfx.mec_fw->data + 728 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 729 table_offset = le32_to_cpu(hdr->jt_offset); 730 table_size = le32_to_cpu(hdr->jt_size); 731 } else if (me == 4) { 732 const struct gfx_firmware_header_v1_0 *hdr = 733 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 734 fw_data = (const __le32 *) 735 (adev->gfx.mec2_fw->data + 736 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 737 table_offset = le32_to_cpu(hdr->jt_offset); 738 table_size = le32_to_cpu(hdr->jt_size); 739 } 740 741 for (i = 0; i < table_size; i ++) { 742 dst_ptr[bo_offset + i] = 743 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 744 } 745 746 bo_offset += table_size; 747 } 748 } 749 750 static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev) 751 { 752 /* clear state block */ 753 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 754 &adev->gfx.rlc.clear_state_gpu_addr, 755 (void **)&adev->gfx.rlc.cs_ptr); 756 757 /* jump table block */ 758 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 759 &adev->gfx.rlc.cp_table_gpu_addr, 760 (void **)&adev->gfx.rlc.cp_table_ptr); 761 } 762 763 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 764 { 765 volatile u32 *dst_ptr; 766 u32 dws; 767 const struct cs_section_def *cs_data; 768 int r; 769 770 adev->gfx.rlc.cs_data = gfx9_cs_data; 771 772 cs_data = adev->gfx.rlc.cs_data; 773 774 if (cs_data) { 775 /* clear state block */ 776 adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); 777 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, 778 AMDGPU_GEM_DOMAIN_VRAM, 779 &adev->gfx.rlc.clear_state_obj, 780 &adev->gfx.rlc.clear_state_gpu_addr, 781 (void **)&adev->gfx.rlc.cs_ptr); 782 if (r) { 783 dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", 784 r); 785 gfx_v9_0_rlc_fini(adev); 786 return r; 787 } 788 /* set up the cs buffer */ 789 dst_ptr = adev->gfx.rlc.cs_ptr; 790 gfx_v9_0_get_csb_buffer(adev, dst_ptr); 791 
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 792 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 793 } 794 795 if (adev->asic_type == CHIP_RAVEN) { 796 /* TODO: double check the cp_table_size for RV */ 797 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 798 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, 799 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 800 &adev->gfx.rlc.cp_table_obj, 801 &adev->gfx.rlc.cp_table_gpu_addr, 802 (void **)&adev->gfx.rlc.cp_table_ptr); 803 if (r) { 804 dev_err(adev->dev, 805 "(%d) failed to create cp table bo\n", r); 806 gfx_v9_0_rlc_fini(adev); 807 return r; 808 } 809 810 rv_init_cp_jump_table(adev); 811 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); 812 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 813 814 gfx_v9_0_init_lbpw(adev); 815 } 816 817 return 0; 818 } 819 820 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 821 { 822 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 823 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 824 } 825 826 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 827 { 828 int r; 829 u32 *hpd; 830 const __le32 *fw_data; 831 unsigned fw_size; 832 u32 *fw; 833 size_t mec_hpd_size; 834 835 const struct gfx_firmware_header_v1_0 *mec_hdr; 836 837 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 838 839 /* take ownership of the relevant compute queues */ 840 amdgpu_gfx_compute_queue_acquire(adev); 841 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 842 843 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 844 AMDGPU_GEM_DOMAIN_GTT, 845 &adev->gfx.mec.hpd_eop_obj, 846 &adev->gfx.mec.hpd_eop_gpu_addr, 847 (void **)&hpd); 848 if (r) { 849 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 850 gfx_v9_0_mec_fini(adev); 851 return r; 852 } 853 854 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 855 856 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 857 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 858 859 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 860 861 fw_data = (const __le32 *) 862 (adev->gfx.mec_fw->data + 863 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 864 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 865 866 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 867 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 868 &adev->gfx.mec.mec_fw_obj, 869 &adev->gfx.mec.mec_fw_gpu_addr, 870 (void **)&fw); 871 if (r) { 872 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 873 gfx_v9_0_mec_fini(adev); 874 return r; 875 } 876 877 memcpy(fw, fw_data, fw_size); 878 879 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 880 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 881 882 return 0; 883 } 884 885 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 886 { 887 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 888 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 889 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 890 (address << SQ_IND_INDEX__INDEX__SHIFT) | 891 (SQ_IND_INDEX__FORCE_READ_MASK)); 892 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 893 } 894 895 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 896 uint32_t wave, uint32_t thread, 897 uint32_t regno, uint32_t num, uint32_t *out) 898 { 899 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 900 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 901 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 902 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 903 (thread << 
SQ_IND_INDEX__THREAD_ID__SHIFT) | 904 (SQ_IND_INDEX__FORCE_READ_MASK) | 905 (SQ_IND_INDEX__AUTO_INCR_MASK)); 906 while (num--) 907 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 908 } 909 910 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 911 { 912 /* type 1 wave data */ 913 dst[(*no_fields)++] = 1; 914 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 915 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 916 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 917 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 918 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 919 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 920 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 921 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 922 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 923 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 924 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 925 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 926 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 927 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 928 } 929 930 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 931 uint32_t wave, uint32_t start, 932 uint32_t size, uint32_t *dst) 933 { 934 wave_read_regs( 935 adev, simd, wave, 0, 936 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 937 } 938 939 940 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 941 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 942 .select_se_sh = &gfx_v9_0_select_se_sh, 943 .read_wave_data = &gfx_v9_0_read_wave_data, 944 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 945 }; 946 947 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 948 { 949 u32 gb_addr_config; 950 951 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 952 953 switch (adev->asic_type) { 954 case CHIP_VEGA10: 955 adev->gfx.config.max_hw_contexts = 8; 956 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 957 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 958 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 959 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 960 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 961 break; 962 case CHIP_RAVEN: 963 adev->gfx.config.max_hw_contexts = 8; 964 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 965 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 966 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 967 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 968 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 969 break; 970 default: 971 BUG(); 972 break; 973 } 974 975 adev->gfx.config.gb_addr_config = gb_addr_config; 976 977 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 978 REG_GET_FIELD( 979 adev->gfx.config.gb_addr_config, 980 GB_ADDR_CONFIG, 981 NUM_PIPES); 982 983 adev->gfx.config.max_tile_pipes = 984 adev->gfx.config.gb_addr_config_fields.num_pipes; 985 986 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 987 REG_GET_FIELD( 988 adev->gfx.config.gb_addr_config, 989 GB_ADDR_CONFIG, 990 NUM_BANKS); 991 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 992 REG_GET_FIELD( 993 adev->gfx.config.gb_addr_config, 994 GB_ADDR_CONFIG, 995 
MAX_COMPRESSED_FRAGS); 996 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 997 REG_GET_FIELD( 998 adev->gfx.config.gb_addr_config, 999 GB_ADDR_CONFIG, 1000 NUM_RB_PER_SE); 1001 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1002 REG_GET_FIELD( 1003 adev->gfx.config.gb_addr_config, 1004 GB_ADDR_CONFIG, 1005 NUM_SHADER_ENGINES); 1006 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1007 REG_GET_FIELD( 1008 adev->gfx.config.gb_addr_config, 1009 GB_ADDR_CONFIG, 1010 PIPE_INTERLEAVE_SIZE)); 1011 } 1012 1013 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1014 struct amdgpu_ngg_buf *ngg_buf, 1015 int size_se, 1016 int default_size_se) 1017 { 1018 int r; 1019 1020 if (size_se < 0) { 1021 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1022 return -EINVAL; 1023 } 1024 size_se = size_se ? size_se : default_size_se; 1025 1026 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1027 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1028 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1029 &ngg_buf->bo, 1030 &ngg_buf->gpu_addr, 1031 NULL); 1032 if (r) { 1033 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1034 return r; 1035 } 1036 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1037 1038 return r; 1039 } 1040 1041 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1042 { 1043 int i; 1044 1045 for (i = 0; i < NGG_BUF_MAX; i++) 1046 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1047 &adev->gfx.ngg.buf[i].gpu_addr, 1048 NULL); 1049 1050 memset(&adev->gfx.ngg.buf[0], 0, 1051 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1052 1053 adev->gfx.ngg.init = false; 1054 1055 return 0; 1056 } 1057 1058 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 1059 { 1060 int r; 1061 1062 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 1063 return 0; 1064 1065 /* GDS reserve memory: 64 bytes alignment */ 1066 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 1067 adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; 1068 adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; 1069 adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base; 1070 adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; 1071 1072 /* Primitive Buffer */ 1073 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 1074 amdgpu_prim_buf_per_se, 1075 64 * 1024); 1076 if (r) { 1077 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 1078 goto err; 1079 } 1080 1081 /* Position Buffer */ 1082 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 1083 amdgpu_pos_buf_per_se, 1084 256 * 1024); 1085 if (r) { 1086 dev_err(adev->dev, "Failed to create Position Buffer\n"); 1087 goto err; 1088 } 1089 1090 /* Control Sideband */ 1091 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 1092 amdgpu_cntl_sb_buf_per_se, 1093 256); 1094 if (r) { 1095 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 1096 goto err; 1097 } 1098 1099 /* Parameter Cache, not created by default */ 1100 if (amdgpu_param_buf_per_se <= 0) 1101 goto out; 1102 1103 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 1104 amdgpu_param_buf_per_se, 1105 512 * 1024); 1106 if (r) { 1107 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 1108 goto err; 1109 } 1110 1111 out: 1112 adev->gfx.ngg.init = true; 1113 return 0; 1114 err: 1115 gfx_v9_0_ngg_fini(adev); 1116 return r; 1117 } 1118 1119 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 1120 { 1121 struct amdgpu_ring *ring = 
&adev->gfx.gfx_ring[0]; 1122 int r; 1123 u32 data; 1124 u32 size; 1125 u32 base; 1126 1127 if (!amdgpu_ngg) 1128 return 0; 1129 1130 /* Program buffer size */ 1131 data = 0; 1132 size = adev->gfx.ngg.buf[NGG_PRIM].size / 256; 1133 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); 1134 1135 size = adev->gfx.ngg.buf[NGG_POS].size / 256; 1136 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size); 1137 1138 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 1139 1140 data = 0; 1141 size = adev->gfx.ngg.buf[NGG_CNTL].size / 256; 1142 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); 1143 1144 size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024; 1145 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size); 1146 1147 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 1148 1149 /* Program buffer base address */ 1150 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1151 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 1152 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 1153 1154 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1155 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 1156 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 1157 1158 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1159 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 1160 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 1161 1162 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1163 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 1164 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 1165 1166 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1167 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 1168 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 1169 1170 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1171 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 1172 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 1173 1174 /* Clear GDS reserved memory */ 1175 r = amdgpu_ring_alloc(ring, 17); 1176 if (r) { 1177 DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", 1178 ring->idx, r); 1179 return r; 1180 } 1181 1182 gfx_v9_0_write_data_to_reg(ring, 0, false, 1183 amdgpu_gds_reg_offset[0].mem_size, 1184 (adev->gds.mem.total_size + 1185 adev->gfx.ngg.gds_reserve_size) >> 1186 AMDGPU_GDS_SHIFT); 1187 1188 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1189 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 1190 PACKET3_DMA_DATA_SRC_SEL(2))); 1191 amdgpu_ring_write(ring, 0); 1192 amdgpu_ring_write(ring, 0); 1193 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 1194 amdgpu_ring_write(ring, 0); 1195 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size); 1196 1197 1198 gfx_v9_0_write_data_to_reg(ring, 0, false, 1199 amdgpu_gds_reg_offset[0].mem_size, 0); 1200 1201 amdgpu_ring_commit(ring); 1202 1203 return 0; 1204 } 1205 1206 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1207 int mec, int pipe, int queue) 1208 { 1209 int r; 1210 unsigned irq_type; 1211 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1212 1213 ring = &adev->gfx.compute_ring[ring_id]; 1214 1215 /* mec0 is me1 */ 1216 ring->me = mec + 1; 1217 ring->pipe = pipe; 1218 ring->queue = queue; 1219 1220 ring->ring_obj = NULL; 1221 ring->use_doorbell = true; 1222 ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1; 1223 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1224 + (ring_id * GFX9_MEC_HPD_SIZE); 1225 
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1226 1227 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1228 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1229 + ring->pipe; 1230 1231 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1232 r = amdgpu_ring_init(adev, ring, 1024, 1233 &adev->gfx.eop_irq, irq_type); 1234 if (r) 1235 return r; 1236 1237 1238 return 0; 1239 } 1240 1241 static int gfx_v9_0_sw_init(void *handle) 1242 { 1243 int i, j, k, r, ring_id; 1244 struct amdgpu_ring *ring; 1245 struct amdgpu_kiq *kiq; 1246 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1247 1248 switch (adev->asic_type) { 1249 case CHIP_VEGA10: 1250 case CHIP_RAVEN: 1251 adev->gfx.mec.num_mec = 2; 1252 break; 1253 default: 1254 adev->gfx.mec.num_mec = 1; 1255 break; 1256 } 1257 1258 adev->gfx.mec.num_pipe_per_mec = 4; 1259 adev->gfx.mec.num_queue_per_pipe = 8; 1260 1261 /* KIQ event */ 1262 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); 1263 if (r) 1264 return r; 1265 1266 /* EOP Event */ 1267 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); 1268 if (r) 1269 return r; 1270 1271 /* Privileged reg */ 1272 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184, 1273 &adev->gfx.priv_reg_irq); 1274 if (r) 1275 return r; 1276 1277 /* Privileged inst */ 1278 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185, 1279 &adev->gfx.priv_inst_irq); 1280 if (r) 1281 return r; 1282 1283 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1284 1285 gfx_v9_0_scratch_init(adev); 1286 1287 r = gfx_v9_0_init_microcode(adev); 1288 if (r) { 1289 DRM_ERROR("Failed to load gfx firmware!\n"); 1290 return r; 1291 } 1292 1293 r = gfx_v9_0_rlc_init(adev); 1294 if (r) { 1295 DRM_ERROR("Failed to init rlc BOs!\n"); 1296 return r; 1297 } 1298 1299 r = gfx_v9_0_mec_init(adev); 1300 if (r) { 1301 DRM_ERROR("Failed to init MEC BOs!\n"); 1302 return r; 1303 } 1304 1305 /* set up the gfx ring */ 1306 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1307 ring = &adev->gfx.gfx_ring[i]; 1308 ring->ring_obj = NULL; 1309 sprintf(ring->name, "gfx"); 1310 ring->use_doorbell = true; 1311 ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; 1312 r = amdgpu_ring_init(adev, ring, 1024, 1313 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); 1314 if (r) 1315 return r; 1316 } 1317 1318 /* set up the compute queues - allocate horizontally across pipes */ 1319 ring_id = 0; 1320 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1321 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1322 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1323 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1324 continue; 1325 1326 r = gfx_v9_0_compute_ring_init(adev, 1327 ring_id, 1328 i, k, j); 1329 if (r) 1330 return r; 1331 1332 ring_id++; 1333 } 1334 } 1335 } 1336 1337 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1338 if (r) { 1339 DRM_ERROR("Failed to init KIQ BOs!\n"); 1340 return r; 1341 } 1342 1343 kiq = &adev->gfx.kiq; 1344 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1345 if (r) 1346 return r; 1347 1348 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1349 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd)); 1350 if (r) 1351 return r; 1352 1353 /* reserve GDS, GWS and OA resource for gfx */ 1354 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 1355 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 1356 &adev->gds.gds_gfx_bo, NULL, NULL); 1357 if (r) 1358 return r; 
1359 1360 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 1361 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 1362 &adev->gds.gws_gfx_bo, NULL, NULL); 1363 if (r) 1364 return r; 1365 1366 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 1367 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 1368 &adev->gds.oa_gfx_bo, NULL, NULL); 1369 if (r) 1370 return r; 1371 1372 adev->gfx.ce_ram_size = 0x8000; 1373 1374 gfx_v9_0_gpu_early_init(adev); 1375 1376 r = gfx_v9_0_ngg_init(adev); 1377 if (r) 1378 return r; 1379 1380 return 0; 1381 } 1382 1383 1384 static int gfx_v9_0_sw_fini(void *handle) 1385 { 1386 int i; 1387 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1388 1389 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 1390 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 1391 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 1392 1393 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1394 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1395 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1396 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1397 1398 amdgpu_gfx_compute_mqd_sw_fini(adev); 1399 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1400 amdgpu_gfx_kiq_fini(adev); 1401 1402 gfx_v9_0_mec_fini(adev); 1403 gfx_v9_0_ngg_fini(adev); 1404 1405 return 0; 1406 } 1407 1408 1409 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1410 { 1411 /* TODO */ 1412 } 1413 1414 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1415 { 1416 u32 data; 1417 1418 if (instance == 0xffffffff) 1419 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1420 else 1421 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1422 1423 if (se_num == 0xffffffff) 1424 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1425 else 1426 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1427 1428 if (sh_num == 0xffffffff) 1429 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1430 else 1431 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1432 1433 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); 1434 } 1435 1436 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1437 { 1438 u32 data, mask; 1439 1440 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1441 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1442 1443 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1444 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1445 1446 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1447 adev->gfx.config.max_sh_per_se); 1448 1449 return (~data) & mask; 1450 } 1451 1452 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1453 { 1454 int i, j; 1455 u32 data; 1456 u32 active_rbs = 0; 1457 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1458 adev->gfx.config.max_sh_per_se; 1459 1460 mutex_lock(&adev->grbm_idx_mutex); 1461 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1462 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1463 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1464 data = gfx_v9_0_get_rb_active_bitmap(adev); 1465 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1466 rb_bitmap_width_per_sh); 1467 } 1468 } 1469 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1470 mutex_unlock(&adev->grbm_idx_mutex); 1471 1472 adev->gfx.config.backend_enable_mask = active_rbs; 1473 
adev->gfx.config.num_rbs = hweight32(active_rbs); 1474 } 1475 1476 #define DEFAULT_SH_MEM_BASES (0x6000) 1477 #define FIRST_COMPUTE_VMID (8) 1478 #define LAST_COMPUTE_VMID (16) 1479 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 1480 { 1481 int i; 1482 uint32_t sh_mem_config; 1483 uint32_t sh_mem_bases; 1484 1485 /* 1486 * Configure apertures: 1487 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1488 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 1489 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1490 */ 1491 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 1492 1493 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 1494 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 1495 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 1496 1497 mutex_lock(&adev->srbm_mutex); 1498 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1499 soc15_grbm_select(adev, 0, 0, 0, i); 1500 /* CP and shaders */ 1501 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 1502 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1503 } 1504 soc15_grbm_select(adev, 0, 0, 0, 0); 1505 mutex_unlock(&adev->srbm_mutex); 1506 } 1507 1508 static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) 1509 { 1510 u32 tmp; 1511 int i; 1512 1513 WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1514 1515 gfx_v9_0_tiling_mode_table_init(adev); 1516 1517 gfx_v9_0_setup_rb(adev); 1518 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1519 1520 /* XXX SH_MEM regs */ 1521 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1522 mutex_lock(&adev->srbm_mutex); 1523 for (i = 0; i < 16; i++) { 1524 soc15_grbm_select(adev, 0, 0, 0, i); 1525 /* CP and shaders */ 1526 tmp = 0; 1527 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 1528 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1529 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); 1530 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); 1531 } 1532 soc15_grbm_select(adev, 0, 0, 0, 0); 1533 1534 mutex_unlock(&adev->srbm_mutex); 1535 1536 gfx_v9_0_init_compute_vmid(adev); 1537 1538 mutex_lock(&adev->grbm_idx_mutex); 1539 /* 1540 * making sure that the following register writes will be broadcasted 1541 * to all the shaders 1542 */ 1543 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1544 1545 WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, 1546 (adev->gfx.config.sc_prim_fifo_size_frontend << 1547 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 1548 (adev->gfx.config.sc_prim_fifo_size_backend << 1549 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 1550 (adev->gfx.config.sc_hiz_tile_fifo_size << 1551 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 1552 (adev->gfx.config.sc_earlyz_tile_fifo_size << 1553 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 1554 mutex_unlock(&adev->grbm_idx_mutex); 1555 1556 } 1557 1558 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1559 { 1560 u32 i, j, k; 1561 u32 mask; 1562 1563 mutex_lock(&adev->grbm_idx_mutex); 1564 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1565 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1566 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1567 for (k = 0; k < adev->usec_timeout; k++) { 1568 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1569 break; 1570 udelay(1); 1571 } 1572 } 1573 } 1574 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1575 mutex_unlock(&adev->grbm_idx_mutex); 1576 1577 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 1578 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 1579 
RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 1580 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 1581 for (k = 0; k < adev->usec_timeout; k++) { 1582 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 1583 break; 1584 udelay(1); 1585 } 1586 } 1587 1588 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 1589 bool enable) 1590 { 1591 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 1592 1593 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 1594 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 1595 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 1596 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 1597 1598 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 1599 } 1600 1601 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 1602 { 1603 /* csib */ 1604 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 1605 adev->gfx.rlc.clear_state_gpu_addr >> 32); 1606 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 1607 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 1608 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 1609 adev->gfx.rlc.clear_state_size); 1610 } 1611 1612 static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, 1613 int indirect_offset, 1614 int list_size, 1615 int *unique_indirect_regs, 1616 int *unique_indirect_reg_count, 1617 int max_indirect_reg_count, 1618 int *indirect_start_offsets, 1619 int *indirect_start_offsets_count, 1620 int max_indirect_start_offsets_count) 1621 { 1622 int idx; 1623 bool new_entry = true; 1624 1625 for (; indirect_offset < list_size; indirect_offset++) { 1626 1627 if (new_entry) { 1628 new_entry = false; 1629 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 1630 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 1631 BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); 1632 } 1633 1634 if (register_list_format[indirect_offset] == 0xFFFFFFFF) { 1635 new_entry = true; 1636 continue; 1637 } 1638 1639 indirect_offset += 2; 1640 1641 /* look for the matching indice */ 1642 for (idx = 0; idx < *unique_indirect_reg_count; idx++) { 1643 if (unique_indirect_regs[idx] == 1644 register_list_format[indirect_offset]) 1645 break; 1646 } 1647 1648 if (idx >= *unique_indirect_reg_count) { 1649 unique_indirect_regs[*unique_indirect_reg_count] = 1650 register_list_format[indirect_offset]; 1651 idx = *unique_indirect_reg_count; 1652 *unique_indirect_reg_count = *unique_indirect_reg_count + 1; 1653 BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); 1654 } 1655 1656 register_list_format[indirect_offset] = idx; 1657 } 1658 } 1659 1660 static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) 1661 { 1662 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 1663 int unique_indirect_reg_count = 0; 1664 1665 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 1666 int indirect_start_offsets_count = 0; 1667 1668 int list_size = 0; 1669 int i = 0; 1670 u32 tmp = 0; 1671 1672 u32 *register_list_format = 1673 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 1674 if (!register_list_format) 1675 return -ENOMEM; 1676 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 1677 adev->gfx.rlc.reg_list_format_size_bytes); 1678 1679 /* setup unique_indirect_regs array and indirect_start_offsets array */ 1680 
gfx_v9_0_parse_ind_reg_list(register_list_format, 1681 GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, 1682 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 1683 unique_indirect_regs, 1684 &unique_indirect_reg_count, 1685 sizeof(unique_indirect_regs)/sizeof(int), 1686 indirect_start_offsets, 1687 &indirect_start_offsets_count, 1688 sizeof(indirect_start_offsets)/sizeof(int)); 1689 1690 /* enable auto inc in case it is disabled */ 1691 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 1692 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 1693 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 1694 1695 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 1696 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 1697 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 1698 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 1699 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 1700 adev->gfx.rlc.register_restore[i]); 1701 1702 /* load direct register */ 1703 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0); 1704 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 1705 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 1706 adev->gfx.rlc.register_restore[i]); 1707 1708 /* load indirect register */ 1709 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 1710 adev->gfx.rlc.reg_list_format_start); 1711 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 1712 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 1713 register_list_format[i]); 1714 1715 /* set save/restore list size */ 1716 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 1717 list_size = list_size >> 1; 1718 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 1719 adev->gfx.rlc.reg_restore_list_size); 1720 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 1721 1722 /* write the starting offsets to RLC scratch ram */ 1723 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 1724 adev->gfx.rlc.starting_offsets_start); 1725 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) 1726 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 1727 indirect_start_offsets[i]); 1728 1729 /* load unique indirect regs*/ 1730 for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) { 1731 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, 1732 unique_indirect_regs[i] & 0x3FFFF); 1733 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, 1734 unique_indirect_regs[i] >> 20); 1735 } 1736 1737 kfree(register_list_format); 1738 return 0; 1739 } 1740 1741 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 1742 { 1743 u32 tmp = 0; 1744 1745 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 1746 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 1747 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 1748 } 1749 1750 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 1751 bool enable) 1752 { 1753 uint32_t data = 0; 1754 uint32_t default_data = 0; 1755 1756 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 1757 if (enable == true) { 1758 /* enable GFXIP control over CGPG */ 1759 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 1760 if(default_data != data) 1761 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 1762 1763 /* update status */ 1764 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 1765 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 1766 if(default_data != data) 1767 WREG32(SOC15_REG_OFFSET(PWR, 0, 
			mmPWR_MISC_CNTL_STATUS), data);
	} else {
		/* restore GFXIP control over CGPG */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	}
}

static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* init IDLE_POLL_COUNT = 60 */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);

		/* init RLC PG Delay */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;

		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);

		pwr_10_0_gfxip_control_over_cgpg(adev, true);
	}
}

static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));

	if (enable == true) {
		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
	} else {
		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
	}
}

static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
							 bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));

	if (enable == true) {
		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
	} else {
		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
	}
}

static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
					    bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0,
mmRLC_PG_CNTL)); 1863 1864 if (enable == true) { 1865 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; 1866 if(default_data != data) 1867 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1868 } else { 1869 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; 1870 if(default_data != data) 1871 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1872 } 1873 } 1874 1875 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 1876 bool enable) 1877 { 1878 uint32_t data, default_data; 1879 1880 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1881 if (enable == true) 1882 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 1883 else 1884 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 1885 if(default_data != data) 1886 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1887 } 1888 1889 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 1890 bool enable) 1891 { 1892 uint32_t data, default_data; 1893 1894 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1895 if (enable == true) 1896 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; 1897 else 1898 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; 1899 if(default_data != data) 1900 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1901 1902 if (!enable) 1903 /* read any GFX register to wake up GFX */ 1904 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 1905 } 1906 1907 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 1908 bool enable) 1909 { 1910 uint32_t data, default_data; 1911 1912 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1913 if (enable == true) 1914 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; 1915 else 1916 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; 1917 if(default_data != data) 1918 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1919 } 1920 1921 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 1922 bool enable) 1923 { 1924 uint32_t data, default_data; 1925 1926 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1927 if (enable == true) 1928 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; 1929 else 1930 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; 1931 if(default_data != data) 1932 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1933 } 1934 1935 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 1936 { 1937 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 1938 AMD_PG_SUPPORT_GFX_SMG | 1939 AMD_PG_SUPPORT_GFX_DMG | 1940 AMD_PG_SUPPORT_CP | 1941 AMD_PG_SUPPORT_GDS | 1942 AMD_PG_SUPPORT_RLC_SMU_HS)) { 1943 gfx_v9_0_init_csb(adev); 1944 gfx_v9_0_init_rlc_save_restore_list(adev); 1945 gfx_v9_0_enable_save_restore_machine(adev); 1946 1947 if (adev->asic_type == CHIP_RAVEN) { 1948 WREG32(mmRLC_JUMP_TABLE_RESTORE, 1949 adev->gfx.rlc.cp_table_gpu_addr >> 8); 1950 gfx_v9_0_init_gfx_power_gating(adev); 1951 1952 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 1953 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 1954 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 1955 } else { 1956 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 1957 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 1958 } 1959 1960 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 1961 gfx_v9_0_enable_cp_power_gating(adev, true); 1962 else 1963 gfx_v9_0_enable_cp_power_gating(adev, false); 1964 } 1965 } 1966 } 1967 1968 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 1969 { 1970 u32 tmp = RREG32_SOC15(GC, 0, 
mmRLC_CNTL); 1971 1972 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 1973 WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp); 1974 1975 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 1976 1977 gfx_v9_0_wait_for_rlc_serdes(adev); 1978 } 1979 1980 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 1981 { 1982 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 1983 udelay(50); 1984 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 1985 udelay(50); 1986 } 1987 1988 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 1989 { 1990 #ifdef AMDGPU_RLC_DEBUG_RETRY 1991 u32 rlc_ucode_ver; 1992 #endif 1993 1994 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 1995 1996 /* carrizo do enable cp interrupt after cp inited */ 1997 if (!(adev->flags & AMD_IS_APU)) 1998 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 1999 2000 udelay(50); 2001 2002 #ifdef AMDGPU_RLC_DEBUG_RETRY 2003 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2004 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2005 if(rlc_ucode_ver == 0x108) { 2006 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2007 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2008 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2009 * default is 0x9C4 to create a 100us interval */ 2010 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2011 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2012 * to disable the page fault retry interrupts, default is 2013 * 0x100 (256) */ 2014 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2015 } 2016 #endif 2017 } 2018 2019 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2020 { 2021 const struct rlc_firmware_header_v2_0 *hdr; 2022 const __le32 *fw_data; 2023 unsigned i, fw_size; 2024 2025 if (!adev->gfx.rlc_fw) 2026 return -EINVAL; 2027 2028 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2029 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2030 2031 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2032 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2033 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2034 2035 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2036 RLCG_UCODE_LOADING_START_ADDRESS); 2037 for (i = 0; i < fw_size; i++) 2038 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2039 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2040 2041 return 0; 2042 } 2043 2044 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2045 { 2046 int r; 2047 2048 if (amdgpu_sriov_vf(adev)) 2049 return 0; 2050 2051 gfx_v9_0_rlc_stop(adev); 2052 2053 /* disable CG */ 2054 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2055 2056 /* disable PG */ 2057 WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); 2058 2059 gfx_v9_0_rlc_reset(adev); 2060 2061 gfx_v9_0_init_pg(adev); 2062 2063 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2064 /* legacy rlc firmware loading */ 2065 r = gfx_v9_0_rlc_load_microcode(adev); 2066 if (r) 2067 return r; 2068 } 2069 2070 if (adev->asic_type == CHIP_RAVEN) { 2071 if (amdgpu_lbpw != 0) 2072 gfx_v9_0_enable_lbpw(adev, true); 2073 else 2074 gfx_v9_0_enable_lbpw(adev, false); 2075 } 2076 2077 gfx_v9_0_rlc_start(adev); 2078 2079 return 0; 2080 } 2081 2082 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2083 { 2084 int i; 2085 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2086 2087 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2088 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 
0 : 1); 2089 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 2090 if (!enable) { 2091 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2092 adev->gfx.gfx_ring[i].ready = false; 2093 } 2094 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); 2095 udelay(50); 2096 } 2097 2098 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2099 { 2100 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2101 const struct gfx_firmware_header_v1_0 *ce_hdr; 2102 const struct gfx_firmware_header_v1_0 *me_hdr; 2103 const __le32 *fw_data; 2104 unsigned i, fw_size; 2105 2106 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2107 return -EINVAL; 2108 2109 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2110 adev->gfx.pfp_fw->data; 2111 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2112 adev->gfx.ce_fw->data; 2113 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2114 adev->gfx.me_fw->data; 2115 2116 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2117 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2118 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2119 2120 gfx_v9_0_cp_gfx_enable(adev, false); 2121 2122 /* PFP */ 2123 fw_data = (const __le32 *) 2124 (adev->gfx.pfp_fw->data + 2125 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2126 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2127 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2128 for (i = 0; i < fw_size; i++) 2129 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2130 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2131 2132 /* CE */ 2133 fw_data = (const __le32 *) 2134 (adev->gfx.ce_fw->data + 2135 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2136 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2137 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2138 for (i = 0; i < fw_size; i++) 2139 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2140 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2141 2142 /* ME */ 2143 fw_data = (const __le32 *) 2144 (adev->gfx.me_fw->data + 2145 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2146 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2147 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2148 for (i = 0; i < fw_size; i++) 2149 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2150 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2151 2152 return 0; 2153 } 2154 2155 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2156 { 2157 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2158 const struct cs_section_def *sect = NULL; 2159 const struct cs_extent_def *ext = NULL; 2160 int r, i, tmp; 2161 2162 /* init the CP */ 2163 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2164 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2165 2166 gfx_v9_0_cp_gfx_enable(adev, true); 2167 2168 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2169 if (r) { 2170 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2171 return r; 2172 } 2173 2174 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2175 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2176 2177 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2178 amdgpu_ring_write(ring, 0x80000000); 2179 amdgpu_ring_write(ring, 0x80000000); 2180 2181 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2182 for (ext = sect->section; ext->extent != NULL; ++ext) { 2183 if (sect->id == SECT_CONTEXT) { 2184 amdgpu_ring_write(ring, 
2185 PACKET3(PACKET3_SET_CONTEXT_REG, 2186 ext->reg_count)); 2187 amdgpu_ring_write(ring, 2188 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2189 for (i = 0; i < ext->reg_count; i++) 2190 amdgpu_ring_write(ring, ext->extent[i]); 2191 } 2192 } 2193 } 2194 2195 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2196 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2197 2198 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2199 amdgpu_ring_write(ring, 0); 2200 2201 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2202 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2203 amdgpu_ring_write(ring, 0x8000); 2204 amdgpu_ring_write(ring, 0x8000); 2205 2206 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 2207 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2208 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2209 amdgpu_ring_write(ring, tmp); 2210 amdgpu_ring_write(ring, 0); 2211 2212 amdgpu_ring_commit(ring); 2213 2214 return 0; 2215 } 2216 2217 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2218 { 2219 struct amdgpu_ring *ring; 2220 u32 tmp; 2221 u32 rb_bufsz; 2222 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2223 2224 /* Set the write pointer delay */ 2225 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2226 2227 /* set the RB to use vmid 0 */ 2228 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2229 2230 /* Set ring buffer size */ 2231 ring = &adev->gfx.gfx_ring[0]; 2232 rb_bufsz = order_base_2(ring->ring_size / 8); 2233 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2234 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2235 #ifdef __BIG_ENDIAN 2236 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2237 #endif 2238 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2239 2240 /* Initialize the ring buffer's write pointers */ 2241 ring->wptr = 0; 2242 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2243 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2244 2245 /* set the wb address wether it's enabled or not */ 2246 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2247 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2248 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2249 2250 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2251 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 2252 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 2253 2254 mdelay(1); 2255 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2256 2257 rb_addr = ring->gpu_addr >> 8; 2258 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2259 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2260 2261 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2262 if (ring->use_doorbell) { 2263 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2264 DOORBELL_OFFSET, ring->doorbell_index); 2265 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2266 DOORBELL_EN, 1); 2267 } else { 2268 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 2269 } 2270 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2271 2272 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2273 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2274 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2275 2276 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2277 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2278 2279 2280 /* start the ring */ 2281 
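	/*
	 * gfx_v9_0_cp_gfx_start() below un-halts the CP and pushes the
	 * clear-state sections from gfx9_cs_data plus the CONTEXT_CONTROL,
	 * CLEAR_STATE, SET_BASE and VGT_INDEX_TYPE init packets through
	 * gfx ring 0.
	 */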
gfx_v9_0_cp_gfx_start(adev); 2282 ring->ready = true; 2283 2284 return 0; 2285 } 2286 2287 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2288 { 2289 int i; 2290 2291 if (enable) { 2292 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); 2293 } else { 2294 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 2295 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2296 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2297 adev->gfx.compute_ring[i].ready = false; 2298 adev->gfx.kiq.ring.ready = false; 2299 } 2300 udelay(50); 2301 } 2302 2303 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2304 { 2305 const struct gfx_firmware_header_v1_0 *mec_hdr; 2306 const __le32 *fw_data; 2307 unsigned i; 2308 u32 tmp; 2309 2310 if (!adev->gfx.mec_fw) 2311 return -EINVAL; 2312 2313 gfx_v9_0_cp_compute_enable(adev, false); 2314 2315 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2316 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2317 2318 fw_data = (const __le32 *) 2319 (adev->gfx.mec_fw->data + 2320 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2321 tmp = 0; 2322 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2323 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2324 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2325 2326 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2327 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2328 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2329 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2330 2331 /* MEC1 */ 2332 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2333 mec_hdr->jt_offset); 2334 for (i = 0; i < mec_hdr->jt_size; i++) 2335 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2336 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2337 2338 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2339 adev->gfx.mec_fw_version); 2340 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 2341 2342 return 0; 2343 } 2344 2345 /* KIQ functions */ 2346 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2347 { 2348 uint32_t tmp; 2349 struct amdgpu_device *adev = ring->adev; 2350 2351 /* tell RLC which is KIQ queue */ 2352 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2353 tmp &= 0xffffff00; 2354 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2355 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2356 tmp |= 0x80; 2357 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2358 } 2359 2360 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2361 { 2362 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2363 uint32_t scratch, tmp = 0; 2364 uint64_t queue_mask = 0; 2365 int r, i; 2366 2367 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2368 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2369 continue; 2370 2371 /* This situation may be hit in the future if a new HW 2372 * generation exposes more than 64 queues. 
		 * If so, the definition of queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ?
0 : 1)) | 2419 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2420 PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ 2421 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2422 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2423 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2424 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2425 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2426 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2427 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2428 } 2429 /* write to scratch for completion */ 2430 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 2431 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 2432 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 2433 amdgpu_ring_commit(kiq_ring); 2434 2435 for (i = 0; i < adev->usec_timeout; i++) { 2436 tmp = RREG32(scratch); 2437 if (tmp == 0xDEADBEEF) 2438 break; 2439 DRM_UDELAY(1); 2440 } 2441 if (i >= adev->usec_timeout) { 2442 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 2443 scratch, tmp); 2444 r = -EINVAL; 2445 } 2446 amdgpu_gfx_scratch_free(adev, scratch); 2447 2448 return r; 2449 } 2450 2451 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2452 { 2453 struct amdgpu_device *adev = ring->adev; 2454 struct v9_mqd *mqd = ring->mqd_ptr; 2455 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2456 uint32_t tmp; 2457 2458 mqd->header = 0xC0310800; 2459 mqd->compute_pipelinestat_enable = 0x00000001; 2460 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2461 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2462 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2463 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2464 mqd->compute_misc_reserved = 0x00000003; 2465 2466 eop_base_addr = ring->eop_gpu_addr >> 8; 2467 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2468 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2469 2470 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2471 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2472 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2473 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2474 2475 mqd->cp_hqd_eop_control = tmp; 2476 2477 /* enable doorbell? 
*/ 2478 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2479 2480 if (ring->use_doorbell) { 2481 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2482 DOORBELL_OFFSET, ring->doorbell_index); 2483 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2484 DOORBELL_EN, 1); 2485 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2486 DOORBELL_SOURCE, 0); 2487 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2488 DOORBELL_HIT, 0); 2489 } 2490 else 2491 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2492 DOORBELL_EN, 0); 2493 2494 mqd->cp_hqd_pq_doorbell_control = tmp; 2495 2496 /* disable the queue if it's active */ 2497 ring->wptr = 0; 2498 mqd->cp_hqd_dequeue_request = 0; 2499 mqd->cp_hqd_pq_rptr = 0; 2500 mqd->cp_hqd_pq_wptr_lo = 0; 2501 mqd->cp_hqd_pq_wptr_hi = 0; 2502 2503 /* set the pointer to the MQD */ 2504 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 2505 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2506 2507 /* set MQD vmid to 0 */ 2508 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 2509 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2510 mqd->cp_mqd_control = tmp; 2511 2512 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2513 hqd_gpu_addr = ring->gpu_addr >> 8; 2514 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2515 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2516 2517 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2518 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 2519 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2520 (order_base_2(ring->ring_size / 4) - 1)); 2521 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2522 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 2523 #ifdef __BIG_ENDIAN 2524 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 2525 #endif 2526 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2527 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 2528 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2529 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2530 mqd->cp_hqd_pq_control = tmp; 2531 2532 /* set the wb address whether it's enabled or not */ 2533 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2534 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2535 mqd->cp_hqd_pq_rptr_report_addr_hi = 2536 upper_32_bits(wb_gpu_addr) & 0xffff; 2537 2538 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2539 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2540 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2541 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2542 2543 tmp = 0; 2544 /* enable the doorbell if requested */ 2545 if (ring->use_doorbell) { 2546 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2547 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2548 DOORBELL_OFFSET, ring->doorbell_index); 2549 2550 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2551 DOORBELL_EN, 1); 2552 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2553 DOORBELL_SOURCE, 0); 2554 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2555 DOORBELL_HIT, 0); 2556 } 2557 2558 mqd->cp_hqd_pq_doorbell_control = tmp; 2559 2560 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2561 ring->wptr = 0; 2562 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 2563 2564 /* set the vmid for the queue */ 2565 mqd->cp_hqd_vmid = 0; 2566 2567 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2568 tmp 
= REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2569 mqd->cp_hqd_persistent_state = tmp; 2570 2571 /* set MIN_IB_AVAIL_SIZE */ 2572 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2573 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2574 mqd->cp_hqd_ib_control = tmp; 2575 2576 /* activate the queue */ 2577 mqd->cp_hqd_active = 1; 2578 2579 return 0; 2580 } 2581 2582 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2583 { 2584 struct amdgpu_device *adev = ring->adev; 2585 struct v9_mqd *mqd = ring->mqd_ptr; 2586 int j; 2587 2588 /* disable wptr polling */ 2589 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2590 2591 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2592 mqd->cp_hqd_eop_base_addr_lo); 2593 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2594 mqd->cp_hqd_eop_base_addr_hi); 2595 2596 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2597 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, 2598 mqd->cp_hqd_eop_control); 2599 2600 /* enable doorbell? */ 2601 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2602 mqd->cp_hqd_pq_doorbell_control); 2603 2604 /* disable the queue if it's active */ 2605 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 2606 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 2607 for (j = 0; j < adev->usec_timeout; j++) { 2608 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 2609 break; 2610 udelay(1); 2611 } 2612 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 2613 mqd->cp_hqd_dequeue_request); 2614 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 2615 mqd->cp_hqd_pq_rptr); 2616 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 2617 mqd->cp_hqd_pq_wptr_lo); 2618 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 2619 mqd->cp_hqd_pq_wptr_hi); 2620 } 2621 2622 /* set the pointer to the MQD */ 2623 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, 2624 mqd->cp_mqd_base_addr_lo); 2625 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, 2626 mqd->cp_mqd_base_addr_hi); 2627 2628 /* set MQD vmid to 0 */ 2629 WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 2630 mqd->cp_mqd_control); 2631 2632 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2633 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, 2634 mqd->cp_hqd_pq_base_lo); 2635 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, 2636 mqd->cp_hqd_pq_base_hi); 2637 2638 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2639 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, 2640 mqd->cp_hqd_pq_control); 2641 2642 /* set the wb address whether it's enabled or not */ 2643 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 2644 mqd->cp_hqd_pq_rptr_report_addr_lo); 2645 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 2646 mqd->cp_hqd_pq_rptr_report_addr_hi); 2647 2648 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2649 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 2650 mqd->cp_hqd_pq_wptr_poll_addr_lo); 2651 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 2652 mqd->cp_hqd_pq_wptr_poll_addr_hi); 2653 2654 /* enable the doorbell if requested */ 2655 if (ring->use_doorbell) { 2656 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 2657 (AMDGPU_DOORBELL64_KIQ *2) << 2); 2658 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 2659 (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2); 2660 } 2661 2662 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2663 mqd->cp_hqd_pq_doorbell_control); 2664 2665 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2666 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 2667 mqd->cp_hqd_pq_wptr_lo); 2668 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 2669 mqd->cp_hqd_pq_wptr_hi); 
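	/*
	 * The register writes above and below simply mirror the MQD fields
	 * prepared in gfx_v9_0_mqd_init(); the caller has already routed SRBM
	 * to this ring's me/pipe/queue via soc15_grbm_select() while holding
	 * srbm_mutex, so they land on the KIQ's HQD.
	 */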
2670 2671 /* set the vmid for the queue */ 2672 WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 2673 2674 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 2675 mqd->cp_hqd_persistent_state); 2676 2677 /* activate the queue */ 2678 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 2679 mqd->cp_hqd_active); 2680 2681 if (ring->use_doorbell) 2682 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 2683 2684 return 0; 2685 } 2686 2687 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 2688 { 2689 struct amdgpu_device *adev = ring->adev; 2690 struct v9_mqd *mqd = ring->mqd_ptr; 2691 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 2692 2693 gfx_v9_0_kiq_setting(ring); 2694 2695 if (adev->gfx.in_reset) { /* for GPU_RESET case */ 2696 /* reset MQD to a clean status */ 2697 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2698 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 2699 2700 /* reset ring buffer */ 2701 ring->wptr = 0; 2702 amdgpu_ring_clear_ring(ring); 2703 2704 mutex_lock(&adev->srbm_mutex); 2705 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2706 gfx_v9_0_kiq_init_register(ring); 2707 soc15_grbm_select(adev, 0, 0, 0, 0); 2708 mutex_unlock(&adev->srbm_mutex); 2709 } else { 2710 memset((void *)mqd, 0, sizeof(*mqd)); 2711 mutex_lock(&adev->srbm_mutex); 2712 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2713 gfx_v9_0_mqd_init(ring); 2714 gfx_v9_0_kiq_init_register(ring); 2715 soc15_grbm_select(adev, 0, 0, 0, 0); 2716 mutex_unlock(&adev->srbm_mutex); 2717 2718 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2719 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 2720 } 2721 2722 return 0; 2723 } 2724 2725 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 2726 { 2727 struct amdgpu_device *adev = ring->adev; 2728 struct v9_mqd *mqd = ring->mqd_ptr; 2729 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 2730 2731 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { 2732 memset((void *)mqd, 0, sizeof(*mqd)); 2733 mutex_lock(&adev->srbm_mutex); 2734 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2735 gfx_v9_0_mqd_init(ring); 2736 soc15_grbm_select(adev, 0, 0, 0, 0); 2737 mutex_unlock(&adev->srbm_mutex); 2738 2739 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2740 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 2741 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ 2742 /* reset MQD to a clean status */ 2743 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2744 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 2745 2746 /* reset ring buffer */ 2747 ring->wptr = 0; 2748 amdgpu_ring_clear_ring(ring); 2749 } else { 2750 amdgpu_ring_clear_ring(ring); 2751 } 2752 2753 return 0; 2754 } 2755 2756 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 2757 { 2758 struct amdgpu_ring *ring = NULL; 2759 int r = 0, i; 2760 2761 gfx_v9_0_cp_compute_enable(adev, true); 2762 2763 ring = &adev->gfx.kiq.ring; 2764 2765 r = amdgpu_bo_reserve(ring->mqd_obj, false); 2766 if (unlikely(r != 0)) 2767 goto done; 2768 2769 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 2770 if (!r) { 2771 r = gfx_v9_0_kiq_init_queue(ring); 2772 amdgpu_bo_kunmap(ring->mqd_obj); 2773 ring->mqd_ptr = NULL; 2774 } 2775 amdgpu_bo_unreserve(ring->mqd_obj); 2776 if (r) 2777 goto done; 2778 2779 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2780 ring = &adev->gfx.compute_ring[i]; 2781 2782 r = amdgpu_bo_reserve(ring->mqd_obj, false); 2783 if (unlikely(r != 0)) 2784 goto done; 2785 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 2786 
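		/* only touch the KCQ MQD if the kmap above actually succeeded */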
if (!r) { 2787 r = gfx_v9_0_kcq_init_queue(ring); 2788 amdgpu_bo_kunmap(ring->mqd_obj); 2789 ring->mqd_ptr = NULL; 2790 } 2791 amdgpu_bo_unreserve(ring->mqd_obj); 2792 if (r) 2793 goto done; 2794 } 2795 2796 r = gfx_v9_0_kiq_kcq_enable(adev); 2797 done: 2798 return r; 2799 } 2800 2801 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 2802 { 2803 int r, i; 2804 struct amdgpu_ring *ring; 2805 2806 if (!(adev->flags & AMD_IS_APU)) 2807 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2808 2809 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2810 /* legacy firmware loading */ 2811 r = gfx_v9_0_cp_gfx_load_microcode(adev); 2812 if (r) 2813 return r; 2814 2815 r = gfx_v9_0_cp_compute_load_microcode(adev); 2816 if (r) 2817 return r; 2818 } 2819 2820 r = gfx_v9_0_cp_gfx_resume(adev); 2821 if (r) 2822 return r; 2823 2824 r = gfx_v9_0_kiq_resume(adev); 2825 if (r) 2826 return r; 2827 2828 ring = &adev->gfx.gfx_ring[0]; 2829 r = amdgpu_ring_test_ring(ring); 2830 if (r) { 2831 ring->ready = false; 2832 return r; 2833 } 2834 2835 ring = &adev->gfx.kiq.ring; 2836 ring->ready = true; 2837 r = amdgpu_ring_test_ring(ring); 2838 if (r) 2839 ring->ready = false; 2840 2841 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2842 ring = &adev->gfx.compute_ring[i]; 2843 2844 ring->ready = true; 2845 r = amdgpu_ring_test_ring(ring); 2846 if (r) 2847 ring->ready = false; 2848 } 2849 2850 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2851 2852 return 0; 2853 } 2854 2855 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 2856 { 2857 gfx_v9_0_cp_gfx_enable(adev, enable); 2858 gfx_v9_0_cp_compute_enable(adev, enable); 2859 } 2860 2861 static int gfx_v9_0_hw_init(void *handle) 2862 { 2863 int r; 2864 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2865 2866 gfx_v9_0_init_golden_registers(adev); 2867 2868 gfx_v9_0_gpu_init(adev); 2869 2870 r = gfx_v9_0_rlc_resume(adev); 2871 if (r) 2872 return r; 2873 2874 r = gfx_v9_0_cp_resume(adev); 2875 if (r) 2876 return r; 2877 2878 r = gfx_v9_0_ngg_en(adev); 2879 if (r) 2880 return r; 2881 2882 return r; 2883 } 2884 2885 static int gfx_v9_0_hw_fini(void *handle) 2886 { 2887 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2888 2889 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 2890 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 2891 if (amdgpu_sriov_vf(adev)) { 2892 pr_debug("For SRIOV client, shouldn't do anything.\n"); 2893 return 0; 2894 } 2895 gfx_v9_0_cp_enable(adev, false); 2896 gfx_v9_0_rlc_stop(adev); 2897 2898 return 0; 2899 } 2900 2901 static int gfx_v9_0_suspend(void *handle) 2902 { 2903 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2904 2905 adev->gfx.in_suspend = true; 2906 return gfx_v9_0_hw_fini(adev); 2907 } 2908 2909 static int gfx_v9_0_resume(void *handle) 2910 { 2911 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2912 int r; 2913 2914 r = gfx_v9_0_hw_init(adev); 2915 adev->gfx.in_suspend = false; 2916 return r; 2917 } 2918 2919 static bool gfx_v9_0_is_idle(void *handle) 2920 { 2921 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2922 2923 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 2924 GRBM_STATUS, GUI_ACTIVE)) 2925 return false; 2926 else 2927 return true; 2928 } 2929 2930 static int gfx_v9_0_wait_for_idle(void *handle) 2931 { 2932 unsigned i; 2933 u32 tmp; 2934 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2935 2936 for (i = 0; i < adev->usec_timeout; i++) { 2937 /* read MC_STATUS */ 2938 tmp = RREG32_SOC15(GC, 0, 
mmGRBM_STATUS) & 2939 GRBM_STATUS__GUI_ACTIVE_MASK; 2940 2941 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 2942 return 0; 2943 udelay(1); 2944 } 2945 return -ETIMEDOUT; 2946 } 2947 2948 static int gfx_v9_0_soft_reset(void *handle) 2949 { 2950 u32 grbm_soft_reset = 0; 2951 u32 tmp; 2952 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2953 2954 /* GRBM_STATUS */ 2955 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 2956 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 2957 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 2958 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 2959 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 2960 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 2961 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 2962 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2963 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 2964 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2965 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 2966 } 2967 2968 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 2969 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2970 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 2971 } 2972 2973 /* GRBM_STATUS2 */ 2974 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 2975 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 2976 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 2977 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2978 2979 2980 if (grbm_soft_reset) { 2981 /* stop the rlc */ 2982 gfx_v9_0_rlc_stop(adev); 2983 2984 /* Disable GFX parsing/prefetching */ 2985 gfx_v9_0_cp_gfx_enable(adev, false); 2986 2987 /* Disable MEC parsing/prefetching */ 2988 gfx_v9_0_cp_compute_enable(adev, false); 2989 2990 if (grbm_soft_reset) { 2991 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 2992 tmp |= grbm_soft_reset; 2993 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 2994 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 2995 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 2996 2997 udelay(50); 2998 2999 tmp &= ~grbm_soft_reset; 3000 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3001 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3002 } 3003 3004 /* Wait a little for things to settle down */ 3005 udelay(50); 3006 } 3007 return 0; 3008 } 3009 3010 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3011 { 3012 uint64_t clock; 3013 3014 mutex_lock(&adev->gfx.gpu_clock_mutex); 3015 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3016 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3017 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3018 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3019 return clock; 3020 } 3021 3022 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3023 uint32_t vmid, 3024 uint32_t gds_base, uint32_t gds_size, 3025 uint32_t gws_base, uint32_t gws_size, 3026 uint32_t oa_base, uint32_t oa_size) 3027 { 3028 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 3029 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 3030 3031 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 3032 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 3033 3034 oa_base = oa_base >> AMDGPU_OA_SHIFT; 3035 oa_size = oa_size >> AMDGPU_OA_SHIFT; 3036 3037 /* GDS Base */ 3038 gfx_v9_0_write_data_to_reg(ring, 0, false, 3039 amdgpu_gds_reg_offset[vmid].mem_base, 3040 gds_base); 3041 3042 /* GDS Size */ 3043 gfx_v9_0_write_data_to_reg(ring, 0, false, 3044 amdgpu_gds_reg_offset[vmid].mem_size, 3045 gds_size); 3046 3047 /* GWS */ 3048 gfx_v9_0_write_data_to_reg(ring, 0, false, 3049 
amdgpu_gds_reg_offset[vmid].gws, 3050 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3051 3052 /* OA */ 3053 gfx_v9_0_write_data_to_reg(ring, 0, false, 3054 amdgpu_gds_reg_offset[vmid].oa, 3055 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3056 } 3057 3058 static int gfx_v9_0_early_init(void *handle) 3059 { 3060 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3061 3062 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 3063 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3064 gfx_v9_0_set_ring_funcs(adev); 3065 gfx_v9_0_set_irq_funcs(adev); 3066 gfx_v9_0_set_gds_init(adev); 3067 gfx_v9_0_set_rlc_funcs(adev); 3068 3069 return 0; 3070 } 3071 3072 static int gfx_v9_0_late_init(void *handle) 3073 { 3074 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3075 int r; 3076 3077 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3078 if (r) 3079 return r; 3080 3081 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3082 if (r) 3083 return r; 3084 3085 return 0; 3086 } 3087 3088 static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) 3089 { 3090 uint32_t rlc_setting, data; 3091 unsigned i; 3092 3093 if (adev->gfx.rlc.in_safe_mode) 3094 return; 3095 3096 /* if RLC is not enabled, do nothing */ 3097 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3098 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3099 return; 3100 3101 if (adev->cg_flags & 3102 (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | 3103 AMD_CG_SUPPORT_GFX_3D_CGCG)) { 3104 data = RLC_SAFE_MODE__CMD_MASK; 3105 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3106 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3107 3108 /* wait for RLC_SAFE_MODE */ 3109 for (i = 0; i < adev->usec_timeout; i++) { 3110 if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3111 break; 3112 udelay(1); 3113 } 3114 adev->gfx.rlc.in_safe_mode = true; 3115 } 3116 } 3117 3118 static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) 3119 { 3120 uint32_t rlc_setting, data; 3121 3122 if (!adev->gfx.rlc.in_safe_mode) 3123 return; 3124 3125 /* if RLC is not enabled, do nothing */ 3126 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3127 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3128 return; 3129 3130 if (adev->cg_flags & 3131 (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 3132 /* 3133 * Try to exit safe mode only if it is already in safe 3134 * mode. 
		 */
		data = RLC_SAFE_MODE__CMD_MASK;
		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
		adev->gfx.rlc.in_safe_mode = false;
	}
}

static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	/* TODO: double check if we need to perform under safe mode */
	/* gfx_v9_0_enter_rlc_safe_mode(adev); */

	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
	} else {
		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
	}

	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
}

static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	/* TODO: double check if we need to perform under safe mode */
	/* gfx_v9_0_enter_rlc_safe_mode(adev); */

	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
	else
		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);

	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
	else
		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);

	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
}

static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t data, def;

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

		/* only for Vega10 & Raven1 */
		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* MGLS is a global flag to control all MGLS in GFX */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			/* 2 - RLC memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
			}
			/* 3 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
			}
		}
	} else {
		/* 1 - MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
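		/* only clear RLC_MEM_LS_EN when it is currently set, so the
		 * disable path avoids a redundant register write
		 */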
if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 3230 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 3231 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 3232 } 3233 3234 /* 3 - disable MGLS in CP */ 3235 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 3236 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 3237 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 3238 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 3239 } 3240 } 3241 } 3242 3243 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 3244 bool enable) 3245 { 3246 uint32_t data, def; 3247 3248 adev->gfx.rlc.funcs->enter_safe_mode(adev); 3249 3250 /* Enable 3D CGCG/CGLS */ 3251 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 3252 /* write cmd to clear cgcg/cgls ov */ 3253 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3254 /* unset CGCG override */ 3255 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 3256 /* update CGCG and CGLS override bits */ 3257 if (def != data) 3258 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3259 /* enable 3Dcgcg FSM(0x0020003f) */ 3260 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 3261 data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 3262 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 3263 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 3264 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 3265 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 3266 if (def != data) 3267 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 3268 3269 /* set IDLE_POLL_COUNT(0x00900100) */ 3270 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 3271 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 3272 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 3273 if (def != data) 3274 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 3275 } else { 3276 /* Disable CGCG/CGLS */ 3277 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 3278 /* disable cgcg, cgls should be disabled */ 3279 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 3280 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 3281 /* disable cgcg and cgls in FSM */ 3282 if (def != data) 3283 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 3284 } 3285 3286 adev->gfx.rlc.funcs->exit_safe_mode(adev); 3287 } 3288 3289 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 3290 bool enable) 3291 { 3292 uint32_t def, data; 3293 3294 adev->gfx.rlc.funcs->enter_safe_mode(adev); 3295 3296 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 3297 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3298 /* unset CGCG override */ 3299 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 3300 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 3301 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 3302 else 3303 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 3304 /* update CGCG and CGLS override bits */ 3305 if (def != data) 3306 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3307 3308 /* enable cgcg FSM(0x0020003F) */ 3309 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 3310 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 3311 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 3312 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 3313 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 3314 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 3315 if (def != data) 3316 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 3317 3318 /* set IDLE_POLL_COUNT(0x00900100) */ 3319 def = 
RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 3320 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 3321 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 3322 if (def != data) 3323 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 3324 } else { 3325 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 3326 /* reset CGCG/CGLS bits */ 3327 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 3328 /* disable cgcg and cgls in FSM */ 3329 if (def != data) 3330 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 3331 } 3332 3333 adev->gfx.rlc.funcs->exit_safe_mode(adev); 3334 } 3335 3336 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 3337 bool enable) 3338 { 3339 if (enable) { 3340 /* CGCG/CGLS should be enabled after MGCG/MGLS 3341 * === MGCG + MGLS === 3342 */ 3343 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 3344 /* === CGCG /CGLS for GFX 3D Only === */ 3345 gfx_v9_0_update_3d_clock_gating(adev, enable); 3346 /* === CGCG + CGLS === */ 3347 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 3348 } else { 3349 /* CGCG/CGLS should be disabled before MGCG/MGLS 3350 * === CGCG + CGLS === 3351 */ 3352 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 3353 /* === CGCG /CGLS for GFX 3D Only === */ 3354 gfx_v9_0_update_3d_clock_gating(adev, enable); 3355 /* === MGCG + MGLS === */ 3356 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 3357 } 3358 return 0; 3359 } 3360 3361 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 3362 .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode, 3363 .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode 3364 }; 3365 3366 static int gfx_v9_0_set_powergating_state(void *handle, 3367 enum amd_powergating_state state) 3368 { 3369 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3370 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 3371 3372 switch (adev->asic_type) { 3373 case CHIP_RAVEN: 3374 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 3375 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 3376 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 3377 } else { 3378 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 3379 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 3380 } 3381 3382 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 3383 gfx_v9_0_enable_cp_power_gating(adev, true); 3384 else 3385 gfx_v9_0_enable_cp_power_gating(adev, false); 3386 3387 /* update gfx cgpg state */ 3388 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 3389 3390 /* update mgcg state */ 3391 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 3392 break; 3393 default: 3394 break; 3395 } 3396 3397 return 0; 3398 } 3399 3400 static int gfx_v9_0_set_clockgating_state(void *handle, 3401 enum amd_clockgating_state state) 3402 { 3403 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3404 3405 if (amdgpu_sriov_vf(adev)) 3406 return 0; 3407 3408 switch (adev->asic_type) { 3409 case CHIP_VEGA10: 3410 case CHIP_RAVEN: 3411 gfx_v9_0_update_gfx_clock_gating(adev, 3412 state == AMD_CG_STATE_GATE ? 
static int gfx_v9_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		gfx_v9_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE ? true : false);
		break;
	default:
		break;
	}
	return 0;
}

static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}

static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
}

static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
	} else {
		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
	}

	return wptr;
}

static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
	}
}
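
/*
 * Emit an HDP flush on the ring: request the flush through the NBIO
 * HDP flush request register (ref_and_mask selects the gfx PFP or a
 * MEC1/MEC2 compute pipe) and poll the done register with WAIT_REG_MEM
 * until it completes.
 */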
static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;
	struct nbio_hdp_flush_reg *nbio_hf_reg;

	if (ring->adev->asic_type == CHIP_VEGA10)
		nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
			break;
		case 2:
			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
		reg_mem_engine = 1; /* pfp */
	}

	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
			      nbio_hf_reg->hdp_flush_req_offset,
			      nbio_hf_reg->hdp_flush_done_offset,
			      ref_and_mask, ref_and_mask, 0x20);
}

static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	gfx_v9_0_write_data_to_reg(ring, 0, true,
				   SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1);
}

static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v9_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}

static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
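
/*
 * Emit a fence with RELEASE_MEM: flush/writeback the TC and TCL1
 * caches at end of pipe, then write the 32 or 64 bit sequence number
 * to the fence address and optionally raise an interrupt.
 */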
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EOP_TC_MD_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));

	/*
	 * The address must be Qword aligned for a 64 bit write, and Dword
	 * aligned if only the low 32 bits are sent (data high discarded).
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
			      lower_32_bits(addr), upper_32_bits(addr),
			      seq, 0xffffffff, 4);
}

static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
	pd_addr |= AMDGPU_PTE_VALID;

	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
				   lower_32_bits(pd_addr));

	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
				   hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
				   upper_32_bits(pd_addr));

	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
				   hub->vm_inv_eng0_req + eng, req);

	/* wait for the invalidate to complete */
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + eng,
			      0, 1 << vm_id, 1 << vm_id, 0x20);

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
}

static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
	else
		BUG();
	return wptr;
}

static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG(); /* only DOORBELL method supported on gfx9 now */
	}
}
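
/*
 * KIQ fences only carry a 32 bit sequence number; it is written with
 * WRITE_DATA and, when an interrupt is requested, a second WRITE_DATA
 * to CPC_INT_STATUS raises the GENERIC2 interrupt.
 */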
static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	static struct v9_ce_ib_state ce_payload = {0};
	uint64_t csa_addr;
	int cnt;

	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
}

static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	static struct v9_de_ib_state de_payload = {0};
	uint64_t csa_addr, gds_addr;
	int cnt;

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}
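
/*
 * Emit a CONTEXT_CONTROL packet.  dw2 selects which state blocks the
 * CP reloads on a context switch (global config/uconfig, per-context
 * state, SH registers) and whether CE RAM is reloaded when a preamble
 * IB is present.
 */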
static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame begin, 1: frame end */
}

static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       TIME_STAMP_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}
}
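
/*
 * EOP (end of pipe) timestamp interrupt control for the compute pipes.
 * The TIME_STAMP_INT_ENABLE bit lives in a per-pipe
 * CP_ME1_PIPEn_INT_CNTL register, so the request is first resolved to
 * the matching pipe.
 */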
static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
			break;
		case 1:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
			break;
		case 2:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
			break;
		case 3:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_REG_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}
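
/*
 * Privileged instruction faults use the same CP_INT_CNTL_RING0 enable
 * scheme as the privileged register faults above; both sources end up
 * scheduling adev->reset_work from their irq handlers further down.
 */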
static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_INSTR_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
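
/*
 * EOP interrupt handler.  The IH ring_id encodes me/pipe/queue of the
 * originating ring: me 0 is the gfx ring, me 1/2 are the compute MECs
 * and are matched against the driver's compute rings before fence
 * processing.
 */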
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	uint32_t tmp, target;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	if (ring->me == 1)
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
	else
		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
	target += ring->pipe;

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		if (state == AMDGPU_IRQ_STATE_DISABLE) {
			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32(target, tmp);
		} else {
			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32(target, tmp);
		}
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
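
/*
 * Ring function tables.  The emit_frame_size sums below reserve worst
 * case ring space per submission and must stay in sync with the
 * emit_* helpers above when packet sizes change.
 */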
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		24 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jump to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		24 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
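
/*
 * The KIQ ring reuses the compute rptr/wptr helpers but emits its
 * fences with gfx_v9_0_ring_emit_fence_kiq and additionally provides
 * emit_rreg/emit_wreg for ring-based register access.
 */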
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		24 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
	.set = gfx_v9_0_kiq_set_interrupt_state,
	.process = gfx_v9_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
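
/*
 * CU (compute unit) accounting helpers: program the user-requested
 * inactive CU bitmap per SE/SH, then read back the combined
 * CC/GC_USER_SHADER_ARRAY_CONFIG masks to fill the active and
 * always-on CU bitmaps in amdgpu_cu_info.
 */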
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};