/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "nv.h"
#include "nvd.h"

#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
#include "nbio_v2_3.h"

/*
 * Navi10 has two graphics rings that share each graphics pipe:
 * 1. Primary ring
 * 2. Async ring
 *
 * During the bring-up phase only the primary ring was used, so the gfx ring
 * count was initially set to 1.
 */
#define GFX10_NUM_GFX_RINGS	2
#define GFX10_MEC_HPD_SIZE	2048

#define F32_CE_PROGRAM_RAM_SIZE		65536
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L

#define mmCGTT_GS_NGG_CLK_CTRL		0x5087
#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX	1

MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi10_me.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");

MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
MODULE_FIRMWARE("amdgpu/navi14_me.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL,
0x60000010, 0x479c0010), 113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100), 114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) 115 }; 116 117 static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = 118 { 119 /* Pending on emulation bring up */ 120 }; 121 122 static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = 123 { 124 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), 125 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), 126 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), 127 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), 129 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), 130 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), 131 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), 132 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), 134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), 135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), 136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000043), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), 146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), 147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), 148 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), 149 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), 150 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), 151 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), 152 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 153 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), 154 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), 155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), 156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), 157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), 158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), 159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000), 160 }; 161 162 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = 163 { 164 /* Pending on emulation bring up */ 165 }; 166 167 #define DEFAULT_SH_MEM_CONFIG \ 168 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ 169 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ 170 (SH_MEM_RETRY_MODE_ALL << 
	  SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance);
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);

static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask: 0, queue_type: 0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base: 0, gds heap size: 0 */
}

static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_queues: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* doorbell offset and engine select */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx10_kiq_set_resources,
	.kiq_map_queues = gfx10_kiq_map_queues,
	.kiq_unmap_queues = gfx10_kiq_unmap_queues,
	.kiq_query_status = gfx10_kiq_query_status,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
};

static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
}

static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_0_nv10,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
		break;
	case CHIP_NAVI14:
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_10_1_nv14,
						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
		break;
	default:
		break;
	}
}

static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ?
WR_CONFIRM : 0)); 334 amdgpu_ring_write(ring, reg); 335 amdgpu_ring_write(ring, 0); 336 amdgpu_ring_write(ring, val); 337 } 338 339 static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 340 int mem_space, int opt, uint32_t addr0, 341 uint32_t addr1, uint32_t ref, uint32_t mask, 342 uint32_t inv) 343 { 344 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 345 amdgpu_ring_write(ring, 346 /* memory (1) or register (0) */ 347 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 348 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 349 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 350 WAIT_REG_MEM_ENGINE(eng_sel))); 351 352 if (mem_space) 353 BUG_ON(addr0 & 0x3); /* Dword align */ 354 amdgpu_ring_write(ring, addr0); 355 amdgpu_ring_write(ring, addr1); 356 amdgpu_ring_write(ring, ref); 357 amdgpu_ring_write(ring, mask); 358 amdgpu_ring_write(ring, inv); /* poll interval */ 359 } 360 361 static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) 362 { 363 struct amdgpu_device *adev = ring->adev; 364 uint32_t scratch; 365 uint32_t tmp = 0; 366 unsigned i; 367 int r; 368 369 r = amdgpu_gfx_scratch_get(adev, &scratch); 370 if (r) { 371 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 372 return r; 373 } 374 375 WREG32(scratch, 0xCAFEDEAD); 376 377 r = amdgpu_ring_alloc(ring, 3); 378 if (r) { 379 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 380 ring->idx, r); 381 amdgpu_gfx_scratch_free(adev, scratch); 382 return r; 383 } 384 385 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 386 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 387 amdgpu_ring_write(ring, 0xDEADBEEF); 388 amdgpu_ring_commit(ring); 389 390 for (i = 0; i < adev->usec_timeout; i++) { 391 tmp = RREG32(scratch); 392 if (tmp == 0xDEADBEEF) 393 break; 394 if (amdgpu_emu_mode == 1) 395 msleep(1); 396 else 397 DRM_UDELAY(1); 398 } 399 if (i < adev->usec_timeout) { 400 if (amdgpu_emu_mode == 1) 401 DRM_INFO("ring test on %d succeeded in %d msecs\n", 402 ring->idx, i); 403 else 404 DRM_INFO("ring test on %d succeeded in %d usecs\n", 405 ring->idx, i); 406 } else { 407 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 408 ring->idx, scratch, tmp); 409 r = -EINVAL; 410 } 411 amdgpu_gfx_scratch_free(adev, scratch); 412 413 return r; 414 } 415 416 static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 417 { 418 struct amdgpu_device *adev = ring->adev; 419 struct amdgpu_ib ib; 420 struct dma_fence *f = NULL; 421 uint32_t scratch; 422 uint32_t tmp = 0; 423 long r; 424 425 r = amdgpu_gfx_scratch_get(adev, &scratch); 426 if (r) { 427 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); 428 return r; 429 } 430 431 WREG32(scratch, 0xCAFEDEAD); 432 433 memset(&ib, 0, sizeof(ib)); 434 r = amdgpu_ib_get(adev, NULL, 256, &ib); 435 if (r) { 436 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 437 goto err1; 438 } 439 440 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 441 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 442 ib.ptr[2] = 0xDEADBEEF; 443 ib.length_dw = 3; 444 445 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 446 if (r) 447 goto err2; 448 449 r = dma_fence_wait_timeout(f, false, timeout); 450 if (r == 0) { 451 DRM_ERROR("amdgpu: IB test timed out.\n"); 452 r = -ETIMEDOUT; 453 goto err2; 454 } else if (r < 0) { 455 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 456 goto err2; 457 } 458 459 tmp = RREG32(scratch); 460 if (tmp == 0xDEADBEEF) { 461 DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 462 r = 0; 463 } else { 464 
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 465 scratch, tmp); 466 r = -EINVAL; 467 } 468 err2: 469 amdgpu_ib_free(adev, &ib, NULL); 470 dma_fence_put(f); 471 err1: 472 amdgpu_gfx_scratch_free(adev, scratch); 473 474 return r; 475 } 476 477 static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) 478 { 479 release_firmware(adev->gfx.pfp_fw); 480 adev->gfx.pfp_fw = NULL; 481 release_firmware(adev->gfx.me_fw); 482 adev->gfx.me_fw = NULL; 483 release_firmware(adev->gfx.ce_fw); 484 adev->gfx.ce_fw = NULL; 485 release_firmware(adev->gfx.rlc_fw); 486 adev->gfx.rlc_fw = NULL; 487 release_firmware(adev->gfx.mec_fw); 488 adev->gfx.mec_fw = NULL; 489 release_firmware(adev->gfx.mec2_fw); 490 adev->gfx.mec2_fw = NULL; 491 492 kfree(adev->gfx.rlc.register_list_format); 493 } 494 495 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 496 { 497 const struct rlc_firmware_header_v2_1 *rlc_hdr; 498 499 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 500 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 501 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 502 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 503 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 504 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 505 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 506 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 507 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 508 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 509 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 510 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 511 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 512 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 513 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 514 } 515 516 static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) 517 { 518 switch (adev->asic_type) { 519 case CHIP_NAVI10: 520 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 521 break; 522 default: 523 break; 524 } 525 } 526 527 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) 528 { 529 const char *chip_name; 530 char fw_name[30]; 531 int err; 532 struct amdgpu_firmware_info *info = NULL; 533 const struct common_firmware_header *header = NULL; 534 const struct gfx_firmware_header_v1_0 *cp_hdr; 535 const struct rlc_firmware_header_v2_0 *rlc_hdr; 536 unsigned int *tmp = NULL; 537 unsigned int i = 0; 538 uint16_t version_major; 539 uint16_t version_minor; 540 541 DRM_DEBUG("\n"); 542 543 switch (adev->asic_type) { 544 case CHIP_NAVI10: 545 chip_name = "navi10"; 546 break; 547 case CHIP_NAVI14: 548 chip_name = "navi14"; 549 break; 550 default: 551 BUG(); 552 } 553 554 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 555 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 556 if (err) 557 goto out; 558 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 559 if (err) 560 goto out; 561 cp_hdr = 
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 562 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 563 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 564 565 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 566 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 567 if (err) 568 goto out; 569 err = amdgpu_ucode_validate(adev->gfx.me_fw); 570 if (err) 571 goto out; 572 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 573 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 574 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 575 576 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 577 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 578 if (err) 579 goto out; 580 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 581 if (err) 582 goto out; 583 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 584 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 585 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 586 587 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 588 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 589 if (err) 590 goto out; 591 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 592 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 593 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 594 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 595 if (version_major == 2 && version_minor == 1) 596 adev->gfx.rlc.is_rlc_v2_1 = true; 597 598 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 599 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 600 adev->gfx.rlc.save_and_restore_offset = 601 le32_to_cpu(rlc_hdr->save_and_restore_offset); 602 adev->gfx.rlc.clear_state_descriptor_offset = 603 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 604 adev->gfx.rlc.avail_scratch_ram_locations = 605 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 606 adev->gfx.rlc.reg_restore_list_size = 607 le32_to_cpu(rlc_hdr->reg_restore_list_size); 608 adev->gfx.rlc.reg_list_format_start = 609 le32_to_cpu(rlc_hdr->reg_list_format_start); 610 adev->gfx.rlc.reg_list_format_separate_start = 611 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 612 adev->gfx.rlc.starting_offsets_start = 613 le32_to_cpu(rlc_hdr->starting_offsets_start); 614 adev->gfx.rlc.reg_list_format_size_bytes = 615 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 616 adev->gfx.rlc.reg_list_size_bytes = 617 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 618 adev->gfx.rlc.register_list_format = 619 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 620 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 621 if (!adev->gfx.rlc.register_list_format) { 622 err = -ENOMEM; 623 goto out; 624 } 625 626 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 627 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 628 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 629 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 630 631 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 632 633 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 634 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 635 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 636 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 637 638 if 
(adev->gfx.rlc.is_rlc_v2_1) 639 gfx_v10_0_init_rlc_ext_microcode(adev); 640 641 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 642 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 643 if (err) 644 goto out; 645 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 646 if (err) 647 goto out; 648 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 649 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 650 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 651 652 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 653 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 654 if (!err) { 655 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 656 if (err) 657 goto out; 658 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 659 adev->gfx.mec2_fw->data; 660 adev->gfx.mec2_fw_version = 661 le32_to_cpu(cp_hdr->header.ucode_version); 662 adev->gfx.mec2_feature_version = 663 le32_to_cpu(cp_hdr->ucode_feature_version); 664 } else { 665 err = 0; 666 adev->gfx.mec2_fw = NULL; 667 } 668 669 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 670 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 671 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 672 info->fw = adev->gfx.pfp_fw; 673 header = (const struct common_firmware_header *)info->fw->data; 674 adev->firmware.fw_size += 675 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 676 677 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 678 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 679 info->fw = adev->gfx.me_fw; 680 header = (const struct common_firmware_header *)info->fw->data; 681 adev->firmware.fw_size += 682 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 683 684 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 685 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 686 info->fw = adev->gfx.ce_fw; 687 header = (const struct common_firmware_header *)info->fw->data; 688 adev->firmware.fw_size += 689 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 690 691 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 692 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 693 info->fw = adev->gfx.rlc_fw; 694 header = (const struct common_firmware_header *)info->fw->data; 695 adev->firmware.fw_size += 696 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 697 698 if (adev->gfx.rlc.is_rlc_v2_1 && 699 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 700 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 701 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 702 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 703 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 704 info->fw = adev->gfx.rlc_fw; 705 adev->firmware.fw_size += 706 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 707 708 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 709 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 710 info->fw = adev->gfx.rlc_fw; 711 adev->firmware.fw_size += 712 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 713 714 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 715 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 716 info->fw = adev->gfx.rlc_fw; 717 adev->firmware.fw_size += 718 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 719 } 720 721 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 722 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 723 info->fw = adev->gfx.mec_fw; 724 header = (const struct 
common_firmware_header *)info->fw->data; 725 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 726 adev->firmware.fw_size += 727 ALIGN(le32_to_cpu(header->ucode_size_bytes) - 728 le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 729 730 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 731 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 732 info->fw = adev->gfx.mec_fw; 733 adev->firmware.fw_size += 734 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 735 736 if (adev->gfx.mec2_fw) { 737 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 738 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 739 info->fw = adev->gfx.mec2_fw; 740 header = (const struct common_firmware_header *)info->fw->data; 741 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 742 adev->firmware.fw_size += 743 ALIGN(le32_to_cpu(header->ucode_size_bytes) - 744 le32_to_cpu(cp_hdr->jt_size) * 4, 745 PAGE_SIZE); 746 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 747 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 748 info->fw = adev->gfx.mec2_fw; 749 adev->firmware.fw_size += 750 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 751 PAGE_SIZE); 752 } 753 } 754 755 out: 756 if (err) { 757 dev_err(adev->dev, 758 "gfx10: Failed to load firmware \"%s\"\n", 759 fw_name); 760 release_firmware(adev->gfx.pfp_fw); 761 adev->gfx.pfp_fw = NULL; 762 release_firmware(adev->gfx.me_fw); 763 adev->gfx.me_fw = NULL; 764 release_firmware(adev->gfx.ce_fw); 765 adev->gfx.ce_fw = NULL; 766 release_firmware(adev->gfx.rlc_fw); 767 adev->gfx.rlc_fw = NULL; 768 release_firmware(adev->gfx.mec_fw); 769 adev->gfx.mec_fw = NULL; 770 release_firmware(adev->gfx.mec2_fw); 771 adev->gfx.mec2_fw = NULL; 772 } 773 774 gfx_v10_0_check_gfxoff_flag(adev); 775 776 return err; 777 } 778 779 static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) 780 { 781 u32 count = 0; 782 const struct cs_section_def *sect = NULL; 783 const struct cs_extent_def *ext = NULL; 784 785 /* begin clear state */ 786 count += 2; 787 /* context control state */ 788 count += 3; 789 790 for (sect = gfx10_cs_data; sect->section != NULL; ++sect) { 791 for (ext = sect->section; ext->extent != NULL; ++ext) { 792 if (sect->id == SECT_CONTEXT) 793 count += 2 + ext->reg_count; 794 else 795 return 0; 796 } 797 } 798 799 /* set PA_SC_TILE_STEERING_OVERRIDE */ 800 count += 3; 801 /* end clear state */ 802 count += 2; 803 /* clear state */ 804 count += 2; 805 806 return count; 807 } 808 809 static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, 810 volatile u32 *buffer) 811 { 812 u32 count = 0, i; 813 const struct cs_section_def *sect = NULL; 814 const struct cs_extent_def *ext = NULL; 815 int ctx_reg_offset; 816 817 if (adev->gfx.rlc.cs_data == NULL) 818 return; 819 if (buffer == NULL) 820 return; 821 822 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 823 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 824 825 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 826 buffer[count++] = cpu_to_le32(0x80000000); 827 buffer[count++] = cpu_to_le32(0x80000000); 828 829 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 830 for (ext = sect->section; ext->extent != NULL; ++ext) { 831 if (sect->id == SECT_CONTEXT) { 832 buffer[count++] = 833 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 834 buffer[count++] = cpu_to_le32(ext->reg_index - 835 PACKET3_SET_CONTEXT_REG_START); 836 for (i = 0; i < ext->reg_count; i++) 837 buffer[count++] = cpu_to_le32(ext->extent[i]); 838 } else { 839 
return; 840 } 841 } 842 } 843 844 ctx_reg_offset = 845 SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 846 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 847 buffer[count++] = cpu_to_le32(ctx_reg_offset); 848 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 849 850 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 851 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 852 853 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 854 buffer[count++] = cpu_to_le32(0); 855 } 856 857 static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev) 858 { 859 /* clear state block */ 860 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 861 &adev->gfx.rlc.clear_state_gpu_addr, 862 (void **)&adev->gfx.rlc.cs_ptr); 863 864 /* jump table block */ 865 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 866 &adev->gfx.rlc.cp_table_gpu_addr, 867 (void **)&adev->gfx.rlc.cp_table_ptr); 868 } 869 870 static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) 871 { 872 const struct cs_section_def *cs_data; 873 int r; 874 875 adev->gfx.rlc.cs_data = gfx10_cs_data; 876 877 cs_data = adev->gfx.rlc.cs_data; 878 879 if (cs_data) { 880 /* init clear state block */ 881 r = amdgpu_gfx_rlc_init_csb(adev); 882 if (r) 883 return r; 884 } 885 886 return 0; 887 } 888 889 static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) 890 { 891 int r; 892 893 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 894 if (unlikely(r != 0)) 895 return r; 896 897 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, 898 AMDGPU_GEM_DOMAIN_VRAM); 899 if (!r) 900 adev->gfx.rlc.clear_state_gpu_addr = 901 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); 902 903 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 904 905 return r; 906 } 907 908 static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) 909 { 910 int r; 911 912 if (!adev->gfx.rlc.clear_state_obj) 913 return; 914 915 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 916 if (likely(r == 0)) { 917 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 918 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 919 } 920 } 921 922 static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) 923 { 924 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 925 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 926 } 927 928 static int gfx_v10_0_me_init(struct amdgpu_device *adev) 929 { 930 int r; 931 932 bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 933 934 amdgpu_gfx_graphics_queue_acquire(adev); 935 936 r = gfx_v10_0_init_microcode(adev); 937 if (r) 938 DRM_ERROR("Failed to load gfx firmware!\n"); 939 940 return r; 941 } 942 943 static int gfx_v10_0_mec_init(struct amdgpu_device *adev) 944 { 945 int r; 946 u32 *hpd; 947 const __le32 *fw_data = NULL; 948 unsigned fw_size; 949 u32 *fw = NULL; 950 size_t mec_hpd_size; 951 952 const struct gfx_firmware_header_v1_0 *mec_hdr = NULL; 953 954 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 955 956 /* take ownership of the relevant compute queues */ 957 amdgpu_gfx_compute_queue_acquire(adev); 958 mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE; 959 960 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 961 AMDGPU_GEM_DOMAIN_GTT, 962 &adev->gfx.mec.hpd_eop_obj, 963 &adev->gfx.mec.hpd_eop_gpu_addr, 964 (void **)&hpd); 965 if (r) { 966 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 967 
gfx_v10_0_mec_fini(adev); 968 return r; 969 } 970 971 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 972 973 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 974 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 975 976 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 977 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 978 979 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 980 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 981 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 982 983 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 984 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 985 &adev->gfx.mec.mec_fw_obj, 986 &adev->gfx.mec.mec_fw_gpu_addr, 987 (void **)&fw); 988 if (r) { 989 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 990 gfx_v10_0_mec_fini(adev); 991 return r; 992 } 993 994 memcpy(fw, fw_data, fw_size); 995 996 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 997 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 998 } 999 1000 return 0; 1001 } 1002 1003 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) 1004 { 1005 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1006 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1007 (address << SQ_IND_INDEX__INDEX__SHIFT)); 1008 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1009 } 1010 1011 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, 1012 uint32_t thread, uint32_t regno, 1013 uint32_t num, uint32_t *out) 1014 { 1015 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1016 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1017 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1018 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 1019 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1020 while (num--) 1021 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1022 } 1023 1024 static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1025 { 1026 /* in gfx10 the SIMD_ID is specified as part of the INSTANCE 1027 * field when performing a select_se_sh so it should be 1028 * zero here */ 1029 WARN_ON(simd != 0); 1030 1031 /* type 2 wave data */ 1032 dst[(*no_fields)++] = 2; 1033 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 1034 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 1035 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 1036 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 1037 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 1038 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 1039 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 1040 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0); 1041 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 1042 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 1043 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 1044 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 1045 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 1046 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); 1047 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 1048 } 1049 1050 static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1051 uint32_t wave, uint32_t start, 1052 uint32_t size, uint32_t *dst) 1053 { 1054 WARN_ON(simd != 0); 1055 1056 wave_read_regs( 1057 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1058 
dst); 1059 } 1060 1061 static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1062 uint32_t wave, uint32_t thread, 1063 uint32_t start, uint32_t size, 1064 uint32_t *dst) 1065 { 1066 wave_read_regs( 1067 adev, wave, thread, 1068 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1069 } 1070 1071 static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, 1072 u32 me, u32 pipe, u32 q, u32 vm) 1073 { 1074 nv_grbm_select(adev, me, pipe, q, vm); 1075 } 1076 1077 1078 static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { 1079 .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter, 1080 .select_se_sh = &gfx_v10_0_select_se_sh, 1081 .read_wave_data = &gfx_v10_0_read_wave_data, 1082 .read_wave_sgprs = &gfx_v10_0_read_wave_sgprs, 1083 .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs, 1084 .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q, 1085 }; 1086 1087 static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) 1088 { 1089 u32 gb_addr_config; 1090 1091 adev->gfx.funcs = &gfx_v10_0_gfx_funcs; 1092 1093 switch (adev->asic_type) { 1094 case CHIP_NAVI10: 1095 adev->gfx.config.max_hw_contexts = 8; 1096 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1097 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1098 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1099 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1100 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1101 break; 1102 case CHIP_NAVI14: 1103 adev->gfx.config.max_hw_contexts = 8; 1104 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1105 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1106 adev->gfx.config.sc_hiz_tile_fifo_size = 0x0; 1107 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1108 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1109 break; 1110 default: 1111 BUG(); 1112 break; 1113 } 1114 1115 adev->gfx.config.gb_addr_config = gb_addr_config; 1116 1117 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1118 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 1119 GB_ADDR_CONFIG, NUM_PIPES); 1120 1121 adev->gfx.config.max_tile_pipes = 1122 adev->gfx.config.gb_addr_config_fields.num_pipes; 1123 1124 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1125 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 1126 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 1127 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1128 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 1129 GB_ADDR_CONFIG, NUM_RB_PER_SE); 1130 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1131 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 1132 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 1133 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1134 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 1135 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 1136 } 1137 1138 static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1139 int me, int pipe, int queue) 1140 { 1141 int r; 1142 struct amdgpu_ring *ring; 1143 unsigned int irq_type; 1144 1145 ring = &adev->gfx.gfx_ring[ring_id]; 1146 1147 ring->me = me; 1148 ring->pipe = pipe; 1149 ring->queue = queue; 1150 1151 ring->ring_obj = NULL; 1152 ring->use_doorbell = true; 1153 1154 if (!ring_id) 1155 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1156 else 1157 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1158 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1159 1160 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1161 r = amdgpu_ring_init(adev, ring, 1024, 1162 
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;
	return 0;
}

static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX10_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		   + ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static int gfx_v10_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id = 0;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 2;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 2;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	default:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	}

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
			      &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
			      GFX_10_1__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v10_0_scratch_init(adev);

	r = gfx_v10_0_me_init(adev);
	if (r)
		return r;

	r = gfx_v10_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v10_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v10_0_gfx_ring_init(adev, ring_id,
							    i, k, j);
				if (r)
					return r;
				ring_id++;
			}
		}
	}

	ring_id = 0;
	/* set up the compute queues - allocate horizontally across pipes */
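	/*
	 * With the i (MEC), j (queue slot), k (pipe) loop order below, every
	 * pipe gets one ring for a given queue slot before any pipe receives
	 * a second queue, which is what "horizontally across pipes" means
	 * here. amdgpu_gfx_is_mec_queue_enabled() then skips any slot that
	 * was not acquired into mec.queue_bitmap during gfx_v10_0_mec_init().
	 */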
for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1293 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1294 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1295 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, 1296 j)) 1297 continue; 1298 1299 r = gfx_v10_0_compute_ring_init(adev, ring_id, 1300 i, k, j); 1301 if (r) 1302 return r; 1303 1304 ring_id++; 1305 } 1306 } 1307 } 1308 1309 r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE); 1310 if (r) { 1311 DRM_ERROR("Failed to init KIQ BOs!\n"); 1312 return r; 1313 } 1314 1315 kiq = &adev->gfx.kiq; 1316 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1317 if (r) 1318 return r; 1319 1320 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd)); 1321 if (r) 1322 return r; 1323 1324 /* allocate visible FB for rlc auto-loading fw */ 1325 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1326 r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev); 1327 if (r) 1328 return r; 1329 } 1330 1331 adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE; 1332 1333 gfx_v10_0_gpu_early_init(adev); 1334 1335 return 0; 1336 } 1337 1338 static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev) 1339 { 1340 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1341 &adev->gfx.pfp.pfp_fw_gpu_addr, 1342 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1343 } 1344 1345 static void gfx_v10_0_ce_fini(struct amdgpu_device *adev) 1346 { 1347 amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj, 1348 &adev->gfx.ce.ce_fw_gpu_addr, 1349 (void **)&adev->gfx.ce.ce_fw_ptr); 1350 } 1351 1352 static void gfx_v10_0_me_fini(struct amdgpu_device *adev) 1353 { 1354 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1355 &adev->gfx.me.me_fw_gpu_addr, 1356 (void **)&adev->gfx.me.me_fw_ptr); 1357 } 1358 1359 static int gfx_v10_0_sw_fini(void *handle) 1360 { 1361 int i; 1362 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1363 1364 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1365 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1366 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1367 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1368 1369 amdgpu_gfx_mqd_sw_fini(adev); 1370 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1371 amdgpu_gfx_kiq_fini(adev); 1372 1373 gfx_v10_0_pfp_fini(adev); 1374 gfx_v10_0_ce_fini(adev); 1375 gfx_v10_0_me_fini(adev); 1376 gfx_v10_0_rlc_fini(adev); 1377 gfx_v10_0_mec_fini(adev); 1378 1379 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1380 gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev); 1381 1382 gfx_v10_0_free_microcode(adev); 1383 1384 return 0; 1385 } 1386 1387 1388 static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev) 1389 { 1390 /* TODO */ 1391 } 1392 1393 static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1394 u32 sh_num, u32 instance) 1395 { 1396 u32 data; 1397 1398 if (instance == 0xffffffff) 1399 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1400 INSTANCE_BROADCAST_WRITES, 1); 1401 else 1402 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1403 instance); 1404 1405 if (se_num == 0xffffffff) 1406 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1407 1); 1408 else 1409 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1410 1411 if (sh_num == 0xffffffff) 1412 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1413 1); 1414 else 1415 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1416 1417 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); 1418 } 1419 1420 static u32 
gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1421 { 1422 u32 data, mask; 1423 1424 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1425 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1426 1427 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1428 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1429 1430 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1431 adev->gfx.config.max_sh_per_se); 1432 1433 return (~data) & mask; 1434 } 1435 1436 static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) 1437 { 1438 int i, j; 1439 u32 data; 1440 u32 active_rbs = 0; 1441 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1442 adev->gfx.config.max_sh_per_se; 1443 1444 mutex_lock(&adev->grbm_idx_mutex); 1445 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1446 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1447 gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); 1448 data = gfx_v10_0_get_rb_active_bitmap(adev); 1449 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1450 rb_bitmap_width_per_sh); 1451 } 1452 } 1453 gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1454 mutex_unlock(&adev->grbm_idx_mutex); 1455 1456 adev->gfx.config.backend_enable_mask = active_rbs; 1457 adev->gfx.config.num_rbs = hweight32(active_rbs); 1458 } 1459 1460 static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev) 1461 { 1462 uint32_t num_sc; 1463 uint32_t enabled_rb_per_sh; 1464 uint32_t active_rb_bitmap; 1465 uint32_t num_rb_per_sc; 1466 uint32_t num_packer_per_sc; 1467 uint32_t pa_sc_tile_steering_override; 1468 1469 /* init num_sc */ 1470 num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se * 1471 adev->gfx.config.num_sc_per_sh; 1472 /* init num_rb_per_sc */ 1473 active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev); 1474 enabled_rb_per_sh = hweight32(active_rb_bitmap); 1475 num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh; 1476 /* init num_packer_per_sc */ 1477 num_packer_per_sc = adev->gfx.config.num_packer_per_sc; 1478 1479 pa_sc_tile_steering_override = 0; 1480 pa_sc_tile_steering_override |= 1481 (order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) & 1482 PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK; 1483 pa_sc_tile_steering_override |= 1484 (order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) & 1485 PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK; 1486 pa_sc_tile_steering_override |= 1487 (order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) & 1488 PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK; 1489 1490 return pa_sc_tile_steering_override; 1491 } 1492 1493 #define DEFAULT_SH_MEM_BASES (0x6000) 1494 #define FIRST_COMPUTE_VMID (8) 1495 #define LAST_COMPUTE_VMID (16) 1496 1497 static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) 1498 { 1499 int i; 1500 uint32_t sh_mem_bases; 1501 1502 /* 1503 * Configure apertures: 1504 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1505 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 1506 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1507 */ 1508 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 1509 1510 mutex_lock(&adev->srbm_mutex); 1511 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1512 nv_grbm_select(adev, 0, 0, 0, i); 1513 /* CP and shaders */ 1514 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 
		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	 * access. These should be enabled by FW for target VMIDs.
	 */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
{
	int i, j, k;
	int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
	u32 tmp, wgp_active_bitmap = 0;
	u32 gcrd_targets_disable_tcp = 0;
	u32 utcl_invreq_disable = 0;
	/*
	 * GCRD_TARGETS_DISABLE field contains
	 * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
	 * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
	 */
	u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		max_wgp_per_sh + /* SQC */
		4); /* GL1C */
	/*
	 * UTCL1_UTCL0_INVREQ_DISABLE field contains
	 * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
	 * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
	 */
	u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
		2 * max_wgp_per_sh + /* TCP */
		2 * max_wgp_per_sh + /* SQC */
		4 + /* RMI */
		1); /* SQG */

	if (adev->asic_type == CHIP_NAVI10 || adev->asic_type == CHIP_NAVI14) {
		mutex_lock(&adev->grbm_idx_mutex);
		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
				wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
				/*
				 * Set corresponding TCP bits for the inactive WGPs in
				 * GCRD_SA_TARGETS_DISABLE
				 */
				gcrd_targets_disable_tcp = 0;
				/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
				utcl_invreq_disable = 0;

				for (k = 0; k < max_wgp_per_sh; k++) {
					if (!(wgp_active_bitmap & (1 << k))) {
						gcrd_targets_disable_tcp |= 3 << (2 * k);
						utcl_invreq_disable |= (3 << (2 * k)) |
							(3 << (2 * (max_wgp_per_sh + k)));
					}
				}

				tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
				/* only override TCP & SQC bits */
				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
				tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
				WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);

				tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
				/* only override TCP bits */
				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
				tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
				WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
			}
		}

		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		mutex_unlock(&adev->grbm_idx_mutex);
	}
}

static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v10_0_tiling_mode_table_init(adev);

	gfx_v10_0_setup_rb(adev);
	gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
	adev->gfx.config.pa_sc_tile_steering_override =
		gfx_v10_0_init_pa_sc_tile_steering_override(adev);

	/* XXX SH_MEM regs */
	/* where to
put LDS, scratch, GPUVM in FSA64 space */ 1614 mutex_lock(&adev->srbm_mutex); 1615 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 1616 nv_grbm_select(adev, 0, 0, 0, i); 1617 /* CP and shaders */ 1618 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1619 if (i != 0) { 1620 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1621 (adev->gmc.private_aperture_start >> 48)); 1622 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1623 (adev->gmc.shared_aperture_start >> 48)); 1624 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); 1625 } 1626 } 1627 nv_grbm_select(adev, 0, 0, 0, 0); 1628 1629 mutex_unlock(&adev->srbm_mutex); 1630 1631 gfx_v10_0_init_compute_vmid(adev); 1632 1633 } 1634 1635 static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 1636 bool enable) 1637 { 1638 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 1639 1640 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 1641 enable ? 1 : 0); 1642 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 1643 enable ? 1 : 0); 1644 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 1645 enable ? 1 : 0); 1646 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 1647 enable ? 1 : 0); 1648 1649 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 1650 } 1651 1652 static void gfx_v10_0_init_csb(struct amdgpu_device *adev) 1653 { 1654 /* csib */ 1655 WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, 1656 adev->gfx.rlc.clear_state_gpu_addr >> 32); 1657 WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, 1658 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 1659 WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 1660 } 1661 1662 static void gfx_v10_0_init_pg(struct amdgpu_device *adev) 1663 { 1664 gfx_v10_0_init_csb(adev); 1665 1666 amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); 1667 1668 /* TODO: init power gating */ 1669 return; 1670 } 1671 1672 void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) 1673 { 1674 u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); 1675 1676 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 1677 WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp); 1678 } 1679 1680 static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev) 1681 { 1682 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 1683 udelay(50); 1684 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 1685 udelay(50); 1686 } 1687 1688 static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 1689 bool enable) 1690 { 1691 uint32_t rlc_pg_cntl; 1692 1693 rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL); 1694 1695 if (!enable) { 1696 /* RLC_PG_CNTL[23] = 0 (default) 1697 * RLC will wait for handshake acks with SMU 1698 * GFXOFF will be enabled 1699 * RLC_PG_CNTL[23] = 1 1700 * RLC will not issue any message to SMU 1701 * hence no handshake between SMU & RLC 1702 * GFXOFF will be disabled 1703 */ 1704 rlc_pg_cntl |= 0x80000; 1705 } else 1706 rlc_pg_cntl &= ~0x80000; 1707 WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl); 1708 } 1709 1710 static void gfx_v10_0_rlc_start(struct amdgpu_device *adev) 1711 { 1712 /* TODO: enable rlc & smu handshake until smu 1713 * and gfxoff feature works as expected */ 1714 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 1715 gfx_v10_0_rlc_smu_handshake_cntl(adev, false); 1716 1717 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 1718 udelay(50); 1719 } 1720 1721 static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev) 1722 { 1723 uint32_t tmp; 1724 1725 /* enable Save Restore Machine */ 1726 tmp = 
RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 1727 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 1728 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 1729 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 1730 } 1731 1732 static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev) 1733 { 1734 const struct rlc_firmware_header_v2_0 *hdr; 1735 const __le32 *fw_data; 1736 unsigned i, fw_size; 1737 1738 if (!adev->gfx.rlc_fw) 1739 return -EINVAL; 1740 1741 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1742 amdgpu_ucode_print_rlc_hdr(&hdr->header); 1743 1744 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1745 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1746 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1747 1748 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 1749 RLCG_UCODE_LOADING_START_ADDRESS); 1750 1751 for (i = 0; i < fw_size; i++) 1752 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, 1753 le32_to_cpup(fw_data++)); 1754 1755 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 1756 1757 return 0; 1758 } 1759 1760 static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) 1761 { 1762 int r; 1763 1764 if (amdgpu_sriov_vf(adev)) 1765 return 0; 1766 1767 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1768 r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); 1769 if (r) 1770 return r; 1771 gfx_v10_0_init_pg(adev); 1772 1773 /* enable RLC SRM */ 1774 gfx_v10_0_rlc_enable_srm(adev); 1775 1776 } else { 1777 adev->gfx.rlc.funcs->stop(adev); 1778 1779 /* disable CG */ 1780 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 1781 1782 /* disable PG */ 1783 WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); 1784 1785 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1786 /* legacy rlc firmware loading */ 1787 r = gfx_v10_0_rlc_load_microcode(adev); 1788 if (r) 1789 return r; 1790 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1791 /* rlc backdoor autoload firmware */ 1792 r = gfx_v10_0_rlc_backdoor_autoload_enable(adev); 1793 if (r) 1794 return r; 1795 } 1796 1797 gfx_v10_0_init_pg(adev); 1798 adev->gfx.rlc.funcs->start(adev); 1799 1800 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1801 r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); 1802 if (r) 1803 return r; 1804 } 1805 } 1806 return 0; 1807 } 1808 1809 static struct { 1810 FIRMWARE_ID id; 1811 unsigned int offset; 1812 unsigned int size; 1813 } rlc_autoload_info[FIRMWARE_ID_MAX]; 1814 1815 static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) 1816 { 1817 int ret; 1818 RLC_TABLE_OF_CONTENT *rlc_toc; 1819 1820 ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE, 1821 AMDGPU_GEM_DOMAIN_GTT, 1822 &adev->gfx.rlc.rlc_toc_bo, 1823 &adev->gfx.rlc.rlc_toc_gpu_addr, 1824 (void **)&adev->gfx.rlc.rlc_toc_buf); 1825 if (ret) { 1826 dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret); 1827 return ret; 1828 } 1829 1830 /* Copy toc from psp sos fw to rlc toc buffer */ 1831 memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size); 1832 1833 rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf; 1834 while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) && 1835 (rlc_toc->id < FIRMWARE_ID_MAX)) { 1836 if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) && 1837 (rlc_toc->id <= FIRMWARE_ID_CP_MES)) { 1838 /* Offset needs 4KB alignment */ 1839 rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE); 1840 } 1841 1842 rlc_autoload_info[rlc_toc->id].id = rlc_toc->id; 1843 rlc_autoload_info[rlc_toc->id].offset = 
rlc_toc->offset * 4; 1844 rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4; 1845 1846 rlc_toc++; 1847 }; 1848 1849 return 0; 1850 } 1851 1852 static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev) 1853 { 1854 uint32_t total_size = 0; 1855 FIRMWARE_ID id; 1856 int ret; 1857 1858 ret = gfx_v10_0_parse_rlc_toc(adev); 1859 if (ret) { 1860 dev_err(adev->dev, "failed to parse rlc toc\n"); 1861 return 0; 1862 } 1863 1864 for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++) 1865 total_size += rlc_autoload_info[id].size; 1866 1867 /* In case the offset in rlc toc ucode is aligned */ 1868 if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset) 1869 total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset + 1870 rlc_autoload_info[FIRMWARE_ID_MAX-1].size; 1871 1872 return total_size; 1873 } 1874 1875 static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev) 1876 { 1877 int r; 1878 uint32_t total_size; 1879 1880 total_size = gfx_v10_0_calc_toc_total_size(adev); 1881 1882 r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE, 1883 AMDGPU_GEM_DOMAIN_GTT, 1884 &adev->gfx.rlc.rlc_autoload_bo, 1885 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1886 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1887 if (r) { 1888 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1889 return r; 1890 } 1891 1892 return 0; 1893 } 1894 1895 static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev) 1896 { 1897 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo, 1898 &adev->gfx.rlc.rlc_toc_gpu_addr, 1899 (void **)&adev->gfx.rlc.rlc_toc_buf); 1900 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1901 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1902 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1903 } 1904 1905 static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1906 FIRMWARE_ID id, 1907 const void *fw_data, 1908 uint32_t fw_size) 1909 { 1910 uint32_t toc_offset; 1911 uint32_t toc_fw_size; 1912 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1913 1914 if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX) 1915 return; 1916 1917 toc_offset = rlc_autoload_info[id].offset; 1918 toc_fw_size = rlc_autoload_info[id].size; 1919 1920 if (fw_size == 0) 1921 fw_size = toc_fw_size; 1922 1923 if (fw_size > toc_fw_size) 1924 fw_size = toc_fw_size; 1925 1926 memcpy(ptr + toc_offset, fw_data, fw_size); 1927 1928 if (fw_size < toc_fw_size) 1929 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1930 } 1931 1932 static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) 1933 { 1934 void *data; 1935 uint32_t size; 1936 1937 data = adev->gfx.rlc.rlc_toc_buf; 1938 size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size; 1939 1940 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1941 FIRMWARE_ID_RLC_TOC, 1942 data, size); 1943 } 1944 1945 static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev) 1946 { 1947 const __le32 *fw_data; 1948 uint32_t fw_size; 1949 const struct gfx_firmware_header_v1_0 *cp_hdr; 1950 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1951 1952 /* pfp ucode */ 1953 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1954 adev->gfx.pfp_fw->data; 1955 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1956 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1957 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1958 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1959 FIRMWARE_ID_CP_PFP, 1960 fw_data, fw_size); 1961 1962 /* ce ucode */ 1963 
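    /*
     * Background (informal note): the ce/me/rlc/mec copies below follow the
     * same pattern as the pfp copy above: locate the ucode blob inside the
     * firmware image via ucode_array_offset_bytes, then hand it to
     * gfx_v10_0_rlc_backdoor_autoload_copy_ucode(), which places it at the
     * offset recorded in the RLC TOC, clamps the copy to the TOC-reported
     * size and zero-fills any remainder.
     */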
cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1964 adev->gfx.ce_fw->data; 1965 fw_data = (const __le32 *)(adev->gfx.ce_fw->data + 1966 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1967 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1968 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1969 FIRMWARE_ID_CP_CE, 1970 fw_data, fw_size); 1971 1972 /* me ucode */ 1973 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1974 adev->gfx.me_fw->data; 1975 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1976 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1977 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1978 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1979 FIRMWARE_ID_CP_ME, 1980 fw_data, fw_size); 1981 1982 /* rlc ucode */ 1983 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1984 adev->gfx.rlc_fw->data; 1985 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1986 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1987 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1988 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 1989 FIRMWARE_ID_RLC_G_UCODE, 1990 fw_data, fw_size); 1991 1992 /* mec1 ucode */ 1993 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1994 adev->gfx.mec_fw->data; 1995 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1996 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1997 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1998 cp_hdr->jt_size * 4; 1999 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 2000 FIRMWARE_ID_CP_MEC, 2001 fw_data, fw_size); 2002 /* mec2 ucode is not necessary if mec2 ucode is same as mec1 */ 2003 } 2004 2005 /* Temporarily put sdma part here */ 2006 static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev) 2007 { 2008 const __le32 *fw_data; 2009 uint32_t fw_size; 2010 const struct sdma_firmware_header_v1_0 *sdma_hdr; 2011 int i; 2012 2013 for (i = 0; i < adev->sdma.num_instances; i++) { 2014 sdma_hdr = (const struct sdma_firmware_header_v1_0 *) 2015 adev->sdma.instance[i].fw->data; 2016 fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data + 2017 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 2018 fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes); 2019 2020 if (i == 0) { 2021 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 2022 FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size); 2023 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 2024 FIRMWARE_ID_SDMA0_JT, 2025 (uint32_t *)fw_data + 2026 sdma_hdr->jt_offset, 2027 sdma_hdr->jt_size * 4); 2028 } else if (i == 1) { 2029 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 2030 FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size); 2031 gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, 2032 FIRMWARE_ID_SDMA1_JT, 2033 (uint32_t *)fw_data + 2034 sdma_hdr->jt_offset, 2035 sdma_hdr->jt_size * 4); 2036 } 2037 } 2038 } 2039 2040 static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 2041 { 2042 uint32_t rlc_g_offset, rlc_g_size, tmp; 2043 uint64_t gpu_addr; 2044 2045 gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev); 2046 gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev); 2047 gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev); 2048 2049 rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset; 2050 rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size; 2051 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 2052 2053 WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr)); 2054 WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, 
                 lower_32_bits(gpu_addr));
    WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size);

    tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR);
    if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK |
                 RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) {
        DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n");
        return -EINVAL;
    }

    tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
    if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) {
        DRM_ERROR("RLC ROM should halt itself\n");
        return -EINVAL;
    }

    return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev)
{
    uint32_t usec_timeout = 50000; /* wait for 50ms */
    uint32_t tmp;
    int i;
    uint64_t addr;

    /* Trigger an invalidation of the L1 instruction caches */
    tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
    tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
    WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);

    /* Wait for invalidation complete */
    for (i = 0; i < usec_timeout; i++) {
        tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
        if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
                               INVALIDATE_CACHE_COMPLETE))
            break;
        udelay(1);
    }

    if (i >= usec_timeout) {
        dev_err(adev->dev, "failed to invalidate instruction cache\n");
        return -EINVAL;
    }

    /* Program me ucode address into instruction cache address register */
    addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
           rlc_autoload_info[FIRMWARE_ID_CP_ME].offset;
    WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
                 lower_32_bits(addr) & 0xFFFFF000);
    WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
                 upper_32_bits(addr));

    return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev)
{
    uint32_t usec_timeout = 50000; /* wait for 50ms */
    uint32_t tmp;
    int i;
    uint64_t addr;

    /* Trigger an invalidation of the L1 instruction caches */
    tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
    tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
    WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);

    /* Wait for invalidation complete */
    for (i = 0; i < usec_timeout; i++) {
        tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
        if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
                               INVALIDATE_CACHE_COMPLETE))
            break;
        udelay(1);
    }

    if (i >= usec_timeout) {
        dev_err(adev->dev, "failed to invalidate instruction cache\n");
        return -EINVAL;
    }

    /* Program ce ucode address into instruction cache address register */
    addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
           rlc_autoload_info[FIRMWARE_ID_CP_CE].offset;
    WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
                 lower_32_bits(addr) & 0xFFFFF000);
    WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
                 upper_32_bits(addr));

    return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev)
{
    uint32_t usec_timeout = 50000; /* wait for 50ms */
    uint32_t tmp;
    int i;
    uint64_t addr;

    /* Trigger an invalidation of the L1 instruction caches */
    tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
    tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
    WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);

    /* Wait for invalidation complete */
    for (i = 0; i < usec_timeout; i++) {
        tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
        if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
                               INVALIDATE_CACHE_COMPLETE))
            break;
        udelay(1);
    }

    if (i >= usec_timeout) {
        dev_err(adev->dev, "failed to invalidate instruction cache\n");
        return -EINVAL;
    }

    /* Program pfp ucode address into instruction cache address register */
    addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
           rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset;
    WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
                 lower_32_bits(addr) & 0xFFFFF000);
    WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
                 upper_32_bits(addr));

    return 0;
}

static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev)
{
    uint32_t usec_timeout = 50000; /* wait for 50ms */
    uint32_t tmp;
    int i;
    uint64_t addr;

    /* Trigger an invalidation of the L1 instruction caches */
    tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
    tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
    WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);

    /* Wait for invalidation complete */
    for (i = 0; i < usec_timeout; i++) {
        tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
        if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
                               INVALIDATE_CACHE_COMPLETE))
            break;
        udelay(1);
    }

    if (i >= usec_timeout) {
        dev_err(adev->dev, "failed to invalidate instruction cache\n");
        return -EINVAL;
    }

    /* Program mec1 ucode address into instruction cache address register */
    addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
           rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset;
    WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
                 lower_32_bits(addr) & 0xFFFFF000);
    WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
                 upper_32_bits(addr));

    return 0;
}

static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
{
    uint32_t cp_status;
    uint32_t bootload_status;
    int i, r;

    for (i = 0; i < adev->usec_timeout; i++) {
        cp_status = RREG32_SOC15(GC, 0, mmCP_STAT);
        bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS);
        if ((cp_status == 0) &&
            (REG_GET_FIELD(bootload_status,
                           RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
            break;
        }
        udelay(1);
    }

    if (i >= adev->usec_timeout) {
        dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
        return -ETIMEDOUT;
    }

    if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
        r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev);
        if (r)
            return r;

        r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev);
        if (r)
            return r;

        r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev);
        if (r)
            return r;

        r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev);
        if (r)
            return r;
    }

    return 0;
}

static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
    int i;
    u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);

    tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
    tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
    tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ?
0 : 1); 2272 if (!enable) { 2273 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2274 adev->gfx.gfx_ring[i].sched.ready = false; 2275 } 2276 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); 2277 udelay(50); 2278 } 2279 2280 static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 2281 { 2282 int r; 2283 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2284 const __le32 *fw_data; 2285 unsigned i, fw_size; 2286 uint32_t tmp; 2287 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2288 2289 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2290 adev->gfx.pfp_fw->data; 2291 2292 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2293 2294 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 2295 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2296 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 2297 2298 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 2299 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2300 &adev->gfx.pfp.pfp_fw_obj, 2301 &adev->gfx.pfp.pfp_fw_gpu_addr, 2302 (void **)&adev->gfx.pfp.pfp_fw_ptr); 2303 if (r) { 2304 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 2305 gfx_v10_0_pfp_fini(adev); 2306 return r; 2307 } 2308 2309 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 2310 2311 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 2312 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 2313 2314 /* Trigger an invalidation of the L1 instruction caches */ 2315 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); 2316 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2317 WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp); 2318 2319 /* Wait for invalidation complete */ 2320 for (i = 0; i < usec_timeout; i++) { 2321 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); 2322 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2323 INVALIDATE_CACHE_COMPLETE)) 2324 break; 2325 udelay(1); 2326 } 2327 2328 if (i >= usec_timeout) { 2329 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2330 return -EINVAL; 2331 } 2332 2333 if (amdgpu_emu_mode == 1) 2334 adev->nbio_funcs->hdp_flush(adev, NULL); 2335 2336 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); 2337 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2338 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2339 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2340 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2341 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp); 2342 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO, 2343 adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000); 2344 WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, 2345 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 2346 2347 return 0; 2348 } 2349 2350 static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) 2351 { 2352 int r; 2353 const struct gfx_firmware_header_v1_0 *ce_hdr; 2354 const __le32 *fw_data; 2355 unsigned i, fw_size; 2356 uint32_t tmp; 2357 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2358 2359 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2360 adev->gfx.ce_fw->data; 2361 2362 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2363 2364 fw_data = (const __le32 *)(adev->gfx.ce_fw->data + 2365 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2366 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes); 2367 2368 r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes, 2369 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2370 &adev->gfx.ce.ce_fw_obj, 2371 &adev->gfx.ce.ce_fw_gpu_addr, 2372 (void **)&adev->gfx.ce.ce_fw_ptr); 2373 if (r) { 2374 dev_err(adev->dev, "(%d) 
failed to create ce fw bo\n", r); 2375 gfx_v10_0_ce_fini(adev); 2376 return r; 2377 } 2378 2379 memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size); 2380 2381 amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj); 2382 amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj); 2383 2384 /* Trigger an invalidation of the L1 instruction caches */ 2385 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); 2386 tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2387 WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp); 2388 2389 /* Wait for invalidation complete */ 2390 for (i = 0; i < usec_timeout; i++) { 2391 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); 2392 if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL, 2393 INVALIDATE_CACHE_COMPLETE)) 2394 break; 2395 udelay(1); 2396 } 2397 2398 if (i >= usec_timeout) { 2399 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2400 return -EINVAL; 2401 } 2402 2403 if (amdgpu_emu_mode == 1) 2404 adev->nbio_funcs->hdp_flush(adev, NULL); 2405 2406 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); 2407 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); 2408 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0); 2409 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0); 2410 tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2411 WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO, 2412 adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000); 2413 WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, 2414 upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr)); 2415 2416 return 0; 2417 } 2418 2419 static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 2420 { 2421 int r; 2422 const struct gfx_firmware_header_v1_0 *me_hdr; 2423 const __le32 *fw_data; 2424 unsigned i, fw_size; 2425 uint32_t tmp; 2426 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2427 2428 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2429 adev->gfx.me_fw->data; 2430 2431 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2432 2433 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 2434 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2435 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 2436 2437 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 2438 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2439 &adev->gfx.me.me_fw_obj, 2440 &adev->gfx.me.me_fw_gpu_addr, 2441 (void **)&adev->gfx.me.me_fw_ptr); 2442 if (r) { 2443 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 2444 gfx_v10_0_me_fini(adev); 2445 return r; 2446 } 2447 2448 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 2449 2450 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 2451 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 2452 2453 /* Trigger an invalidation of the L1 instruction caches */ 2454 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); 2455 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2456 WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp); 2457 2458 /* Wait for invalidation complete */ 2459 for (i = 0; i < usec_timeout; i++) { 2460 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); 2461 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2462 INVALIDATE_CACHE_COMPLETE)) 2463 break; 2464 udelay(1); 2465 } 2466 2467 if (i >= usec_timeout) { 2468 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2469 return -EINVAL; 2470 } 2471 2472 if (amdgpu_emu_mode == 1) 2473 adev->nbio_funcs->hdp_flush(adev, NULL); 2474 2475 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); 2476 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2477 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2478 tmp = 
REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2479 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2480 WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO, 2481 adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000); 2482 WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, 2483 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 2484 2485 return 0; 2486 } 2487 2488 static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2489 { 2490 int r; 2491 2492 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2493 return -EINVAL; 2494 2495 gfx_v10_0_cp_gfx_enable(adev, false); 2496 2497 r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev); 2498 if (r) { 2499 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 2500 return r; 2501 } 2502 2503 r = gfx_v10_0_cp_gfx_load_ce_microcode(adev); 2504 if (r) { 2505 dev_err(adev->dev, "(%d) failed to load ce fw\n", r); 2506 return r; 2507 } 2508 2509 r = gfx_v10_0_cp_gfx_load_me_microcode(adev); 2510 if (r) { 2511 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 2512 return r; 2513 } 2514 2515 return 0; 2516 } 2517 2518 static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) 2519 { 2520 struct amdgpu_ring *ring; 2521 const struct cs_section_def *sect = NULL; 2522 const struct cs_extent_def *ext = NULL; 2523 int r, i; 2524 int ctx_reg_offset; 2525 2526 /* init the CP */ 2527 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, 2528 adev->gfx.config.max_hw_contexts - 1); 2529 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2530 2531 gfx_v10_0_cp_gfx_enable(adev, true); 2532 2533 ring = &adev->gfx.gfx_ring[0]; 2534 r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4); 2535 if (r) { 2536 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2537 return r; 2538 } 2539 2540 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2541 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2542 2543 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2544 amdgpu_ring_write(ring, 0x80000000); 2545 amdgpu_ring_write(ring, 0x80000000); 2546 2547 for (sect = gfx10_cs_data; sect->section != NULL; ++sect) { 2548 for (ext = sect->section; ext->extent != NULL; ++ext) { 2549 if (sect->id == SECT_CONTEXT) { 2550 amdgpu_ring_write(ring, 2551 PACKET3(PACKET3_SET_CONTEXT_REG, 2552 ext->reg_count)); 2553 amdgpu_ring_write(ring, ext->reg_index - 2554 PACKET3_SET_CONTEXT_REG_START); 2555 for (i = 0; i < ext->reg_count; i++) 2556 amdgpu_ring_write(ring, ext->extent[i]); 2557 } 2558 } 2559 } 2560 2561 ctx_reg_offset = 2562 SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 2563 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 2564 amdgpu_ring_write(ring, ctx_reg_offset); 2565 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 2566 2567 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2568 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2569 2570 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2571 amdgpu_ring_write(ring, 0); 2572 2573 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2574 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2575 amdgpu_ring_write(ring, 0x8000); 2576 amdgpu_ring_write(ring, 0x8000); 2577 2578 amdgpu_ring_commit(ring); 2579 2580 /* submit cs packet to copy state 0 to next available state */ 2581 ring = &adev->gfx.gfx_ring[1]; 2582 r = amdgpu_ring_alloc(ring, 2); 2583 if (r) { 2584 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2585 return r; 2586 } 2587 2588 amdgpu_ring_write(ring, 
PACKET3(PACKET3_CLEAR_STATE, 0)); 2589 amdgpu_ring_write(ring, 0); 2590 2591 amdgpu_ring_commit(ring); 2592 2593 return 0; 2594 } 2595 2596 static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 2597 CP_PIPE_ID pipe) 2598 { 2599 u32 tmp; 2600 2601 tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL); 2602 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 2603 2604 WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp); 2605 } 2606 2607 static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 2608 struct amdgpu_ring *ring) 2609 { 2610 u32 tmp; 2611 2612 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2613 if (ring->use_doorbell) { 2614 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2615 DOORBELL_OFFSET, ring->doorbell_index); 2616 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2617 DOORBELL_EN, 1); 2618 } else { 2619 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2620 DOORBELL_EN, 0); 2621 } 2622 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2623 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2624 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2625 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2626 2627 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2628 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2629 } 2630 2631 static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) 2632 { 2633 struct amdgpu_ring *ring; 2634 u32 tmp; 2635 u32 rb_bufsz; 2636 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2637 u32 i; 2638 2639 /* Set the write pointer delay */ 2640 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2641 2642 /* set the RB to use vmid 0 */ 2643 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2644 2645 /* Init gfx ring 0 for pipe 0 */ 2646 mutex_lock(&adev->srbm_mutex); 2647 gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 2648 mutex_unlock(&adev->srbm_mutex); 2649 /* Set ring buffer size */ 2650 ring = &adev->gfx.gfx_ring[0]; 2651 rb_bufsz = order_base_2(ring->ring_size / 8); 2652 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2653 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2654 #ifdef __BIG_ENDIAN 2655 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2656 #endif 2657 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2658 2659 /* Initialize the ring buffer's write pointers */ 2660 ring->wptr = 0; 2661 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2662 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2663 2664 /* set the wb address wether it's enabled or not */ 2665 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2666 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2667 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 2668 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2669 2670 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2671 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, 2672 lower_32_bits(wptr_gpu_addr)); 2673 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, 2674 upper_32_bits(wptr_gpu_addr)); 2675 2676 mdelay(1); 2677 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2678 2679 rb_addr = ring->gpu_addr >> 8; 2680 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2681 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2682 2683 WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); 2684 2685 gfx_v10_0_cp_gfx_set_doorbell(adev, ring); 2686 2687 /* Init gfx ring 1 for pipe 1 */ 2688 mutex_lock(&adev->srbm_mutex); 2689 gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 2690 mutex_unlock(&adev->srbm_mutex); 2691 ring = &adev->gfx.gfx_ring[1]; 2692 rb_bufsz = 
order_base_2(ring->ring_size / 8); 2693 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 2694 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 2695 WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); 2696 /* Initialize the ring buffer's write pointers */ 2697 ring->wptr = 0; 2698 WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); 2699 WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 2700 /* Set the wb address wether it's enabled or not */ 2701 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2702 WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 2703 WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 2704 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2705 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2706 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, 2707 lower_32_bits(wptr_gpu_addr)); 2708 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, 2709 upper_32_bits(wptr_gpu_addr)); 2710 2711 mdelay(1); 2712 WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); 2713 2714 rb_addr = ring->gpu_addr >> 8; 2715 WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); 2716 WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 2717 WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); 2718 2719 gfx_v10_0_cp_gfx_set_doorbell(adev, ring); 2720 2721 /* Switch to pipe 0 */ 2722 mutex_lock(&adev->srbm_mutex); 2723 gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 2724 mutex_unlock(&adev->srbm_mutex); 2725 2726 /* start the ring */ 2727 gfx_v10_0_cp_gfx_start(adev); 2728 2729 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2730 ring = &adev->gfx.gfx_ring[i]; 2731 ring->sched.ready = true; 2732 } 2733 2734 return 0; 2735 } 2736 2737 static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2738 { 2739 int i; 2740 2741 if (enable) { 2742 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); 2743 } else { 2744 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 2745 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | 2746 CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2747 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2748 adev->gfx.compute_ring[i].sched.ready = false; 2749 adev->gfx.kiq.ring.sched.ready = false; 2750 } 2751 udelay(50); 2752 } 2753 2754 static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2755 { 2756 const struct gfx_firmware_header_v1_0 *mec_hdr; 2757 const __le32 *fw_data; 2758 unsigned i; 2759 u32 tmp; 2760 u32 usec_timeout = 50000; /* Wait for 50 ms */ 2761 2762 if (!adev->gfx.mec_fw) 2763 return -EINVAL; 2764 2765 gfx_v10_0_cp_compute_enable(adev, false); 2766 2767 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2768 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2769 2770 fw_data = (const __le32 *) 2771 (adev->gfx.mec_fw->data + 2772 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2773 2774 /* Trigger an invalidation of the L1 instruction caches */ 2775 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); 2776 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2777 WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp); 2778 2779 /* Wait for invalidation complete */ 2780 for (i = 0; i < usec_timeout; i++) { 2781 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); 2782 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2783 INVALIDATE_CACHE_COMPLETE)) 2784 break; 2785 udelay(1); 2786 } 2787 2788 if (i >= usec_timeout) { 2789 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2790 return -EINVAL; 2791 } 2792 2793 if (amdgpu_emu_mode == 1) 2794 adev->nbio_funcs->hdp_flush(adev, NULL); 2795 2796 tmp = 
RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); 2797 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2798 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2799 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2800 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2801 2802 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr & 2803 0xFFFFF000); 2804 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2805 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2806 2807 /* MEC1 */ 2808 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0); 2809 2810 for (i = 0; i < mec_hdr->jt_size; i++) 2811 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2812 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2813 2814 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 2815 2816 /* 2817 * TODO: Loading MEC2 firmware is only necessary if MEC2 should run 2818 * different microcode than MEC1. 2819 */ 2820 2821 return 0; 2822 } 2823 2824 static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) 2825 { 2826 uint32_t tmp; 2827 struct amdgpu_device *adev = ring->adev; 2828 2829 /* tell RLC which is KIQ queue */ 2830 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2831 tmp &= 0xffffff00; 2832 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2833 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2834 tmp |= 0x80; 2835 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2836 } 2837 2838 static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) 2839 { 2840 struct amdgpu_device *adev = ring->adev; 2841 struct v10_gfx_mqd *mqd = ring->mqd_ptr; 2842 uint64_t hqd_gpu_addr, wb_gpu_addr; 2843 uint32_t tmp; 2844 uint32_t rb_bufsz; 2845 2846 /* set up gfx hqd wptr */ 2847 mqd->cp_gfx_hqd_wptr = 0; 2848 mqd->cp_gfx_hqd_wptr_hi = 0; 2849 2850 /* set the pointer to the MQD */ 2851 mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc; 2852 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2853 2854 /* set up mqd control */ 2855 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL); 2856 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 2857 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 2858 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 2859 mqd->cp_gfx_mqd_control = tmp; 2860 2861 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 2862 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID); 2863 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 2864 mqd->cp_gfx_hqd_vmid = 0; 2865 2866 /* set up default queue priority level 2867 * 0x0 = low priority, 0x1 = high priority */ 2868 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); 2869 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); 2870 mqd->cp_gfx_hqd_queue_priority = tmp; 2871 2872 /* set up time quantum */ 2873 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM); 2874 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 2875 mqd->cp_gfx_hqd_quantum = tmp; 2876 2877 /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ 2878 hqd_gpu_addr = ring->gpu_addr >> 8; 2879 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 2880 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 2881 2882 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 2883 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2884 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 2885 mqd->cp_gfx_hqd_rptr_addr_hi = 2886 upper_32_bits(wb_gpu_addr) & 0xffff; 2887 2888 /* set up rb_wptr_poll addr */ 2889 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2890 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2891 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2892 2893 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 2894 rb_bufsz = order_base_2(ring->ring_size / 4) - 1; 2895 tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL); 2896 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 2897 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 2898 #ifdef __BIG_ENDIAN 2899 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 2900 #endif 2901 mqd->cp_gfx_hqd_cntl = tmp; 2902 2903 /* set up cp_doorbell_control */ 2904 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2905 if (ring->use_doorbell) { 2906 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2907 DOORBELL_OFFSET, ring->doorbell_index); 2908 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2909 DOORBELL_EN, 1); 2910 } else 2911 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2912 DOORBELL_EN, 0); 2913 mqd->cp_rb_doorbell_control = tmp; 2914 2915 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2916 ring->wptr = 0; 2917 mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); 2918 2919 /* active the queue */ 2920 mqd->cp_gfx_hqd_active = 1; 2921 2922 return 0; 2923 } 2924 2925 #ifdef BRING_UP_DEBUG 2926 static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring) 2927 { 2928 struct amdgpu_device *adev = ring->adev; 2929 struct v10_gfx_mqd *mqd = ring->mqd_ptr; 2930 2931 /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ 2932 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); 2933 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); 2934 2935 /* set GFX_MQD_BASE */ 2936 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); 2937 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 2938 2939 /* set GFX_MQD_CONTROL */ 2940 WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); 2941 2942 /* set GFX_HQD_VMID to 0 */ 2943 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); 2944 2945 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY, 2946 mqd->cp_gfx_hqd_queue_priority); 2947 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); 2948 2949 /* set GFX_HQD_BASE, similar as CP_RB_BASE */ 2950 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); 2951 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); 2952 2953 /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ 2954 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); 2955 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); 2956 2957 /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */ 2958 WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); 2959 2960 /* set RB_WPTR_POLL_ADDR */ 2961 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); 2962 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); 2963 2964 /* set RB_DOORBELL_CONTROL */ 
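    /*
     * Background (informal note): ring->doorbell_index selects a slot in the
     * doorbell aperture; once DOORBELL_EN is set, writing the ring's write
     * pointer to that slot is what kicks the CP for this queue, so no extra
     * register write is needed per submission.
     */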
    WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);

    /* activate the queue */
    WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);

    return 0;
}
#endif

static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    struct v10_gfx_mqd *mqd = ring->mqd_ptr;

    if (!adev->in_gpu_reset && !adev->in_suspend) {
        memset((void *)mqd, 0, sizeof(*mqd));
        mutex_lock(&adev->srbm_mutex);
        nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        gfx_v10_0_gfx_mqd_init(ring);
#ifdef BRING_UP_DEBUG
        gfx_v10_0_gfx_queue_init_register(ring);
#endif
        nv_grbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
        if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
            memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd));
    } else if (adev->in_gpu_reset) {
        /* reset mqd with the backup copy */
        if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
            memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
        /* reset the ring */
        ring->wptr = 0;
        amdgpu_ring_clear_ring(ring);
#ifdef BRING_UP_DEBUG
        mutex_lock(&adev->srbm_mutex);
        nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        gfx_v10_0_gfx_queue_init_register(ring);
        nv_grbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
#endif
    } else {
        amdgpu_ring_clear_ring(ring);
    }

    return 0;
}

#ifndef BRING_UP_DEBUG
static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
{
    struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
    int r, i;

    if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
        return -EINVAL;

    r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
                          adev->gfx.num_gfx_rings);
    if (r) {
        DRM_ERROR("Failed to lock KIQ (%d).\n", r);
        return r;
    }

    for (i = 0; i < adev->gfx.num_gfx_rings; i++)
        kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);

    r = amdgpu_ring_test_ring(kiq_ring);
    if (r) {
        DRM_ERROR("kgq enable failed\n");
        kiq_ring->sched.ready = false;
    }
    return r;
}
#endif

static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
    int r, i;
    struct amdgpu_ring *ring;

    for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
        ring = &adev->gfx.gfx_ring[i];

        r = amdgpu_bo_reserve(ring->mqd_obj, false);
        if (unlikely(r != 0))
            goto done;

        r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
        if (!r) {
            r = gfx_v10_0_gfx_init_queue(ring);
            amdgpu_bo_kunmap(ring->mqd_obj);
            ring->mqd_ptr = NULL;
        }
        amdgpu_bo_unreserve(ring->mqd_obj);
        if (r)
            goto done;
    }
#ifndef BRING_UP_DEBUG
    r = gfx_v10_0_kiq_enable_kgq(adev);
    if (r)
        goto done;
#endif
    r = gfx_v10_0_cp_gfx_start(adev);
    if (r)
        goto done;

    for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
        ring = &adev->gfx.gfx_ring[i];
        ring->sched.ready = true;
    }
done:
    return r;
}

static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    struct v10_compute_mqd *mqd = ring->mqd_ptr;
    uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
    uint32_t tmp;
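    /*
     * Background (example numbers only): the MQD filled in below is a memory
     * snapshot of the HQD register state for this queue; gfx_v10_0_kiq_init_register()
     * (for the KIQ ring) or the KIQ map-queues path used by amdgpu_gfx_enable_kcq()
     * later commits it to hardware.  The size fields use a log2 encoding, e.g.
     * for a 2 KiB EOP buffer: 2048 bytes = 512 dwords,
     * EOP_SIZE = order_base_2(512) - 1 = 8, which the hardware reads back as
     * 2^(EOP_SIZE + 1) = 512 dwords.
     */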
3086 3087 mqd->header = 0xC0310800; 3088 mqd->compute_pipelinestat_enable = 0x00000001; 3089 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3090 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3091 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3092 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3093 mqd->compute_misc_reserved = 0x00000003; 3094 3095 eop_base_addr = ring->eop_gpu_addr >> 8; 3096 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3097 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3098 3099 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3100 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3101 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3102 (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1)); 3103 3104 mqd->cp_hqd_eop_control = tmp; 3105 3106 /* enable doorbell? */ 3107 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3108 3109 if (ring->use_doorbell) { 3110 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3111 DOORBELL_OFFSET, ring->doorbell_index); 3112 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3113 DOORBELL_EN, 1); 3114 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3115 DOORBELL_SOURCE, 0); 3116 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3117 DOORBELL_HIT, 0); 3118 } else { 3119 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3120 DOORBELL_EN, 0); 3121 } 3122 3123 mqd->cp_hqd_pq_doorbell_control = tmp; 3124 3125 /* disable the queue if it's active */ 3126 ring->wptr = 0; 3127 mqd->cp_hqd_dequeue_request = 0; 3128 mqd->cp_hqd_pq_rptr = 0; 3129 mqd->cp_hqd_pq_wptr_lo = 0; 3130 mqd->cp_hqd_pq_wptr_hi = 0; 3131 3132 /* set the pointer to the MQD */ 3133 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3134 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3135 3136 /* set MQD vmid to 0 */ 3137 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3138 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3139 mqd->cp_mqd_control = tmp; 3140 3141 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3142 hqd_gpu_addr = ring->gpu_addr >> 8; 3143 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3144 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3145 3146 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3147 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3148 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3149 (order_base_2(ring->ring_size / 4) - 1)); 3150 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3151 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3152 #ifdef __BIG_ENDIAN 3153 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3154 #endif 3155 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 3157 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3158 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3159 mqd->cp_hqd_pq_control = tmp; 3160 3161 /* set the wb address whether it's enabled or not */ 3162 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3163 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3164 mqd->cp_hqd_pq_rptr_report_addr_hi = 3165 upper_32_bits(wb_gpu_addr) & 0xffff; 3166 3167 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3168 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3169 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3170 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3171 3172 tmp = 0; 3173 /* enable 
the doorbell if requested */ 3174 if (ring->use_doorbell) { 3175 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3176 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3177 DOORBELL_OFFSET, ring->doorbell_index); 3178 3179 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3180 DOORBELL_EN, 1); 3181 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3182 DOORBELL_SOURCE, 0); 3183 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3184 DOORBELL_HIT, 0); 3185 } 3186 3187 mqd->cp_hqd_pq_doorbell_control = tmp; 3188 3189 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3190 ring->wptr = 0; 3191 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3192 3193 /* set the vmid for the queue */ 3194 mqd->cp_hqd_vmid = 0; 3195 3196 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3197 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3198 mqd->cp_hqd_persistent_state = tmp; 3199 3200 /* set MIN_IB_AVAIL_SIZE */ 3201 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3202 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3203 mqd->cp_hqd_ib_control = tmp; 3204 3205 /* activate the queue */ 3206 mqd->cp_hqd_active = 1; 3207 3208 return 0; 3209 } 3210 3211 static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) 3212 { 3213 struct amdgpu_device *adev = ring->adev; 3214 struct v10_compute_mqd *mqd = ring->mqd_ptr; 3215 int j; 3216 3217 /* disable wptr polling */ 3218 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3219 3220 /* write the EOP addr */ 3221 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3222 mqd->cp_hqd_eop_base_addr_lo); 3223 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3224 mqd->cp_hqd_eop_base_addr_hi); 3225 3226 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3227 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, 3228 mqd->cp_hqd_eop_control); 3229 3230 /* enable doorbell? 
*/ 3231 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3232 mqd->cp_hqd_pq_doorbell_control); 3233 3234 /* disable the queue if it's active */ 3235 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3236 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3237 for (j = 0; j < adev->usec_timeout; j++) { 3238 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3239 break; 3240 udelay(1); 3241 } 3242 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3243 mqd->cp_hqd_dequeue_request); 3244 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 3245 mqd->cp_hqd_pq_rptr); 3246 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3247 mqd->cp_hqd_pq_wptr_lo); 3248 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3249 mqd->cp_hqd_pq_wptr_hi); 3250 } 3251 3252 /* set the pointer to the MQD */ 3253 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, 3254 mqd->cp_mqd_base_addr_lo); 3255 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3256 mqd->cp_mqd_base_addr_hi); 3257 3258 /* set MQD vmid to 0 */ 3259 WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 3260 mqd->cp_mqd_control); 3261 3262 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3263 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, 3264 mqd->cp_hqd_pq_base_lo); 3265 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, 3266 mqd->cp_hqd_pq_base_hi); 3267 3268 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3269 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, 3270 mqd->cp_hqd_pq_control); 3271 3272 /* set the wb address whether it's enabled or not */ 3273 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3274 mqd->cp_hqd_pq_rptr_report_addr_lo); 3275 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3276 mqd->cp_hqd_pq_rptr_report_addr_hi); 3277 3278 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3279 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3280 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3281 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3282 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3283 3284 /* enable the doorbell if requested */ 3285 if (ring->use_doorbell) { 3286 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3287 (adev->doorbell_index.kiq * 2) << 2); 3288 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3289 (adev->doorbell_index.userqueue_end * 2) << 2); 3290 } 3291 3292 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3293 mqd->cp_hqd_pq_doorbell_control); 3294 3295 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3296 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3297 mqd->cp_hqd_pq_wptr_lo); 3298 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3299 mqd->cp_hqd_pq_wptr_hi); 3300 3301 /* set the vmid for the queue */ 3302 WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3303 3304 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3305 mqd->cp_hqd_persistent_state); 3306 3307 /* activate the queue */ 3308 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 3309 mqd->cp_hqd_active); 3310 3311 if (ring->use_doorbell) 3312 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3313 3314 return 0; 3315 } 3316 3317 static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) 3318 { 3319 struct amdgpu_device *adev = ring->adev; 3320 struct v10_compute_mqd *mqd = ring->mqd_ptr; 3321 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3322 3323 gfx_v10_0_kiq_setting(ring); 3324 3325 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3326 /* reset MQD to a clean status */ 3327 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3328 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 3329 3330 /* reset ring buffer */ 3331 ring->wptr = 0; 3332 amdgpu_ring_clear_ring(ring); 3333 3334 mutex_lock(&adev->srbm_mutex); 3335 
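    /*
     * Note: nv_grbm_select() points the per-queue register window at this
     * ring's me/pipe/queue so that the CP_HQD_ and CP_MQD_ writes done by
     * gfx_v10_0_kiq_init_register() land on the intended hardware queue;
     * srbm_mutex serializes that window and the selection is restored to
     * (0, 0, 0, 0) before the mutex is released.
     */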
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3336 gfx_v10_0_kiq_init_register(ring); 3337 nv_grbm_select(adev, 0, 0, 0, 0); 3338 mutex_unlock(&adev->srbm_mutex); 3339 } else { 3340 memset((void *)mqd, 0, sizeof(*mqd)); 3341 mutex_lock(&adev->srbm_mutex); 3342 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3343 gfx_v10_0_compute_mqd_init(ring); 3344 gfx_v10_0_kiq_init_register(ring); 3345 nv_grbm_select(adev, 0, 0, 0, 0); 3346 mutex_unlock(&adev->srbm_mutex); 3347 3348 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3349 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 3350 } 3351 3352 return 0; 3353 } 3354 3355 static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) 3356 { 3357 struct amdgpu_device *adev = ring->adev; 3358 struct v10_compute_mqd *mqd = ring->mqd_ptr; 3359 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3360 3361 if (!adev->in_gpu_reset && !adev->in_suspend) { 3362 memset((void *)mqd, 0, sizeof(*mqd)); 3363 mutex_lock(&adev->srbm_mutex); 3364 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3365 gfx_v10_0_compute_mqd_init(ring); 3366 nv_grbm_select(adev, 0, 0, 0, 0); 3367 mutex_unlock(&adev->srbm_mutex); 3368 3369 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3370 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 3371 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3372 /* reset MQD to a clean status */ 3373 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3374 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 3375 3376 /* reset ring buffer */ 3377 ring->wptr = 0; 3378 amdgpu_ring_clear_ring(ring); 3379 } else { 3380 amdgpu_ring_clear_ring(ring); 3381 } 3382 3383 return 0; 3384 } 3385 3386 static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) 3387 { 3388 struct amdgpu_ring *ring; 3389 int r; 3390 3391 ring = &adev->gfx.kiq.ring; 3392 3393 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3394 if (unlikely(r != 0)) 3395 return r; 3396 3397 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3398 if (unlikely(r != 0)) 3399 return r; 3400 3401 gfx_v10_0_kiq_init_queue(ring); 3402 amdgpu_bo_kunmap(ring->mqd_obj); 3403 ring->mqd_ptr = NULL; 3404 amdgpu_bo_unreserve(ring->mqd_obj); 3405 ring->sched.ready = true; 3406 return 0; 3407 } 3408 3409 static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) 3410 { 3411 struct amdgpu_ring *ring = NULL; 3412 int r = 0, i; 3413 3414 gfx_v10_0_cp_compute_enable(adev, true); 3415 3416 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3417 ring = &adev->gfx.compute_ring[i]; 3418 3419 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3420 if (unlikely(r != 0)) 3421 goto done; 3422 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3423 if (!r) { 3424 r = gfx_v10_0_kcq_init_queue(ring); 3425 amdgpu_bo_kunmap(ring->mqd_obj); 3426 ring->mqd_ptr = NULL; 3427 } 3428 amdgpu_bo_unreserve(ring->mqd_obj); 3429 if (r) 3430 goto done; 3431 } 3432 3433 r = amdgpu_gfx_enable_kcq(adev); 3434 done: 3435 return r; 3436 } 3437 3438 static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) 3439 { 3440 int r, i; 3441 struct amdgpu_ring *ring; 3442 3443 if (!(adev->flags & AMD_IS_APU)) 3444 gfx_v10_0_enable_gui_idle_interrupt(adev, false); 3445 3446 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 3447 /* legacy firmware loading */ 3448 r = gfx_v10_0_cp_gfx_load_microcode(adev); 3449 if (r) 3450 return r; 3451 3452 r = gfx_v10_0_cp_compute_load_microcode(adev); 3453 if (r) 3454 return r; 3455 } 3456 3457 r = gfx_v10_0_kiq_resume(adev); 3458 if (r) 3459 return r; 3460 3461 
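/* Bring up the KIQ before the user compute queues: the KCQs resumed below
 * are mapped by submitting packets on the KIQ ring (amdgpu_gfx_enable_kcq()),
 * so the KIQ has to be running first. */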
r = gfx_v10_0_kcq_resume(adev); 3462 if (r) 3463 return r; 3464 3465 if (!amdgpu_async_gfx_ring) { 3466 r = gfx_v10_0_cp_gfx_resume(adev); 3467 if (r) 3468 return r; 3469 } else { 3470 r = gfx_v10_0_cp_async_gfx_ring_resume(adev); 3471 if (r) 3472 return r; 3473 } 3474 3475 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 3476 ring = &adev->gfx.gfx_ring[i]; 3477 DRM_INFO("gfx %d ring me %d pipe %d q %d\n", 3478 i, ring->me, ring->pipe, ring->queue); 3479 r = amdgpu_ring_test_ring(ring); 3480 if (r) { 3481 ring->sched.ready = false; 3482 return r; 3483 } 3484 } 3485 3486 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3487 ring = &adev->gfx.compute_ring[i]; 3488 ring->sched.ready = true; 3489 DRM_INFO("compute ring %d mec %d pipe %d q %d\n", 3490 i, ring->me, ring->pipe, ring->queue); 3491 r = amdgpu_ring_test_ring(ring); 3492 if (r) 3493 ring->sched.ready = false; 3494 } 3495 3496 return 0; 3497 } 3498 3499 static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable) 3500 { 3501 gfx_v10_0_cp_gfx_enable(adev, enable); 3502 gfx_v10_0_cp_compute_enable(adev, enable); 3503 } 3504 3505 static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) 3506 { 3507 uint32_t data, pattern = 0xDEADBEEF; 3508 3509 /* check if mmVGT_ESGS_RING_SIZE_UMD 3510 * has been remapped to mmVGT_ESGS_RING_SIZE */ 3511 data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); 3512 3513 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); 3514 3515 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); 3516 3517 if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) { 3518 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); 3519 return true; 3520 } else { 3521 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); 3522 return false; 3523 } 3524 } 3525 3526 static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) 3527 { 3528 uint32_t data; 3529 3530 /* initialize cam_index to 0 3531 * index will auto-inc after each data writting */ 3532 WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); 3533 3534 /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ 3535 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << 3536 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3537 (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) << 3538 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3539 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3540 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3541 3542 /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */ 3543 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) << 3544 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3545 (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) << 3546 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3547 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3548 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3549 3550 /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */ 3551 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) << 3552 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3553 (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) << 3554 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3555 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3556 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3557 3558 /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */ 3559 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) << 3560 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3561 (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) << 3562 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3563 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3564 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3565 3566 /* mmVGT_ESGS_RING_SIZE_UMD -> 
mmVGT_ESGS_RING_SIZE */ 3567 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) << 3568 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3569 (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) << 3570 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3571 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3572 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3573 3574 /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */ 3575 data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) << 3576 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3577 (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) << 3578 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3579 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3580 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3581 3582 /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */ 3583 data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) << 3584 GRBM_CAM_DATA__CAM_ADDR__SHIFT) | 3585 (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) << 3586 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); 3587 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); 3588 WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); 3589 } 3590 3591 static int gfx_v10_0_hw_init(void *handle) 3592 { 3593 int r; 3594 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3595 3596 r = gfx_v10_0_csb_vram_pin(adev); 3597 if (r) 3598 return r; 3599 3600 if (!amdgpu_emu_mode) 3601 gfx_v10_0_init_golden_registers(adev); 3602 3603 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 3604 /** 3605 * For gfx 10, rlc firmware loading relies on smu firmware is 3606 * loaded firstly, so in direct type, it has to load smc ucode 3607 * here before rlc. 3608 */ 3609 r = smu_load_microcode(&adev->smu); 3610 if (r) 3611 return r; 3612 3613 r = smu_check_fw_status(&adev->smu); 3614 if (r) { 3615 pr_err("SMC firmware status is not correct\n"); 3616 return r; 3617 } 3618 } 3619 3620 /* if GRBM CAM not remapped, set up the remapping */ 3621 if (!gfx_v10_0_check_grbm_cam_remapping(adev)) 3622 gfx_v10_0_setup_grbm_cam_remapping(adev); 3623 3624 gfx_v10_0_constants_init(adev); 3625 3626 r = gfx_v10_0_rlc_resume(adev); 3627 if (r) 3628 return r; 3629 3630 /* 3631 * init golden registers and rlc resume may override some registers, 3632 * reconfig them here 3633 */ 3634 gfx_v10_0_tcp_harvest(adev); 3635 3636 r = gfx_v10_0_cp_resume(adev); 3637 if (r) 3638 return r; 3639 3640 return r; 3641 } 3642 3643 #ifndef BRING_UP_DEBUG 3644 static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) 3645 { 3646 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 3647 struct amdgpu_ring *kiq_ring = &kiq->ring; 3648 int i; 3649 3650 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 3651 return -EINVAL; 3652 3653 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * 3654 adev->gfx.num_gfx_rings)) 3655 return -ENOMEM; 3656 3657 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3658 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], 3659 PREEMPT_QUEUES, 0, 0); 3660 3661 return amdgpu_ring_test_ring(kiq_ring); 3662 } 3663 #endif 3664 3665 static int gfx_v10_0_hw_fini(void *handle) 3666 { 3667 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3668 int r; 3669 3670 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3671 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3672 #ifndef BRING_UP_DEBUG 3673 if (amdgpu_async_gfx_ring) { 3674 r = gfx_v10_0_kiq_disable_kgq(adev); 3675 if (r) 3676 DRM_ERROR("KGQ disable failed\n"); 3677 } 3678 #endif 3679 if (amdgpu_gfx_disable_kcq(adev)) 3680 DRM_ERROR("KCQ disable failed\n"); 3681 if (amdgpu_sriov_vf(adev)) { 3682 pr_debug("For SRIOV client, shouldn't do 
anything.\n"); 3683 return 0; 3684 } 3685 gfx_v10_0_cp_enable(adev, false); 3686 gfx_v10_0_enable_gui_idle_interrupt(adev, false); 3687 gfx_v10_0_csb_vram_unpin(adev); 3688 3689 return 0; 3690 } 3691 3692 static int gfx_v10_0_suspend(void *handle) 3693 { 3694 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3695 3696 adev->in_suspend = true; 3697 return gfx_v10_0_hw_fini(adev); 3698 } 3699 3700 static int gfx_v10_0_resume(void *handle) 3701 { 3702 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3703 int r; 3704 3705 r = gfx_v10_0_hw_init(adev); 3706 adev->in_suspend = false; 3707 return r; 3708 } 3709 3710 static bool gfx_v10_0_is_idle(void *handle) 3711 { 3712 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3713 3714 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3715 GRBM_STATUS, GUI_ACTIVE)) 3716 return false; 3717 else 3718 return true; 3719 } 3720 3721 static int gfx_v10_0_wait_for_idle(void *handle) 3722 { 3723 unsigned i; 3724 u32 tmp; 3725 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3726 3727 for (i = 0; i < adev->usec_timeout; i++) { 3728 /* read MC_STATUS */ 3729 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) & 3730 GRBM_STATUS__GUI_ACTIVE_MASK; 3731 3732 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 3733 return 0; 3734 udelay(1); 3735 } 3736 return -ETIMEDOUT; 3737 } 3738 3739 static int gfx_v10_0_soft_reset(void *handle) 3740 { 3741 u32 grbm_soft_reset = 0; 3742 u32 tmp; 3743 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3744 3745 /* GRBM_STATUS */ 3746 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3747 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3748 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3749 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK | 3750 GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK | 3751 GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK 3752 | GRBM_STATUS__BCI_BUSY_MASK)) { 3753 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3754 GRBM_SOFT_RESET, SOFT_RESET_CP, 3755 1); 3756 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3757 GRBM_SOFT_RESET, SOFT_RESET_GFX, 3758 1); 3759 } 3760 3761 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3762 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3763 GRBM_SOFT_RESET, SOFT_RESET_CP, 3764 1); 3765 } 3766 3767 /* GRBM_STATUS2 */ 3768 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3769 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3770 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3771 GRBM_SOFT_RESET, SOFT_RESET_RLC, 3772 1); 3773 3774 if (grbm_soft_reset) { 3775 /* stop the rlc */ 3776 gfx_v10_0_rlc_stop(adev); 3777 3778 /* Disable GFX parsing/prefetching */ 3779 gfx_v10_0_cp_gfx_enable(adev, false); 3780 3781 /* Disable MEC parsing/prefetching */ 3782 gfx_v10_0_cp_compute_enable(adev, false); 3783 3784 if (grbm_soft_reset) { 3785 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3786 tmp |= grbm_soft_reset; 3787 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3788 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3789 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3790 3791 udelay(50); 3792 3793 tmp &= ~grbm_soft_reset; 3794 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3795 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3796 } 3797 3798 /* Wait a little for things to settle down */ 3799 udelay(50); 3800 } 3801 return 0; 3802 } 3803 3804 static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3805 { 3806 uint64_t clock; 3807 3808 
mutex_lock(&adev->gfx.gpu_clock_mutex); 3809 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3810 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3811 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3812 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3813 return clock; 3814 } 3815 3816 static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3817 uint32_t vmid, 3818 uint32_t gds_base, uint32_t gds_size, 3819 uint32_t gws_base, uint32_t gws_size, 3820 uint32_t oa_base, uint32_t oa_size) 3821 { 3822 struct amdgpu_device *adev = ring->adev; 3823 3824 /* GDS Base */ 3825 gfx_v10_0_write_data_to_reg(ring, 0, false, 3826 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3827 gds_base); 3828 3829 /* GDS Size */ 3830 gfx_v10_0_write_data_to_reg(ring, 0, false, 3831 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3832 gds_size); 3833 3834 /* GWS */ 3835 gfx_v10_0_write_data_to_reg(ring, 0, false, 3836 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3837 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3838 3839 /* OA */ 3840 gfx_v10_0_write_data_to_reg(ring, 0, false, 3841 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3842 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3843 } 3844 3845 static int gfx_v10_0_early_init(void *handle) 3846 { 3847 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3848 3849 adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS; 3850 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3851 3852 gfx_v10_0_set_kiq_pm4_funcs(adev); 3853 gfx_v10_0_set_ring_funcs(adev); 3854 gfx_v10_0_set_irq_funcs(adev); 3855 gfx_v10_0_set_gds_init(adev); 3856 gfx_v10_0_set_rlc_funcs(adev); 3857 3858 return 0; 3859 } 3860 3861 static int gfx_v10_0_late_init(void *handle) 3862 { 3863 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3864 int r; 3865 3866 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3867 if (r) 3868 return r; 3869 3870 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3871 if (r) 3872 return r; 3873 3874 return 0; 3875 } 3876 3877 static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev) 3878 { 3879 uint32_t rlc_cntl; 3880 3881 /* if RLC is not enabled, do nothing */ 3882 rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3883 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? 
true : false; 3884 } 3885 3886 static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) 3887 { 3888 uint32_t data; 3889 unsigned i; 3890 3891 data = RLC_SAFE_MODE__CMD_MASK; 3892 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3893 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3894 3895 /* wait for RLC_SAFE_MODE */ 3896 for (i = 0; i < adev->usec_timeout; i++) { 3897 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3898 break; 3899 udelay(1); 3900 } 3901 } 3902 3903 static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) 3904 { 3905 uint32_t data; 3906 3907 data = RLC_SAFE_MODE__CMD_MASK; 3908 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3909 } 3910 3911 static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 3912 bool enable) 3913 { 3914 uint32_t data, def; 3915 3916 /* It is disabled by HW by default */ 3917 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 3918 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 3919 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3920 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 3921 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 3922 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 3923 3924 /* only for Vega10 & Raven1 */ 3925 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 3926 3927 if (def != data) 3928 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3929 3930 /* MGLS is a global flag to control all MGLS in GFX */ 3931 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 3932 /* 2 - RLC memory Light sleep */ 3933 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 3934 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 3935 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 3936 if (def != data) 3937 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 3938 } 3939 /* 3 - CP memory Light sleep */ 3940 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 3941 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 3942 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 3943 if (def != data) 3944 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 3945 } 3946 } 3947 } else { 3948 /* 1 - MGCG_OVERRIDE */ 3949 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3950 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 3951 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 3952 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 3953 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 3954 if (def != data) 3955 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3956 3957 /* 2 - disable MGLS in RLC */ 3958 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 3959 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 3960 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 3961 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 3962 } 3963 3964 /* 3 - disable MGLS in CP */ 3965 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 3966 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 3967 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 3968 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 3969 } 3970 } 3971 } 3972 3973 static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, 3974 bool enable) 3975 { 3976 uint32_t data, def; 3977 3978 /* Enable 3D CGCG/CGLS */ 3979 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 3980 /* write cmd to clear cgcg/cgls ov */ 3981 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3982 /* unset CGCG override */ 3983 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 3984 /* update CGCG and CGLS override 
bits */ 3985 if (def != data) 3986 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 3987 /* enable 3Dcgcg FSM(0x0000363f) */ 3988 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 3989 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 3990 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 3991 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 3992 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 3993 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 3994 if (def != data) 3995 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 3996 3997 /* set IDLE_POLL_COUNT(0x00900100) */ 3998 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 3999 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4000 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4001 if (def != data) 4002 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4003 } else { 4004 /* Disable CGCG/CGLS */ 4005 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4006 /* disable cgcg, cgls should be disabled */ 4007 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4008 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4009 /* disable cgcg and cgls in FSM */ 4010 if (def != data) 4011 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4012 } 4013 } 4014 4015 static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4016 bool enable) 4017 { 4018 uint32_t def, data; 4019 4020 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4021 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4022 /* unset CGCG override */ 4023 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4024 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4025 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4026 else 4027 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4028 /* update CGCG and CGLS override bits */ 4029 if (def != data) 4030 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4031 4032 /* enable cgcg FSM(0x0000363F) */ 4033 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4034 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4035 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4036 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4037 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4038 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4039 if (def != data) 4040 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4041 4042 /* set IDLE_POLL_COUNT(0x00900100) */ 4043 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4044 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4045 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4046 if (def != data) 4047 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4048 } else { 4049 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4050 /* reset CGCG/CGLS bits */ 4051 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4052 /* disable cgcg and cgls in FSM */ 4053 if (def != data) 4054 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4055 } 4056 } 4057 4058 static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4059 bool enable) 4060 { 4061 amdgpu_gfx_rlc_enter_safe_mode(adev); 4062 4063 if (enable) { 4064 /* CGCG/CGLS should be enabled after MGCG/MGLS 4065 * === MGCG + MGLS === 4066 */ 4067 gfx_v10_0_update_medium_grain_clock_gating(adev, enable); 4068 /* === CGCG /CGLS for GFX 3D Only === */ 4069 gfx_v10_0_update_3d_clock_gating(adev, enable); 4070 /* === CGCG + CGLS === */ 4071 gfx_v10_0_update_coarse_grain_clock_gating(adev, 
enable); 4072 } else { 4073 /* CGCG/CGLS should be disabled before MGCG/MGLS 4074 * === CGCG + CGLS === 4075 */ 4076 gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); 4077 /* === CGCG /CGLS for GFX 3D Only === */ 4078 gfx_v10_0_update_3d_clock_gating(adev, enable); 4079 /* === MGCG + MGLS === */ 4080 gfx_v10_0_update_medium_grain_clock_gating(adev, enable); 4081 } 4082 4083 if (adev->cg_flags & 4084 (AMD_CG_SUPPORT_GFX_MGCG | 4085 AMD_CG_SUPPORT_GFX_CGLS | 4086 AMD_CG_SUPPORT_GFX_CGCG | 4087 AMD_CG_SUPPORT_GFX_CGLS | 4088 AMD_CG_SUPPORT_GFX_3D_CGCG | 4089 AMD_CG_SUPPORT_GFX_3D_CGLS)) 4090 gfx_v10_0_enable_gui_idle_interrupt(adev, enable); 4091 4092 amdgpu_gfx_rlc_exit_safe_mode(adev); 4093 4094 return 0; 4095 } 4096 4097 static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { 4098 .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, 4099 .set_safe_mode = gfx_v10_0_set_safe_mode, 4100 .unset_safe_mode = gfx_v10_0_unset_safe_mode, 4101 .init = gfx_v10_0_rlc_init, 4102 .get_csb_size = gfx_v10_0_get_csb_size, 4103 .get_csb_buffer = gfx_v10_0_get_csb_buffer, 4104 .resume = gfx_v10_0_rlc_resume, 4105 .stop = gfx_v10_0_rlc_stop, 4106 .reset = gfx_v10_0_rlc_reset, 4107 .start = gfx_v10_0_rlc_start 4108 }; 4109 4110 static int gfx_v10_0_set_powergating_state(void *handle, 4111 enum amd_powergating_state state) 4112 { 4113 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4114 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4115 switch (adev->asic_type) { 4116 case CHIP_NAVI10: 4117 case CHIP_NAVI14: 4118 if (!enable) { 4119 amdgpu_gfx_off_ctrl(adev, false); 4120 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4121 } else 4122 amdgpu_gfx_off_ctrl(adev, true); 4123 break; 4124 default: 4125 break; 4126 } 4127 return 0; 4128 } 4129 4130 static int gfx_v10_0_set_clockgating_state(void *handle, 4131 enum amd_clockgating_state state) 4132 { 4133 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4134 4135 switch (adev->asic_type) { 4136 case CHIP_NAVI10: 4137 case CHIP_NAVI14: 4138 gfx_v10_0_update_gfx_clock_gating(adev, 4139 state == AMD_CG_STATE_GATE ? 
true : false); 4140 break; 4141 default: 4142 break; 4143 } 4144 return 0; 4145 } 4146 4147 static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) 4148 { 4149 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4150 int data; 4151 4152 /* AMD_CG_SUPPORT_GFX_MGCG */ 4153 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4154 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4155 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4156 4157 /* AMD_CG_SUPPORT_GFX_CGCG */ 4158 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4159 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4160 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4161 4162 /* AMD_CG_SUPPORT_GFX_CGLS */ 4163 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4164 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4165 4166 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4167 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4168 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4169 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4170 4171 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4172 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4173 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4174 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4175 4176 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4177 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4178 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4179 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4180 4181 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4182 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4183 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4184 } 4185 4186 static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4187 { 4188 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/ 4189 } 4190 4191 static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4192 { 4193 struct amdgpu_device *adev = ring->adev; 4194 u64 wptr; 4195 4196 /* XXX check if swapping is necessary on BE */ 4197 if (ring->use_doorbell) { 4198 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4199 } else { 4200 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4201 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4202 } 4203 4204 return wptr; 4205 } 4206 4207 static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4208 { 4209 struct amdgpu_device *adev = ring->adev; 4210 4211 if (ring->use_doorbell) { 4212 /* XXX check if swapping is necessary on BE */ 4213 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4214 WDOORBELL64(ring->doorbell_index, ring->wptr); 4215 } else { 4216 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4217 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4218 } 4219 } 4220 4221 static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4222 { 4223 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */ 4224 } 4225 4226 static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4227 { 4228 u64 wptr; 4229 4230 /* XXX check if swapping is necessary on BE */ 4231 if (ring->use_doorbell) 4232 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4233 else 4234 BUG(); 4235 return wptr; 4236 } 4237 4238 static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4239 { 4240 struct amdgpu_device *adev = ring->adev; 4241 4242 /* XXX check if swapping is necessary on BE */ 4243 if (ring->use_doorbell) { 4244 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4245 WDOORBELL64(ring->doorbell_index, ring->wptr); 4246 } else { 4247 BUG(); /* only 
DOORBELL method supported on gfx10 now */ 4248 } 4249 } 4250 4251 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4252 { 4253 struct amdgpu_device *adev = ring->adev; 4254 u32 ref_and_mask, reg_mem_engine; 4255 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4256 4257 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4258 switch (ring->me) { 4259 case 1: 4260 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4261 break; 4262 case 2: 4263 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4264 break; 4265 default: 4266 return; 4267 } 4268 reg_mem_engine = 0; 4269 } else { 4270 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4271 reg_mem_engine = 1; /* pfp */ 4272 } 4273 4274 gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4275 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4276 adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4277 ref_and_mask, ref_and_mask, 0x20); 4278 } 4279 4280 static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4281 struct amdgpu_job *job, 4282 struct amdgpu_ib *ib, 4283 uint32_t flags) 4284 { 4285 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4286 u32 header, control = 0; 4287 4288 /* Prevent a hw deadlock due to a wave ID mismatch between ME and GDS. 4289 * This resets the wave ID counters. (needed by transform feedback) 4290 * TODO: This might only be needed on a VMID switch when we change 4291 * the GDS OA mapping, not sure. 4292 */ 4293 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4294 amdgpu_ring_write(ring, mmVGT_GS_MAX_WAVE_ID); 4295 amdgpu_ring_write(ring, ring->adev->gds.vgt_gs_max_wave_id); 4296 4297 if (ib->flags & AMDGPU_IB_FLAG_CE) 4298 header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2); 4299 else 4300 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4301 4302 control |= ib->length_dw | (vmid << 24); 4303 4304 if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4305 control |= INDIRECT_BUFFER_PRE_ENB(1); 4306 4307 if (flags & AMDGPU_IB_PREEMPTED) 4308 control |= INDIRECT_BUFFER_PRE_RESUME(1); 4309 4310 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4311 gfx_v10_0_ring_emit_de_meta(ring, 4312 flags & AMDGPU_IB_PREEMPTED ? true : false); 4313 } 4314 4315 amdgpu_ring_write(ring, header); 4316 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4317 amdgpu_ring_write(ring, 4318 #ifdef __BIG_ENDIAN 4319 (2 << 0) | 4320 #endif 4321 lower_32_bits(ib->gpu_addr)); 4322 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4323 amdgpu_ring_write(ring, control); 4324 } 4325 4326 static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4327 struct amdgpu_job *job, 4328 struct amdgpu_ib *ib, 4329 uint32_t flags) 4330 { 4331 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4332 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4333 4334 /* Currently, there is a high possibility to get wave ID mismatch 4335 * between ME and GDS, leading to a hw deadlock, because ME generates 4336 * different wave IDs than the GDS expects. This situation happens 4337 * randomly when at least 5 compute pipes use GDS ordered append. 4338 * The wave IDs generated by ME are also wrong after suspend/resume. 4339 * Those are probably bugs somewhere else in the kernel driver. 4340 * 4341 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4342 * GDS to 0 for this ring (me/pipe). 
4343 */ 4344 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4345 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4346 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4347 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4348 } 4349 4350 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4351 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4352 amdgpu_ring_write(ring, 4353 #ifdef __BIG_ENDIAN 4354 (2 << 0) | 4355 #endif 4356 lower_32_bits(ib->gpu_addr)); 4357 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4358 amdgpu_ring_write(ring, control); 4359 } 4360 4361 static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4362 u64 seq, unsigned flags) 4363 { 4364 struct amdgpu_device *adev = ring->adev; 4365 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4366 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4367 4368 /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */ 4369 if (adev->pdev->device == 0x50) 4370 int_sel = false; 4371 4372 /* RELEASE_MEM - flush caches, send int */ 4373 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4374 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 4375 PACKET3_RELEASE_MEM_GCR_GL2_WB | 4376 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 4377 PACKET3_RELEASE_MEM_GCR_GLM_WB | 4378 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 4379 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4380 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 4381 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 4382 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 4383 4384 /* 4385 * the address should be Qword aligned if 64bit write, Dword 4386 * aligned if only send 32bit data low (discard data high) 4387 */ 4388 if (write64bit) 4389 BUG_ON(addr & 0x7); 4390 else 4391 BUG_ON(addr & 0x3); 4392 amdgpu_ring_write(ring, lower_32_bits(addr)); 4393 amdgpu_ring_write(ring, upper_32_bits(addr)); 4394 amdgpu_ring_write(ring, lower_32_bits(seq)); 4395 amdgpu_ring_write(ring, upper_32_bits(seq)); 4396 amdgpu_ring_write(ring, 0); 4397 } 4398 4399 static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4400 { 4401 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4402 uint32_t seq = ring->fence_drv.sync_seq; 4403 uint64_t addr = ring->fence_drv.gpu_addr; 4404 4405 gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 4406 upper_32_bits(addr), seq, 0xffffffff, 4); 4407 } 4408 4409 static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4410 unsigned vmid, uint64_t pd_addr) 4411 { 4412 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4413 4414 /* compute doesn't have PFP */ 4415 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4416 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4417 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4418 amdgpu_ring_write(ring, 0x0); 4419 } 4420 } 4421 4422 static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4423 u64 seq, unsigned int flags) 4424 { 4425 struct amdgpu_device *adev = ring->adev; 4426 4427 /* we only allocate 32bit for each seq wb address */ 4428 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4429 4430 /* write fence seq to the "addr" */ 4431 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4432 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4433 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4434 amdgpu_ring_write(ring, lower_32_bits(addr)); 4435 amdgpu_ring_write(ring, upper_32_bits(addr)); 4436 
amdgpu_ring_write(ring, lower_32_bits(seq)); 4437 4438 if (flags & AMDGPU_FENCE_FLAG_INT) { 4439 /* set register to trigger INT */ 4440 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4441 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4442 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4443 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4444 amdgpu_ring_write(ring, 0); 4445 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4446 } 4447 } 4448 4449 static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring) 4450 { 4451 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4452 amdgpu_ring_write(ring, 0); 4453 } 4454 4455 static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4456 { 4457 uint32_t dw2 = 0; 4458 4459 if (amdgpu_mcbp) 4460 gfx_v10_0_ring_emit_ce_meta(ring, 4461 flags & AMDGPU_IB_PREEMPTED ? true : false); 4462 4463 gfx_v10_0_ring_emit_tmz(ring, true); 4464 4465 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4466 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4467 /* set load_global_config & load_global_uconfig */ 4468 dw2 |= 0x8001; 4469 /* set load_cs_sh_regs */ 4470 dw2 |= 0x01000000; 4471 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4472 dw2 |= 0x10002; 4473 4474 /* set load_ce_ram if preamble presented */ 4475 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4476 dw2 |= 0x10000000; 4477 } else { 4478 /* still load_ce_ram if this is the first time preamble presented 4479 * although there is no context switch happens. 4480 */ 4481 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4482 dw2 |= 0x10000000; 4483 } 4484 4485 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4486 amdgpu_ring_write(ring, dw2); 4487 amdgpu_ring_write(ring, 0); 4488 } 4489 4490 static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4491 { 4492 unsigned ret; 4493 4494 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4495 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4496 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4497 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4498 ret = ring->wptr & ring->buf_mask; 4499 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4500 4501 return ret; 4502 } 4503 4504 static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4505 { 4506 unsigned cur; 4507 BUG_ON(offset > ring->buf_mask); 4508 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4509 4510 cur = (ring->wptr - 1) & ring->buf_mask; 4511 if (likely(cur > offset)) 4512 ring->ring[offset] = cur - offset; 4513 else 4514 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; 4515 } 4516 4517 static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) 4518 { 4519 int i, r = 0; 4520 struct amdgpu_device *adev = ring->adev; 4521 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4522 struct amdgpu_ring *kiq_ring = &kiq->ring; 4523 4524 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 4525 return -EINVAL; 4526 4527 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) 4528 return -ENOMEM; 4529 4530 /* assert preemption condition */ 4531 amdgpu_ring_set_preempt_cond_exec(ring, false); 4532 4533 /* assert IB preemption, emit the trailing fence */ 4534 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 4535 ring->trail_fence_gpu_addr, 4536 ++ring->trail_seq); 4537 amdgpu_ring_commit(kiq_ring); 4538 4539 /* poll the trailing fence */ 4540 for (i = 0; i < 
adev->usec_timeout; i++) { 4541 if (ring->trail_seq == 4542 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 4543 break; 4544 DRM_UDELAY(1); 4545 } 4546 4547 if (i >= adev->usec_timeout) { 4548 r = -EINVAL; 4549 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 4550 } 4551 4552 /* deassert preemption condition */ 4553 amdgpu_ring_set_preempt_cond_exec(ring, true); 4554 return r; 4555 } 4556 4557 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 4558 { 4559 struct amdgpu_device *adev = ring->adev; 4560 struct v10_ce_ib_state ce_payload = {0}; 4561 uint64_t csa_addr; 4562 int cnt; 4563 4564 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4565 csa_addr = amdgpu_csa_vaddr(ring->adev); 4566 4567 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4568 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4569 WRITE_DATA_DST_SEL(8) | 4570 WR_CONFIRM) | 4571 WRITE_DATA_CACHE_POLICY(0)); 4572 amdgpu_ring_write(ring, lower_32_bits(csa_addr + 4573 offsetof(struct v10_gfx_meta_data, ce_payload))); 4574 amdgpu_ring_write(ring, upper_32_bits(csa_addr + 4575 offsetof(struct v10_gfx_meta_data, ce_payload))); 4576 4577 if (resume) 4578 amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + 4579 offsetof(struct v10_gfx_meta_data, 4580 ce_payload), 4581 sizeof(ce_payload) >> 2); 4582 else 4583 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 4584 sizeof(ce_payload) >> 2); 4585 } 4586 4587 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 4588 { 4589 struct amdgpu_device *adev = ring->adev; 4590 struct v10_de_ib_state de_payload = {0}; 4591 uint64_t csa_addr, gds_addr; 4592 int cnt; 4593 4594 csa_addr = amdgpu_csa_vaddr(ring->adev); 4595 gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size, 4596 PAGE_SIZE); 4597 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 4598 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4599 4600 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4601 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4602 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4603 WRITE_DATA_DST_SEL(8) | 4604 WR_CONFIRM) | 4605 WRITE_DATA_CACHE_POLICY(0)); 4606 amdgpu_ring_write(ring, lower_32_bits(csa_addr + 4607 offsetof(struct v10_gfx_meta_data, de_payload))); 4608 amdgpu_ring_write(ring, upper_32_bits(csa_addr + 4609 offsetof(struct v10_gfx_meta_data, de_payload))); 4610 4611 if (resume) 4612 amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + 4613 offsetof(struct v10_gfx_meta_data, 4614 de_payload), 4615 sizeof(de_payload) >> 2); 4616 else 4617 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 4618 sizeof(de_payload) >> 2); 4619 } 4620 4621 static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4622 { 4623 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4624 amdgpu_ring_write(ring, FRAME_CMD(start ? 
0 : 1)); /* frame_end */ 4625 } 4626 4627 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 4628 { 4629 struct amdgpu_device *adev = ring->adev; 4630 4631 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4632 amdgpu_ring_write(ring, 0 | /* src: register*/ 4633 (5 << 8) | /* dst: memory */ 4634 (1 << 20)); /* write confirm */ 4635 amdgpu_ring_write(ring, reg); 4636 amdgpu_ring_write(ring, 0); 4637 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4638 adev->virt.reg_val_offs * 4)); 4639 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4640 adev->virt.reg_val_offs * 4)); 4641 } 4642 4643 static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4644 uint32_t val) 4645 { 4646 uint32_t cmd = 0; 4647 4648 switch (ring->funcs->type) { 4649 case AMDGPU_RING_TYPE_GFX: 4650 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4651 break; 4652 case AMDGPU_RING_TYPE_KIQ: 4653 cmd = (1 << 16); /* no inc addr */ 4654 break; 4655 default: 4656 cmd = WR_CONFIRM; 4657 break; 4658 } 4659 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4660 amdgpu_ring_write(ring, cmd); 4661 amdgpu_ring_write(ring, reg); 4662 amdgpu_ring_write(ring, 0); 4663 amdgpu_ring_write(ring, val); 4664 } 4665 4666 static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4667 uint32_t val, uint32_t mask) 4668 { 4669 gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4670 } 4671 4672 static void 4673 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4674 uint32_t me, uint32_t pipe, 4675 enum amdgpu_interrupt_state state) 4676 { 4677 uint32_t cp_int_cntl, cp_int_cntl_reg; 4678 4679 if (!me) { 4680 switch (pipe) { 4681 case 0: 4682 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0); 4683 break; 4684 case 1: 4685 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1); 4686 break; 4687 default: 4688 DRM_DEBUG("invalid pipe %d\n", pipe); 4689 return; 4690 } 4691 } else { 4692 DRM_DEBUG("invalid me %d\n", me); 4693 return; 4694 } 4695 4696 switch (state) { 4697 case AMDGPU_IRQ_STATE_DISABLE: 4698 cp_int_cntl = RREG32(cp_int_cntl_reg); 4699 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4700 TIME_STAMP_INT_ENABLE, 0); 4701 WREG32(cp_int_cntl_reg, cp_int_cntl); break; 4702 case AMDGPU_IRQ_STATE_ENABLE: 4703 cp_int_cntl = RREG32(cp_int_cntl_reg); 4704 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 4705 TIME_STAMP_INT_ENABLE, 1); 4706 WREG32(cp_int_cntl_reg, cp_int_cntl); 4707 break; 4708 default: 4709 break; 4710 } 4711 } 4712 4713 static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4714 int me, int pipe, 4715 enum amdgpu_interrupt_state state) 4716 { 4717 u32 mec_int_cntl, mec_int_cntl_reg; 4718 4719 /* 4720 * amdgpu controls only the first MEC. That's why this function only 4721 * handles the setting of interrupts for this specific MEC. All other 4722 * pipes' interrupts are set by amdkfd. 
4723 */ 4724 4725 if (me == 1) { 4726 switch (pipe) { 4727 case 0: 4728 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4729 break; 4730 case 1: 4731 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4732 break; 4733 case 2: 4734 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4735 break; 4736 case 3: 4737 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4738 break; 4739 default: 4740 DRM_DEBUG("invalid pipe %d\n", pipe); 4741 return; 4742 } 4743 } else { 4744 DRM_DEBUG("invalid me %d\n", me); 4745 return; 4746 } 4747 4748 switch (state) { 4749 case AMDGPU_IRQ_STATE_DISABLE: 4750 mec_int_cntl = RREG32(mec_int_cntl_reg); 4751 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4752 TIME_STAMP_INT_ENABLE, 0); 4753 WREG32(mec_int_cntl_reg, mec_int_cntl); 4754 break; 4755 case AMDGPU_IRQ_STATE_ENABLE: 4756 mec_int_cntl = RREG32(mec_int_cntl_reg); 4757 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4758 TIME_STAMP_INT_ENABLE, 1); 4759 WREG32(mec_int_cntl_reg, mec_int_cntl); 4760 break; 4761 default: 4762 break; 4763 } 4764 } 4765 4766 static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev, 4767 struct amdgpu_irq_src *src, 4768 unsigned type, 4769 enum amdgpu_interrupt_state state) 4770 { 4771 switch (type) { 4772 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 4773 gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 4774 break; 4775 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 4776 gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 4777 break; 4778 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 4779 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 4780 break; 4781 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 4782 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 4783 break; 4784 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 4785 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 4786 break; 4787 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 4788 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 4789 break; 4790 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 4791 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 4792 break; 4793 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 4794 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 4795 break; 4796 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 4797 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 4798 break; 4799 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 4800 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 4801 break; 4802 default: 4803 break; 4804 } 4805 return 0; 4806 } 4807 4808 static int gfx_v10_0_eop_irq(struct amdgpu_device *adev, 4809 struct amdgpu_irq_src *source, 4810 struct amdgpu_iv_entry *entry) 4811 { 4812 int i; 4813 u8 me_id, pipe_id, queue_id; 4814 struct amdgpu_ring *ring; 4815 4816 DRM_DEBUG("IH: CP EOP\n"); 4817 me_id = (entry->ring_id & 0x0c) >> 2; 4818 pipe_id = (entry->ring_id & 0x03) >> 0; 4819 queue_id = (entry->ring_id & 0x70) >> 4; 4820 4821 switch (me_id) { 4822 case 0: 4823 if (pipe_id == 0) 4824 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 4825 else 4826 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 4827 break; 4828 case 1: 4829 case 2: 4830 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4831 ring = &adev->gfx.compute_ring[i]; 4832 /* Per-queue interrupt is supported for MEC starting from VI. 4833 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
4834 */ 4835 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 4836 amdgpu_fence_process(ring); 4837 } 4838 break; 4839 } 4840 return 0; 4841 } 4842 4843 static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4844 struct amdgpu_irq_src *source, 4845 unsigned type, 4846 enum amdgpu_interrupt_state state) 4847 { 4848 switch (state) { 4849 case AMDGPU_IRQ_STATE_DISABLE: 4850 case AMDGPU_IRQ_STATE_ENABLE: 4851 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4852 PRIV_REG_INT_ENABLE, 4853 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4854 break; 4855 default: 4856 break; 4857 } 4858 4859 return 0; 4860 } 4861 4862 static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4863 struct amdgpu_irq_src *source, 4864 unsigned type, 4865 enum amdgpu_interrupt_state state) 4866 { 4867 switch (state) { 4868 case AMDGPU_IRQ_STATE_DISABLE: 4869 case AMDGPU_IRQ_STATE_ENABLE: 4870 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4871 PRIV_INSTR_INT_ENABLE, 4872 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4873 default: 4874 break; 4875 } 4876 4877 return 0; 4878 } 4879 4880 static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, 4881 struct amdgpu_iv_entry *entry) 4882 { 4883 u8 me_id, pipe_id, queue_id; 4884 struct amdgpu_ring *ring; 4885 int i; 4886 4887 me_id = (entry->ring_id & 0x0c) >> 2; 4888 pipe_id = (entry->ring_id & 0x03) >> 0; 4889 queue_id = (entry->ring_id & 0x70) >> 4; 4890 4891 switch (me_id) { 4892 case 0: 4893 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4894 ring = &adev->gfx.gfx_ring[i]; 4895 /* we only enabled 1 gfx queue per pipe for now */ 4896 if (ring->me == me_id && ring->pipe == pipe_id) 4897 drm_sched_fault(&ring->sched); 4898 } 4899 break; 4900 case 1: 4901 case 2: 4902 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4903 ring = &adev->gfx.compute_ring[i]; 4904 if (ring->me == me_id && ring->pipe == pipe_id && 4905 ring->queue == queue_id) 4906 drm_sched_fault(&ring->sched); 4907 } 4908 break; 4909 default: 4910 BUG(); 4911 } 4912 } 4913 4914 static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, 4915 struct amdgpu_irq_src *source, 4916 struct amdgpu_iv_entry *entry) 4917 { 4918 DRM_ERROR("Illegal register access in command stream\n"); 4919 gfx_v10_0_handle_priv_fault(adev, entry); 4920 return 0; 4921 } 4922 4923 static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, 4924 struct amdgpu_irq_src *source, 4925 struct amdgpu_iv_entry *entry) 4926 { 4927 DRM_ERROR("Illegal instruction in command stream\n"); 4928 gfx_v10_0_handle_priv_fault(adev, entry); 4929 return 0; 4930 } 4931 4932 static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 4933 struct amdgpu_irq_src *src, 4934 unsigned int type, 4935 enum amdgpu_interrupt_state state) 4936 { 4937 uint32_t tmp, target; 4938 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 4939 4940 if (ring->me == 1) 4941 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4942 else 4943 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); 4944 target += ring->pipe; 4945 4946 switch (type) { 4947 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 4948 if (state == AMDGPU_IRQ_STATE_DISABLE) { 4949 tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 4950 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 4951 GENERIC2_INT_ENABLE, 0); 4952 WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 4953 4954 tmp = RREG32(target); 4955 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 4956 GENERIC2_INT_ENABLE, 0); 4957 WREG32(target, tmp); 4958 } else { 4959 tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); 4960 
tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 4961 GENERIC2_INT_ENABLE, 1); 4962 WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); 4963 4964 tmp = RREG32(target); 4965 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, 4966 GENERIC2_INT_ENABLE, 1); 4967 WREG32(target, tmp); 4968 } 4969 break; 4970 default: 4971 BUG(); /* kiq only support GENERIC2_INT now */ 4972 break; 4973 } 4974 return 0; 4975 } 4976 4977 static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev, 4978 struct amdgpu_irq_src *source, 4979 struct amdgpu_iv_entry *entry) 4980 { 4981 u8 me_id, pipe_id, queue_id; 4982 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 4983 4984 me_id = (entry->ring_id & 0x0c) >> 2; 4985 pipe_id = (entry->ring_id & 0x03) >> 0; 4986 queue_id = (entry->ring_id & 0x70) >> 4; 4987 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 4988 me_id, pipe_id, queue_id); 4989 4990 amdgpu_fence_process(ring); 4991 return 0; 4992 } 4993 4994 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { 4995 .name = "gfx_v10_0", 4996 .early_init = gfx_v10_0_early_init, 4997 .late_init = gfx_v10_0_late_init, 4998 .sw_init = gfx_v10_0_sw_init, 4999 .sw_fini = gfx_v10_0_sw_fini, 5000 .hw_init = gfx_v10_0_hw_init, 5001 .hw_fini = gfx_v10_0_hw_fini, 5002 .suspend = gfx_v10_0_suspend, 5003 .resume = gfx_v10_0_resume, 5004 .is_idle = gfx_v10_0_is_idle, 5005 .wait_for_idle = gfx_v10_0_wait_for_idle, 5006 .soft_reset = gfx_v10_0_soft_reset, 5007 .set_clockgating_state = gfx_v10_0_set_clockgating_state, 5008 .set_powergating_state = gfx_v10_0_set_powergating_state, 5009 .get_clockgating_state = gfx_v10_0_get_clockgating_state, 5010 }; 5011 5012 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 5013 .type = AMDGPU_RING_TYPE_GFX, 5014 .align_mask = 0xff, 5015 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 5016 .support_64bit_ptrs = true, 5017 .vmhub = AMDGPU_GFXHUB_0, 5018 .get_rptr = gfx_v10_0_ring_get_rptr_gfx, 5019 .get_wptr = gfx_v10_0_ring_get_wptr_gfx, 5020 .set_wptr = gfx_v10_0_ring_set_wptr_gfx, 5021 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5022 5 + /* COND_EXEC */ 5023 7 + /* PIPELINE_SYNC */ 5024 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 5025 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 5026 2 + /* VM_FLUSH */ 5027 8 + /* FENCE for VM_FLUSH */ 5028 20 + /* GDS switch */ 5029 4 + /* double SWITCH_BUFFER, 5030 * the first COND_EXEC jump to the place 5031 * just prior to this double SWITCH_BUFFER 5032 */ 5033 5 + /* COND_EXEC */ 5034 7 + /* HDP_flush */ 5035 4 + /* VGT_flush */ 5036 14 + /* CE_META */ 5037 31 + /* DE_META */ 5038 3 + /* CNTX_CTRL */ 5039 5 + /* HDP_INVL */ 5040 8 + 8 + /* FENCE x2 */ 5041 2, /* SWITCH_BUFFER */ 5042 .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_gfx */ 5043 .emit_ib = gfx_v10_0_ring_emit_ib_gfx, 5044 .emit_fence = gfx_v10_0_ring_emit_fence, 5045 .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, 5046 .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, 5047 .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, 5048 .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, 5049 .test_ring = gfx_v10_0_ring_test_ring, 5050 .test_ib = gfx_v10_0_ring_test_ib, 5051 .insert_nop = amdgpu_ring_insert_nop, 5052 .pad_ib = amdgpu_ring_generic_pad_ib, 5053 .emit_switch_buffer = gfx_v10_0_ring_emit_sb, 5054 .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, 5055 .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, 5056 .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, 5057 .preempt_ib = gfx_v10_0_ring_preempt_ib, 5058 .emit_tmz = gfx_v10_0_ring_emit_tmz, 5059 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5060 
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5061 }; 5062 5063 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { 5064 .type = AMDGPU_RING_TYPE_COMPUTE, 5065 .align_mask = 0xff, 5066 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 5067 .support_64bit_ptrs = true, 5068 .vmhub = AMDGPU_GFXHUB_0, 5069 .get_rptr = gfx_v10_0_ring_get_rptr_compute, 5070 .get_wptr = gfx_v10_0_ring_get_wptr_compute, 5071 .set_wptr = gfx_v10_0_ring_set_wptr_compute, 5072 .emit_frame_size = 5073 20 + /* gfx_v10_0_ring_emit_gds_switch */ 5074 7 + /* gfx_v10_0_ring_emit_hdp_flush */ 5075 5 + /* hdp invalidate */ 5076 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ 5077 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 5078 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 5079 2 + /* gfx_v10_0_ring_emit_vm_flush */ 5080 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ 5081 .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ 5082 .emit_ib = gfx_v10_0_ring_emit_ib_compute, 5083 .emit_fence = gfx_v10_0_ring_emit_fence, 5084 .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, 5085 .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, 5086 .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, 5087 .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, 5088 .test_ring = gfx_v10_0_ring_test_ring, 5089 .test_ib = gfx_v10_0_ring_test_ib, 5090 .insert_nop = amdgpu_ring_insert_nop, 5091 .pad_ib = amdgpu_ring_generic_pad_ib, 5092 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5093 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5094 }; 5095 5096 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 5097 .type = AMDGPU_RING_TYPE_KIQ, 5098 .align_mask = 0xff, 5099 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 5100 .support_64bit_ptrs = true, 5101 .vmhub = AMDGPU_GFXHUB_0, 5102 .get_rptr = gfx_v10_0_ring_get_rptr_compute, 5103 .get_wptr = gfx_v10_0_ring_get_wptr_compute, 5104 .set_wptr = gfx_v10_0_ring_set_wptr_compute, 5105 .emit_frame_size = 5106 20 + /* gfx_v10_0_ring_emit_gds_switch */ 5107 7 + /* gfx_v10_0_ring_emit_hdp_flush */ 5108 5 + /*hdp invalidate */ 5109 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ 5110 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 5111 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 5112 2 + /* gfx_v10_0_ring_emit_vm_flush */ 5113 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 5114 .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ 5115 .emit_ib = gfx_v10_0_ring_emit_ib_compute, 5116 .emit_fence = gfx_v10_0_ring_emit_fence_kiq, 5117 .test_ring = gfx_v10_0_ring_test_ring, 5118 .test_ib = gfx_v10_0_ring_test_ib, 5119 .insert_nop = amdgpu_ring_insert_nop, 5120 .pad_ib = amdgpu_ring_generic_pad_ib, 5121 .emit_rreg = gfx_v10_0_ring_emit_rreg, 5122 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5123 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5124 }; 5125 5126 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 5127 { 5128 int i; 5129 5130 adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq; 5131 5132 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 5133 adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx; 5134 5135 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5136 adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute; 5137 } 5138 5139 static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = { 5140 .set = gfx_v10_0_set_eop_interrupt_state, 5141 .process = gfx_v10_0_eop_irq, 5142 }; 5143 5144 static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { 5145 .set = gfx_v10_0_set_priv_reg_fault_state, 5146 .process = gfx_v10_0_priv_reg_irq, 5147 }; 5148 5149 
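/* The priv_reg and priv_inst interrupt sources below share
 * gfx_v10_0_handle_priv_fault() for fault handling; they differ only in the
 * error message that is logged. */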
static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
	.set = gfx_v10_0_set_priv_inst_fault_state,
	.process = gfx_v10_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {
	.set = gfx_v10_0_kiq_set_interrupt_state,
	.process = gfx_v10_0_kiq_irq,
};

static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}

static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
		adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_NAVI10:
	default:
		adev->gds.gds_size = 0x10000;
		adev->gds.gds_compute_max_wave_id = 0x4ff;
		adev->gds.vgt_gs_max_wave_id = 0x3ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							  u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 10,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v10_0_ip_funcs,
};
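
/*
 * The exported gfx_v10_0_ip_block above is consumed by the SoC-level setup
 * code, which registers it as one of the chip's IP blocks during early
 * init.  A minimal sketch of that registration, using the generic
 * amdgpu_device_ip_block_add() helper (the surrounding per-ASIC logic
 * lives in nv.c and is omitted here):
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
 */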