1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/kernel.h> 24 #include <linux/firmware.h> 25 #include <drm/drmP.h> 26 #include "amdgpu.h" 27 #include "amdgpu_gfx.h" 28 #include "soc15.h" 29 #include "soc15d.h" 30 #include "amdgpu_atomfirmware.h" 31 #include "amdgpu_pm.h" 32 33 #include "gc/gc_9_0_offset.h" 34 #include "gc/gc_9_0_sh_mask.h" 35 #include "vega10_enum.h" 36 #include "hdp/hdp_4_0_offset.h" 37 38 #include "soc15.h" 39 #include "soc15_common.h" 40 #include "clearstate_gfx9.h" 41 #include "v9_structs.h" 42 43 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" 44 45 #include "amdgpu_ras.h" 46 47 #define GFX9_NUM_GFX_RINGS 1 48 #define GFX9_MEC_HPD_SIZE 4096 49 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 50 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 51 52 #define mmPWR_MISC_CNTL_STATUS 0x0183 53 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 54 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 55 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 56 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L 57 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L 58 59 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); 60 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); 61 MODULE_FIRMWARE("amdgpu/vega10_me.bin"); 62 MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); 63 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 64 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 65 66 MODULE_FIRMWARE("amdgpu/vega12_ce.bin"); 67 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin"); 68 MODULE_FIRMWARE("amdgpu/vega12_me.bin"); 69 MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); 70 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); 71 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); 72 73 MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); 74 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); 75 MODULE_FIRMWARE("amdgpu/vega20_me.bin"); 76 MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); 77 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); 78 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); 79 80 MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 81 MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 82 MODULE_FIRMWARE("amdgpu/raven_me.bin"); 83 MODULE_FIRMWARE("amdgpu/raven_mec.bin"); 84 MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); 85 MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 86 87 MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 88 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 89 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 90 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 91 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 92 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 93 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 94 95 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 96 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 97 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 98 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 99 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 100 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 101 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 102 103 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 104 { 105 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 106 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 107 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 108 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 110 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 111 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 112 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 115 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 116 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 117 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 120 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 121 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 122 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 124 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 125 }; 126 127 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 128 { 129 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 130 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 131 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 132 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 147 }; 148 149 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 150 { 151 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 152 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 153 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 154 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 160 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 161 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 162 }; 163 164 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 165 { 166 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 167 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 168 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 169 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 170 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 171 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 172 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 173 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 174 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 175 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 176 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 177 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 178 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 179 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 180 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 181 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 182 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 183 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 184 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 185 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 186 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 187 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 188 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 189 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 190 }; 191 192 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 193 { 194 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 195 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 196 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 197 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 198 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 199 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 200 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 201 }; 202 203 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 204 { 205 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 206 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 207 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 208 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 209 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 210 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 211 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 212 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 213 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 214 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 215 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 216 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 217 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 218 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 219 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 220 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 221 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 222 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 223 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 224 }; 225 226 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 227 { 228 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 229 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 230 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 231 }; 232 233 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 234 { 235 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 236 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 237 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 238 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 239 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 240 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 241 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 242 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 243 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 244 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 245 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 246 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 247 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 248 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 249 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 250 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 251 }; 252 253 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 254 { 255 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 256 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 257 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 258 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 259 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 260 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 261 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 262 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 263 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 264 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 265 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 266 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 267 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 268 }; 269 270 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 271 { 272 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 273 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 274 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 275 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 276 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 277 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 278 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 279 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 280 }; 281 282 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 283 { 284 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 285 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 286 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 287 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 288 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 289 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 290 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 291 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 292 }; 293 294 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 295 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 296 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 297 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 298 299 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 300 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 301 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 302 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 303 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 304 struct amdgpu_cu_info *cu_info); 305 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 306 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); 307 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 308 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 309 310 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 311 { 312 switch (adev->asic_type) { 313 case CHIP_VEGA10: 314 if (!amdgpu_virt_support_skip_setting(adev)) { 315 soc15_program_register_sequence(adev, 316 golden_settings_gc_9_0, 317 ARRAY_SIZE(golden_settings_gc_9_0)); 318 soc15_program_register_sequence(adev, 319 golden_settings_gc_9_0_vg10, 320 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 321 } 322 break; 323 case CHIP_VEGA12: 324 soc15_program_register_sequence(adev, 325 golden_settings_gc_9_2_1, 326 ARRAY_SIZE(golden_settings_gc_9_2_1)); 327 soc15_program_register_sequence(adev, 328 golden_settings_gc_9_2_1_vg12, 329 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); 330 break; 331 case CHIP_VEGA20: 332 soc15_program_register_sequence(adev, 333 golden_settings_gc_9_0, 334 ARRAY_SIZE(golden_settings_gc_9_0)); 335 soc15_program_register_sequence(adev, 336 golden_settings_gc_9_0_vg20, 337 ARRAY_SIZE(golden_settings_gc_9_0_vg20)); 338 break; 339 case CHIP_RAVEN: 340 soc15_program_register_sequence(adev, golden_settings_gc_9_1, 341 ARRAY_SIZE(golden_settings_gc_9_1)); 342 if (adev->rev_id >= 8) 343 soc15_program_register_sequence(adev, 344 golden_settings_gc_9_1_rv2, 345 ARRAY_SIZE(golden_settings_gc_9_1_rv2)); 346 else 347 soc15_program_register_sequence(adev, 348 golden_settings_gc_9_1_rv1, 349 ARRAY_SIZE(golden_settings_gc_9_1_rv1)); 350 break; 351 default: 352 break; 353 } 354 355 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, 356 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); 357 } 358 359 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) 360 { 361 adev->gfx.scratch.num_reg = 8; 362 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 363 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 364 } 365 366 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 367 bool wc, uint32_t reg, uint32_t val) 368 { 369 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 370 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 371 WRITE_DATA_DST_SEL(0) | 372 (wc ? WR_CONFIRM : 0)); 373 amdgpu_ring_write(ring, reg); 374 amdgpu_ring_write(ring, 0); 375 amdgpu_ring_write(ring, val); 376 } 377 378 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 379 int mem_space, int opt, uint32_t addr0, 380 uint32_t addr1, uint32_t ref, uint32_t mask, 381 uint32_t inv) 382 { 383 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 384 amdgpu_ring_write(ring, 385 /* memory (1) or register (0) */ 386 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 387 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 388 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 389 WAIT_REG_MEM_ENGINE(eng_sel))); 390 391 if (mem_space) 392 BUG_ON(addr0 & 0x3); /* Dword align */ 393 amdgpu_ring_write(ring, addr0); 394 amdgpu_ring_write(ring, addr1); 395 amdgpu_ring_write(ring, ref); 396 amdgpu_ring_write(ring, mask); 397 amdgpu_ring_write(ring, inv); /* poll interval */ 398 } 399 400 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 401 { 402 struct amdgpu_device *adev = ring->adev; 403 uint32_t scratch; 404 uint32_t tmp = 0; 405 unsigned i; 406 int r; 407 408 r = amdgpu_gfx_scratch_get(adev, &scratch); 409 if (r) 410 return r; 411 412 WREG32(scratch, 0xCAFEDEAD); 413 r = amdgpu_ring_alloc(ring, 3); 414 if (r) 415 goto error_free_scratch; 416 417 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 418 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 419 amdgpu_ring_write(ring, 0xDEADBEEF); 420 amdgpu_ring_commit(ring); 421 422 for (i = 0; i < adev->usec_timeout; i++) { 423 tmp = RREG32(scratch); 424 if (tmp == 0xDEADBEEF) 425 break; 426 DRM_UDELAY(1); 427 } 428 429 if (i >= adev->usec_timeout) 430 r = -ETIMEDOUT; 431 432 error_free_scratch: 433 amdgpu_gfx_scratch_free(adev, scratch); 434 return r; 435 } 436 437 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 438 { 439 struct amdgpu_device *adev = ring->adev; 440 struct amdgpu_ib ib; 441 struct dma_fence *f = NULL; 442 443 unsigned index; 444 uint64_t gpu_addr; 445 uint32_t tmp; 446 long r; 447 448 r = amdgpu_device_wb_get(adev, &index); 449 if (r) 450 return r; 451 452 gpu_addr = adev->wb.gpu_addr + (index * 4); 453 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 454 memset(&ib, 0, sizeof(ib)); 455 r = amdgpu_ib_get(adev, NULL, 16, &ib); 456 if (r) 457 goto err1; 458 459 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 460 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 461 ib.ptr[2] = lower_32_bits(gpu_addr); 462 ib.ptr[3] = upper_32_bits(gpu_addr); 463 ib.ptr[4] = 0xDEADBEEF; 464 ib.length_dw = 5; 465 466 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 467 if (r) 468 goto err2; 469 470 r = dma_fence_wait_timeout(f, false, timeout); 471 if (r == 0) { 472 r = -ETIMEDOUT; 473 goto err2; 474 } else if (r < 0) { 475 goto err2; 476 } 477 478 tmp = adev->wb.wb[index]; 479 if (tmp == 0xDEADBEEF) 480 r = 0; 481 else 482 r = -EINVAL; 483 484 err2: 485 amdgpu_ib_free(adev, &ib, NULL); 486 dma_fence_put(f); 487 err1: 488 amdgpu_device_wb_free(adev, index); 489 return r; 490 } 491 492 493 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 494 { 495 release_firmware(adev->gfx.pfp_fw); 496 adev->gfx.pfp_fw = NULL; 497 release_firmware(adev->gfx.me_fw); 498 adev->gfx.me_fw = NULL; 499 release_firmware(adev->gfx.ce_fw); 500 adev->gfx.ce_fw = NULL; 501 release_firmware(adev->gfx.rlc_fw); 502 adev->gfx.rlc_fw = NULL; 503 release_firmware(adev->gfx.mec_fw); 504 adev->gfx.mec_fw = NULL; 505 release_firmware(adev->gfx.mec2_fw); 506 adev->gfx.mec2_fw = NULL; 507 508 kfree(adev->gfx.rlc.register_list_format); 509 } 510 511 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 512 { 513 const struct rlc_firmware_header_v2_1 *rlc_hdr; 514 515 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 516 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 517 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 518 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 519 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 520 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 521 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 522 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 523 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 524 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 525 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 526 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 527 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 528 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 529 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 530 } 531 532 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 533 { 534 adev->gfx.me_fw_write_wait = false; 535 adev->gfx.mec_fw_write_wait = false; 536 537 switch (adev->asic_type) { 538 case CHIP_VEGA10: 539 if ((adev->gfx.me_fw_version >= 0x0000009c) && 540 (adev->gfx.me_feature_version >= 42) && 541 (adev->gfx.pfp_fw_version >= 0x000000b1) && 542 (adev->gfx.pfp_feature_version >= 42)) 543 adev->gfx.me_fw_write_wait = true; 544 545 if ((adev->gfx.mec_fw_version >= 0x00000193) && 546 (adev->gfx.mec_feature_version >= 42)) 547 adev->gfx.mec_fw_write_wait = true; 548 break; 549 case CHIP_VEGA12: 550 if ((adev->gfx.me_fw_version >= 0x0000009c) && 551 (adev->gfx.me_feature_version >= 44) && 552 (adev->gfx.pfp_fw_version >= 0x000000b2) && 553 (adev->gfx.pfp_feature_version >= 44)) 554 adev->gfx.me_fw_write_wait = true; 555 556 if ((adev->gfx.mec_fw_version >= 0x00000196) && 557 (adev->gfx.mec_feature_version >= 44)) 558 adev->gfx.mec_fw_write_wait = true; 559 break; 560 case CHIP_VEGA20: 561 if ((adev->gfx.me_fw_version >= 0x0000009c) && 562 (adev->gfx.me_feature_version >= 44) && 563 (adev->gfx.pfp_fw_version >= 0x000000b2) && 564 (adev->gfx.pfp_feature_version >= 44)) 565 adev->gfx.me_fw_write_wait = true; 566 567 if ((adev->gfx.mec_fw_version >= 0x00000197) && 568 (adev->gfx.mec_feature_version >= 44)) 569 adev->gfx.mec_fw_write_wait = true; 570 break; 571 case CHIP_RAVEN: 572 if ((adev->gfx.me_fw_version >= 0x0000009c) && 573 (adev->gfx.me_feature_version >= 42) && 574 (adev->gfx.pfp_fw_version >= 0x000000b1) && 575 (adev->gfx.pfp_feature_version >= 42)) 576 adev->gfx.me_fw_write_wait = true; 577 578 if ((adev->gfx.mec_fw_version >= 0x00000192) && 579 (adev->gfx.mec_feature_version >= 42)) 580 adev->gfx.mec_fw_write_wait = true; 581 break; 582 default: 583 break; 584 } 585 } 586 587 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 588 { 589 switch (adev->asic_type) { 590 case CHIP_VEGA10: 591 case CHIP_VEGA12: 592 case CHIP_VEGA20: 593 break; 594 case CHIP_RAVEN: 595 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) 596 break; 597 if ((adev->gfx.rlc_fw_version != 106 && 598 adev->gfx.rlc_fw_version < 531) || 599 (adev->gfx.rlc_fw_version == 53815) || 600 (adev->gfx.rlc_feature_version < 1) || 601 !adev->gfx.rlc.is_rlc_v2_1) 602 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 603 break; 604 default: 605 break; 606 } 607 } 608 609 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 610 { 611 const char *chip_name; 612 char fw_name[30]; 613 int err; 614 struct amdgpu_firmware_info *info = NULL; 615 const struct common_firmware_header *header = NULL; 616 const struct gfx_firmware_header_v1_0 *cp_hdr; 617 const struct rlc_firmware_header_v2_0 *rlc_hdr; 618 unsigned int *tmp = NULL; 619 unsigned int i = 0; 620 uint16_t version_major; 621 uint16_t version_minor; 622 uint32_t smu_version; 623 624 DRM_DEBUG("\n"); 625 626 switch (adev->asic_type) { 627 case CHIP_VEGA10: 628 chip_name = "vega10"; 629 break; 630 case CHIP_VEGA12: 631 chip_name = "vega12"; 632 break; 633 case CHIP_VEGA20: 634 chip_name = "vega20"; 635 break; 636 case CHIP_RAVEN: 637 if (adev->rev_id >= 8) 638 chip_name = "raven2"; 639 else if (adev->pdev->device == 0x15d8) 640 chip_name = "picasso"; 641 else 642 chip_name = "raven"; 643 break; 644 default: 645 BUG(); 646 } 647 648 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 649 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 650 if (err) 651 goto out; 652 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 653 if (err) 654 goto out; 655 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 656 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 657 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 658 659 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 660 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 661 if (err) 662 goto out; 663 err = amdgpu_ucode_validate(adev->gfx.me_fw); 664 if (err) 665 goto out; 666 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 667 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 668 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 669 670 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 671 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 672 if (err) 673 goto out; 674 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 675 if (err) 676 goto out; 677 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 678 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 679 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 680 681 /* 682 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 683 * instead of picasso_rlc.bin. 684 * Judgment method: 685 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 686 * or revision >= 0xD8 && revision <= 0xDF 687 * otherwise is PCO FP5 688 */ 689 if (!strcmp(chip_name, "picasso") && 690 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 691 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 692 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 693 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 694 (smu_version >= 0x41e2b)) 695 /** 696 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 697 */ 698 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 699 else 700 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 701 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 702 if (err) 703 goto out; 704 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 705 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 706 707 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 708 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 709 if (version_major == 2 && version_minor == 1) 710 adev->gfx.rlc.is_rlc_v2_1 = true; 711 712 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 713 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 714 adev->gfx.rlc.save_and_restore_offset = 715 le32_to_cpu(rlc_hdr->save_and_restore_offset); 716 adev->gfx.rlc.clear_state_descriptor_offset = 717 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 718 adev->gfx.rlc.avail_scratch_ram_locations = 719 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 720 adev->gfx.rlc.reg_restore_list_size = 721 le32_to_cpu(rlc_hdr->reg_restore_list_size); 722 adev->gfx.rlc.reg_list_format_start = 723 le32_to_cpu(rlc_hdr->reg_list_format_start); 724 adev->gfx.rlc.reg_list_format_separate_start = 725 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 726 adev->gfx.rlc.starting_offsets_start = 727 le32_to_cpu(rlc_hdr->starting_offsets_start); 728 adev->gfx.rlc.reg_list_format_size_bytes = 729 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 730 adev->gfx.rlc.reg_list_size_bytes = 731 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 732 adev->gfx.rlc.register_list_format = 733 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 734 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 735 if (!adev->gfx.rlc.register_list_format) { 736 err = -ENOMEM; 737 goto out; 738 } 739 740 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 741 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 742 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 743 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 744 745 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 746 747 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 748 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 749 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 750 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 751 752 if (adev->gfx.rlc.is_rlc_v2_1) 753 gfx_v9_0_init_rlc_ext_microcode(adev); 754 755 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 756 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 757 if (err) 758 goto out; 759 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 760 if (err) 761 goto out; 762 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 763 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 764 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 765 766 767 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 768 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 769 if (!err) { 770 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 771 if (err) 772 goto out; 773 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 774 adev->gfx.mec2_fw->data; 775 adev->gfx.mec2_fw_version = 776 le32_to_cpu(cp_hdr->header.ucode_version); 777 adev->gfx.mec2_feature_version = 778 le32_to_cpu(cp_hdr->ucode_feature_version); 779 } else { 780 err = 0; 781 adev->gfx.mec2_fw = NULL; 782 } 783 784 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 785 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 786 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 787 info->fw = adev->gfx.pfp_fw; 788 header = (const struct common_firmware_header *)info->fw->data; 789 adev->firmware.fw_size += 790 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 791 792 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 793 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 794 info->fw = adev->gfx.me_fw; 795 header = (const struct common_firmware_header *)info->fw->data; 796 adev->firmware.fw_size += 797 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 798 799 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 800 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 801 info->fw = adev->gfx.ce_fw; 802 header = (const struct common_firmware_header *)info->fw->data; 803 adev->firmware.fw_size += 804 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 805 806 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 807 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 808 info->fw = adev->gfx.rlc_fw; 809 header = (const struct common_firmware_header *)info->fw->data; 810 adev->firmware.fw_size += 811 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 812 813 if (adev->gfx.rlc.is_rlc_v2_1 && 814 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 815 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 816 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 817 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 818 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 819 info->fw = adev->gfx.rlc_fw; 820 adev->firmware.fw_size += 821 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 822 823 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 824 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 825 info->fw = adev->gfx.rlc_fw; 826 adev->firmware.fw_size += 827 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 828 829 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 830 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 831 info->fw = adev->gfx.rlc_fw; 832 adev->firmware.fw_size += 833 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 834 } 835 836 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 837 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 838 info->fw = adev->gfx.mec_fw; 839 header = (const struct common_firmware_header *)info->fw->data; 840 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 841 adev->firmware.fw_size += 842 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 843 844 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 845 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 846 info->fw = adev->gfx.mec_fw; 847 adev->firmware.fw_size += 848 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 849 850 if (adev->gfx.mec2_fw) { 851 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 852 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 853 info->fw = adev->gfx.mec2_fw; 854 header = (const struct common_firmware_header *)info->fw->data; 855 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 856 adev->firmware.fw_size += 857 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 858 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 859 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 860 info->fw = adev->gfx.mec2_fw; 861 adev->firmware.fw_size += 862 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 863 } 864 865 } 866 867 out: 868 gfx_v9_0_check_if_need_gfxoff(adev); 869 gfx_v9_0_check_fw_write_wait(adev); 870 if (err) { 871 dev_err(adev->dev, 872 "gfx9: Failed to load firmware \"%s\"\n", 873 fw_name); 874 release_firmware(adev->gfx.pfp_fw); 875 adev->gfx.pfp_fw = NULL; 876 release_firmware(adev->gfx.me_fw); 877 adev->gfx.me_fw = NULL; 878 release_firmware(adev->gfx.ce_fw); 879 adev->gfx.ce_fw = NULL; 880 release_firmware(adev->gfx.rlc_fw); 881 adev->gfx.rlc_fw = NULL; 882 release_firmware(adev->gfx.mec_fw); 883 adev->gfx.mec_fw = NULL; 884 release_firmware(adev->gfx.mec2_fw); 885 adev->gfx.mec2_fw = NULL; 886 } 887 return err; 888 } 889 890 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 891 { 892 u32 count = 0; 893 const struct cs_section_def *sect = NULL; 894 const struct cs_extent_def *ext = NULL; 895 896 /* begin clear state */ 897 count += 2; 898 /* context control state */ 899 count += 3; 900 901 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 902 for (ext = sect->section; ext->extent != NULL; ++ext) { 903 if (sect->id == SECT_CONTEXT) 904 count += 2 + ext->reg_count; 905 else 906 return 0; 907 } 908 } 909 910 /* end clear state */ 911 count += 2; 912 /* clear state */ 913 count += 2; 914 915 return count; 916 } 917 918 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 919 volatile u32 *buffer) 920 { 921 u32 count = 0, i; 922 const struct cs_section_def *sect = NULL; 923 const struct cs_extent_def *ext = NULL; 924 925 if (adev->gfx.rlc.cs_data == NULL) 926 return; 927 if (buffer == NULL) 928 return; 929 930 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 931 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 932 933 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 934 buffer[count++] = cpu_to_le32(0x80000000); 935 buffer[count++] = cpu_to_le32(0x80000000); 936 937 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 938 for (ext = sect->section; ext->extent != NULL; ++ext) { 939 if (sect->id == SECT_CONTEXT) { 940 buffer[count++] = 941 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 942 buffer[count++] = cpu_to_le32(ext->reg_index - 943 PACKET3_SET_CONTEXT_REG_START); 944 for (i = 0; i < ext->reg_count; i++) 945 buffer[count++] = cpu_to_le32(ext->extent[i]); 946 } else { 947 return; 948 } 949 } 950 } 951 952 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 953 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 954 955 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 956 buffer[count++] = cpu_to_le32(0); 957 } 958 959 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 960 { 961 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 962 uint32_t pg_always_on_cu_num = 2; 963 uint32_t always_on_cu_num; 964 uint32_t i, j, k; 965 uint32_t mask, cu_bitmap, counter; 966 967 if (adev->flags & AMD_IS_APU) 968 always_on_cu_num = 4; 969 else if (adev->asic_type == CHIP_VEGA12) 970 always_on_cu_num = 8; 971 else 972 always_on_cu_num = 12; 973 974 mutex_lock(&adev->grbm_idx_mutex); 975 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 976 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 977 mask = 1; 978 cu_bitmap = 0; 979 counter = 0; 980 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 981 982 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 983 if (cu_info->bitmap[i][j] & mask) { 984 if (counter == pg_always_on_cu_num) 985 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 986 if (counter < always_on_cu_num) 987 cu_bitmap |= mask; 988 else 989 break; 990 counter++; 991 } 992 mask <<= 1; 993 } 994 995 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 996 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 997 } 998 } 999 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1000 mutex_unlock(&adev->grbm_idx_mutex); 1001 } 1002 1003 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1004 { 1005 uint32_t data; 1006 1007 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1008 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1009 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1010 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1011 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1012 1013 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1014 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1015 1016 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1017 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1018 1019 mutex_lock(&adev->grbm_idx_mutex); 1020 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1021 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1022 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1023 1024 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1025 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1026 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1027 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1028 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1029 1030 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1031 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1032 data &= 0x0000FFFF; 1033 data |= 0x00C00000; 1034 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1035 1036 /* 1037 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1038 * programmed in gfx_v9_0_init_always_on_cu_mask() 1039 */ 1040 1041 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1042 * but used for RLC_LB_CNTL configuration */ 1043 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1044 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1045 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1046 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1047 mutex_unlock(&adev->grbm_idx_mutex); 1048 1049 gfx_v9_0_init_always_on_cu_mask(adev); 1050 } 1051 1052 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1053 { 1054 uint32_t data; 1055 1056 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1057 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1058 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1059 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1060 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1061 1062 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1063 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1064 1065 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1066 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1067 1068 mutex_lock(&adev->grbm_idx_mutex); 1069 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1070 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1071 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1072 1073 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1074 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1075 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1076 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1077 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1078 1079 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1080 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1081 data &= 0x0000FFFF; 1082 data |= 0x00C00000; 1083 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1084 1085 /* 1086 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1087 * programmed in gfx_v9_0_init_always_on_cu_mask() 1088 */ 1089 1090 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1091 * but used for RLC_LB_CNTL configuration */ 1092 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1093 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1094 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1095 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1096 mutex_unlock(&adev->grbm_idx_mutex); 1097 1098 gfx_v9_0_init_always_on_cu_mask(adev); 1099 } 1100 1101 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1102 { 1103 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1104 } 1105 1106 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1107 { 1108 return 5; 1109 } 1110 1111 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1112 { 1113 const struct cs_section_def *cs_data; 1114 int r; 1115 1116 adev->gfx.rlc.cs_data = gfx9_cs_data; 1117 1118 cs_data = adev->gfx.rlc.cs_data; 1119 1120 if (cs_data) { 1121 /* init clear state block */ 1122 r = amdgpu_gfx_rlc_init_csb(adev); 1123 if (r) 1124 return r; 1125 } 1126 1127 if (adev->asic_type == CHIP_RAVEN) { 1128 /* TODO: double check the cp_table_size for RV */ 1129 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1130 r = amdgpu_gfx_rlc_init_cpt(adev); 1131 if (r) 1132 return r; 1133 } 1134 1135 switch (adev->asic_type) { 1136 case CHIP_RAVEN: 1137 gfx_v9_0_init_lbpw(adev); 1138 break; 1139 case CHIP_VEGA20: 1140 gfx_v9_4_init_lbpw(adev); 1141 break; 1142 default: 1143 break; 1144 } 1145 1146 return 0; 1147 } 1148 1149 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) 1150 { 1151 int r; 1152 1153 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1154 if (unlikely(r != 0)) 1155 return r; 1156 1157 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, 1158 AMDGPU_GEM_DOMAIN_VRAM); 1159 if (!r) 1160 adev->gfx.rlc.clear_state_gpu_addr = 1161 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); 1162 1163 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1164 1165 return r; 1166 } 1167 1168 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) 1169 { 1170 int r; 1171 1172 if (!adev->gfx.rlc.clear_state_obj) 1173 return; 1174 1175 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1176 if (likely(r == 0)) { 1177 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1178 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1179 } 1180 } 1181 1182 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1183 { 1184 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1185 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1186 } 1187 1188 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1189 { 1190 int r; 1191 u32 *hpd; 1192 const __le32 *fw_data; 1193 unsigned fw_size; 1194 u32 *fw; 1195 size_t mec_hpd_size; 1196 1197 const struct gfx_firmware_header_v1_0 *mec_hdr; 1198 1199 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1200 1201 /* take ownership of the relevant compute queues */ 1202 amdgpu_gfx_compute_queue_acquire(adev); 1203 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1204 1205 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1206 AMDGPU_GEM_DOMAIN_VRAM, 1207 &adev->gfx.mec.hpd_eop_obj, 1208 &adev->gfx.mec.hpd_eop_gpu_addr, 1209 (void **)&hpd); 1210 if (r) { 1211 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1212 gfx_v9_0_mec_fini(adev); 1213 return r; 1214 } 1215 1216 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1217 1218 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1219 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1220 1221 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1222 1223 fw_data = (const __le32 *) 1224 (adev->gfx.mec_fw->data + 1225 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1226 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1227 1228 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1229 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1230 &adev->gfx.mec.mec_fw_obj, 1231 &adev->gfx.mec.mec_fw_gpu_addr, 1232 (void **)&fw); 1233 if (r) { 1234 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1235 gfx_v9_0_mec_fini(adev); 1236 return r; 1237 } 1238 1239 memcpy(fw, fw_data, fw_size); 1240 1241 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1242 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1243 1244 return 0; 1245 } 1246 1247 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1248 { 1249 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1250 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1251 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1252 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1253 (SQ_IND_INDEX__FORCE_READ_MASK)); 1254 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1255 } 1256 1257 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1258 uint32_t wave, uint32_t thread, 1259 uint32_t regno, uint32_t num, uint32_t *out) 1260 { 1261 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1262 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1263 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1264 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1265 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1266 (SQ_IND_INDEX__FORCE_READ_MASK) | 1267 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1268 while (num--) 1269 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1270 } 1271 1272 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1273 { 1274 /* type 1 wave data */ 1275 dst[(*no_fields)++] = 1; 1276 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1277 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1278 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1279 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1280 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1281 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1282 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1283 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1284 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1285 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1286 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1287 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1288 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1289 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1290 } 1291 1292 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1293 uint32_t wave, uint32_t start, 1294 uint32_t size, uint32_t *dst) 1295 { 1296 wave_read_regs( 1297 adev, simd, wave, 0, 1298 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1299 } 1300 1301 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1302 uint32_t wave, uint32_t thread, 1303 uint32_t start, uint32_t size, 1304 uint32_t *dst) 1305 { 1306 wave_read_regs( 1307 adev, simd, wave, thread, 1308 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1309 } 1310 1311 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1312 u32 me, u32 pipe, u32 q) 1313 { 1314 soc15_grbm_select(adev, me, pipe, q, 0); 1315 } 1316 1317 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1318 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1319 .select_se_sh = &gfx_v9_0_select_se_sh, 1320 .read_wave_data = &gfx_v9_0_read_wave_data, 1321 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1322 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1323 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q 1324 }; 1325 1326 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1327 { 1328 u32 gb_addr_config; 1329 int err; 1330 1331 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1332 1333 switch (adev->asic_type) { 1334 case CHIP_VEGA10: 1335 adev->gfx.config.max_hw_contexts = 8; 1336 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1337 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1338 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1339 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1340 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1341 break; 1342 case CHIP_VEGA12: 1343 adev->gfx.config.max_hw_contexts = 8; 1344 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1345 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1346 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1347 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1348 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1349 DRM_INFO("fix gfx.config for vega12\n"); 1350 break; 1351 case CHIP_VEGA20: 1352 adev->gfx.config.max_hw_contexts = 8; 1353 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1354 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1355 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1356 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1357 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1358 gb_addr_config &= ~0xf3e777ff; 1359 gb_addr_config |= 0x22014042; 1360 /* check vbios table if gpu info is not available */ 1361 err = amdgpu_atomfirmware_get_gfx_info(adev); 1362 if (err) 1363 return err; 1364 break; 1365 case CHIP_RAVEN: 1366 adev->gfx.config.max_hw_contexts = 8; 1367 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1368 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1369 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1370 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1371 if (adev->rev_id >= 8) 1372 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1373 else 1374 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1375 break; 1376 default: 1377 BUG(); 1378 break; 1379 } 1380 1381 adev->gfx.config.gb_addr_config = gb_addr_config; 1382 1383 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1384 REG_GET_FIELD( 1385 adev->gfx.config.gb_addr_config, 1386 GB_ADDR_CONFIG, 1387 NUM_PIPES); 1388 1389 adev->gfx.config.max_tile_pipes = 1390 adev->gfx.config.gb_addr_config_fields.num_pipes; 1391 1392 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1393 REG_GET_FIELD( 1394 adev->gfx.config.gb_addr_config, 1395 GB_ADDR_CONFIG, 1396 NUM_BANKS); 1397 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1398 REG_GET_FIELD( 1399 adev->gfx.config.gb_addr_config, 1400 GB_ADDR_CONFIG, 1401 MAX_COMPRESSED_FRAGS); 1402 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1403 REG_GET_FIELD( 1404 adev->gfx.config.gb_addr_config, 1405 GB_ADDR_CONFIG, 1406 NUM_RB_PER_SE); 1407 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1408 REG_GET_FIELD( 1409 adev->gfx.config.gb_addr_config, 1410 GB_ADDR_CONFIG, 1411 NUM_SHADER_ENGINES); 1412 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1413 REG_GET_FIELD( 1414 adev->gfx.config.gb_addr_config, 1415 GB_ADDR_CONFIG, 1416 PIPE_INTERLEAVE_SIZE)); 1417 1418 return 0; 1419 } 1420 1421 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1422 struct amdgpu_ngg_buf *ngg_buf, 1423 int size_se, 1424 int default_size_se) 1425 { 1426 int r; 1427 1428 if (size_se < 0) { 1429 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1430 return -EINVAL; 1431 } 1432 size_se = size_se ? size_se : default_size_se; 1433 1434 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1435 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1436 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1437 &ngg_buf->bo, 1438 &ngg_buf->gpu_addr, 1439 NULL); 1440 if (r) { 1441 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1442 return r; 1443 } 1444 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1445 1446 return r; 1447 } 1448 1449 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1450 { 1451 int i; 1452 1453 for (i = 0; i < NGG_BUF_MAX; i++) 1454 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1455 &adev->gfx.ngg.buf[i].gpu_addr, 1456 NULL); 1457 1458 memset(&adev->gfx.ngg.buf[0], 0, 1459 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1460 1461 adev->gfx.ngg.init = false; 1462 1463 return 0; 1464 } 1465 1466 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 1467 { 1468 int r; 1469 1470 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 1471 return 0; 1472 1473 /* GDS reserve memory: 64 bytes alignment */ 1474 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 1475 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 1476 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 1477 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 1478 1479 /* Primitive Buffer */ 1480 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 1481 amdgpu_prim_buf_per_se, 1482 64 * 1024); 1483 if (r) { 1484 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 1485 goto err; 1486 } 1487 1488 /* Position Buffer */ 1489 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 1490 amdgpu_pos_buf_per_se, 1491 256 * 1024); 1492 if (r) { 1493 dev_err(adev->dev, "Failed to create Position Buffer\n"); 1494 goto err; 1495 } 1496 1497 /* Control Sideband */ 1498 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 1499 amdgpu_cntl_sb_buf_per_se, 1500 256); 1501 if (r) { 1502 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 1503 goto err; 1504 } 1505 1506 /* Parameter Cache, not created by default */ 1507 if (amdgpu_param_buf_per_se <= 0) 1508 goto out; 1509 1510 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 1511 amdgpu_param_buf_per_se, 1512 512 * 1024); 1513 if (r) { 1514 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 1515 goto err; 1516 } 1517 1518 out: 1519 adev->gfx.ngg.init = true; 1520 return 0; 1521 err: 1522 gfx_v9_0_ngg_fini(adev); 1523 return r; 1524 } 1525 1526 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 1527 { 1528 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 1529 int r; 1530 u32 data, base; 1531 1532 if (!amdgpu_ngg) 1533 return 0; 1534 1535 /* Program buffer size */ 1536 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 1537 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 1538 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 1539 adev->gfx.ngg.buf[NGG_POS].size >> 8); 1540 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 1541 1542 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 1543 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 1544 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 1545 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 1546 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 1547 1548 /* Program buffer base address */ 1549 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1550 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 1551 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 1552 1553 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1554 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 1555 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 1556 1557 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1558 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 1559 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 1560 1561 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1562 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 1563 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 1564 1565 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1566 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 1567 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 1568 1569 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1570 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 1571 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 1572 1573 /* Clear GDS reserved memory */ 1574 r = amdgpu_ring_alloc(ring, 17); 1575 if (r) { 1576 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 1577 ring->name, r); 1578 return r; 1579 } 1580 1581 gfx_v9_0_write_data_to_reg(ring, 0, false, 1582 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 1583 (adev->gds.gds_size + 1584 adev->gfx.ngg.gds_reserve_size)); 1585 1586 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1587 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 1588 PACKET3_DMA_DATA_DST_SEL(1) | 1589 PACKET3_DMA_DATA_SRC_SEL(2))); 1590 amdgpu_ring_write(ring, 0); 1591 amdgpu_ring_write(ring, 0); 1592 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 1593 amdgpu_ring_write(ring, 0); 1594 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 1595 adev->gfx.ngg.gds_reserve_size); 1596 1597 gfx_v9_0_write_data_to_reg(ring, 0, false, 1598 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 1599 1600 amdgpu_ring_commit(ring); 1601 1602 return 0; 1603 } 1604 1605 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1606 int mec, int pipe, int queue) 1607 { 1608 int r; 1609 unsigned irq_type; 1610 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1611 1612 ring = &adev->gfx.compute_ring[ring_id]; 1613 1614 /* mec0 is me1 */ 1615 ring->me = mec + 1; 1616 ring->pipe = pipe; 1617 ring->queue = queue; 1618 1619 ring->ring_obj = NULL; 1620 ring->use_doorbell = true; 1621 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1622 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1623 + (ring_id * GFX9_MEC_HPD_SIZE); 1624 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1625 1626 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1627 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1628 + ring->pipe; 1629 1630 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1631 r = amdgpu_ring_init(adev, ring, 1024, 1632 &adev->gfx.eop_irq, irq_type); 1633 if (r) 1634 return r; 1635 1636 1637 return 0; 1638 } 1639 1640 static int gfx_v9_0_sw_init(void *handle) 1641 { 1642 int i, j, k, r, ring_id; 1643 struct amdgpu_ring *ring; 1644 struct amdgpu_kiq *kiq; 1645 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1646 1647 switch (adev->asic_type) { 1648 case CHIP_VEGA10: 1649 case CHIP_VEGA12: 1650 case CHIP_VEGA20: 1651 case CHIP_RAVEN: 1652 adev->gfx.mec.num_mec = 2; 1653 break; 1654 default: 1655 adev->gfx.mec.num_mec = 1; 1656 break; 1657 } 1658 1659 adev->gfx.mec.num_pipe_per_mec = 4; 1660 adev->gfx.mec.num_queue_per_pipe = 8; 1661 1662 /* EOP Event */ 1663 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 1664 if (r) 1665 return r; 1666 1667 /* Privileged reg */ 1668 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 1669 &adev->gfx.priv_reg_irq); 1670 if (r) 1671 return r; 1672 1673 /* Privileged inst */ 1674 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 1675 &adev->gfx.priv_inst_irq); 1676 if (r) 1677 return r; 1678 1679 /* ECC error */ 1680 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 1681 &adev->gfx.cp_ecc_error_irq); 1682 if (r) 1683 return r; 1684 1685 /* FUE error */ 1686 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 1687 &adev->gfx.cp_ecc_error_irq); 1688 if (r) 1689 return r; 1690 1691 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1692 1693 gfx_v9_0_scratch_init(adev); 1694 1695 r = gfx_v9_0_init_microcode(adev); 1696 if (r) { 1697 DRM_ERROR("Failed to load gfx firmware!\n"); 1698 return r; 1699 } 1700 1701 r = adev->gfx.rlc.funcs->init(adev); 1702 if (r) { 1703 DRM_ERROR("Failed to init rlc BOs!\n"); 1704 return r; 1705 } 1706 1707 r = gfx_v9_0_mec_init(adev); 1708 if (r) { 1709 DRM_ERROR("Failed to init MEC BOs!\n"); 1710 return r; 1711 } 1712 1713 /* set up the gfx ring */ 1714 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1715 ring = &adev->gfx.gfx_ring[i]; 1716 ring->ring_obj = NULL; 1717 if (!i) 1718 sprintf(ring->name, "gfx"); 1719 else 1720 sprintf(ring->name, "gfx_%d", i); 1721 ring->use_doorbell = true; 1722 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1723 r = amdgpu_ring_init(adev, ring, 1024, 1724 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 1725 if (r) 1726 return r; 1727 } 1728 1729 /* set up the compute queues - allocate horizontally across pipes */ 1730 ring_id = 0; 1731 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1732 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1733 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1734 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1735 continue; 1736 1737 r = gfx_v9_0_compute_ring_init(adev, 1738 ring_id, 1739 i, k, j); 1740 if (r) 1741 return r; 1742 1743 ring_id++; 1744 } 1745 } 1746 } 1747 1748 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1749 if (r) { 1750 DRM_ERROR("Failed to init KIQ BOs!\n"); 1751 return r; 1752 } 1753 1754 kiq = &adev->gfx.kiq; 1755 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1756 if (r) 1757 return r; 1758 1759 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1760 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 1761 if (r) 1762 return r; 1763 1764 adev->gfx.ce_ram_size = 0x8000; 1765 1766 r = gfx_v9_0_gpu_early_init(adev); 1767 if (r) 1768 return r; 1769 1770 r = gfx_v9_0_ngg_init(adev); 1771 if (r) 1772 return r; 1773 1774 return 0; 1775 } 1776 1777 1778 static int gfx_v9_0_sw_fini(void *handle) 1779 { 1780 int i; 1781 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1782 1783 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 1784 adev->gfx.ras_if) { 1785 struct ras_common_if *ras_if = adev->gfx.ras_if; 1786 struct ras_ih_if ih_info = { 1787 .head = *ras_if, 1788 }; 1789 1790 amdgpu_ras_debugfs_remove(adev, ras_if); 1791 amdgpu_ras_sysfs_remove(adev, ras_if); 1792 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 1793 amdgpu_ras_feature_enable(adev, ras_if, 0); 1794 kfree(ras_if); 1795 } 1796 1797 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1798 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1799 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1800 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1801 1802 amdgpu_gfx_mqd_sw_fini(adev); 1803 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1804 amdgpu_gfx_kiq_fini(adev); 1805 1806 gfx_v9_0_mec_fini(adev); 1807 gfx_v9_0_ngg_fini(adev); 1808 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1809 if (adev->asic_type == CHIP_RAVEN) { 1810 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 1811 &adev->gfx.rlc.cp_table_gpu_addr, 1812 (void **)&adev->gfx.rlc.cp_table_ptr); 1813 } 1814 gfx_v9_0_free_microcode(adev); 1815 1816 return 0; 1817 } 1818 1819 1820 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1821 { 1822 /* TODO */ 1823 } 1824 1825 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1826 { 1827 u32 data; 1828 1829 if (instance == 0xffffffff) 1830 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1831 else 1832 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1833 1834 if (se_num == 0xffffffff) 1835 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1836 else 1837 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1838 1839 if (sh_num == 0xffffffff) 1840 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1841 else 1842 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1843 1844 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 1845 } 1846 1847 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1848 { 1849 u32 data, mask; 1850 1851 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1852 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1853 1854 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1855 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1856 1857 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1858 adev->gfx.config.max_sh_per_se); 1859 1860 return (~data) & mask; 1861 } 1862 1863 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1864 { 1865 int i, j; 1866 u32 data; 1867 u32 active_rbs = 0; 1868 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1869 adev->gfx.config.max_sh_per_se; 1870 1871 mutex_lock(&adev->grbm_idx_mutex); 1872 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1873 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1874 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1875 data = gfx_v9_0_get_rb_active_bitmap(adev); 1876 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1877 rb_bitmap_width_per_sh); 1878 } 1879 } 1880 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1881 mutex_unlock(&adev->grbm_idx_mutex); 1882 1883 adev->gfx.config.backend_enable_mask = active_rbs; 1884 adev->gfx.config.num_rbs = hweight32(active_rbs); 1885 } 1886 1887 #define DEFAULT_SH_MEM_BASES (0x6000) 1888 #define FIRST_COMPUTE_VMID (8) 1889 #define LAST_COMPUTE_VMID (16) 1890 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 1891 { 1892 int i; 1893 uint32_t sh_mem_config; 1894 uint32_t sh_mem_bases; 1895 1896 /* 1897 * Configure apertures: 1898 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1899 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 1900 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1901 */ 1902 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 1903 1904 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 1905 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 1906 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 1907 1908 mutex_lock(&adev->srbm_mutex); 1909 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1910 soc15_grbm_select(adev, 0, 0, 0, i); 1911 /* CP and shaders */ 1912 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 1913 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1914 } 1915 soc15_grbm_select(adev, 0, 0, 0, 0); 1916 mutex_unlock(&adev->srbm_mutex); 1917 } 1918 1919 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 1920 { 1921 u32 tmp; 1922 int i; 1923 1924 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1925 1926 gfx_v9_0_tiling_mode_table_init(adev); 1927 1928 gfx_v9_0_setup_rb(adev); 1929 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1930 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 1931 1932 /* XXX SH_MEM regs */ 1933 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1934 mutex_lock(&adev->srbm_mutex); 1935 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { 1936 soc15_grbm_select(adev, 0, 0, 0, i); 1937 /* CP and shaders */ 1938 if (i == 0) { 1939 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1940 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1941 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1942 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 1943 } else { 1944 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1945 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1946 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1947 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1948 (adev->gmc.private_aperture_start >> 48)); 1949 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1950 (adev->gmc.shared_aperture_start >> 48)); 1951 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 1952 } 1953 } 1954 soc15_grbm_select(adev, 0, 0, 0, 0); 1955 1956 mutex_unlock(&adev->srbm_mutex); 1957 1958 gfx_v9_0_init_compute_vmid(adev); 1959 1960 mutex_lock(&adev->grbm_idx_mutex); 1961 /* 1962 * making sure that the following register writes will be broadcasted 1963 * to all the shaders 1964 */ 1965 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1966 1967 WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE, 1968 (adev->gfx.config.sc_prim_fifo_size_frontend << 1969 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 1970 (adev->gfx.config.sc_prim_fifo_size_backend << 1971 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 1972 (adev->gfx.config.sc_hiz_tile_fifo_size << 1973 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 1974 (adev->gfx.config.sc_earlyz_tile_fifo_size << 1975 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 1976 mutex_unlock(&adev->grbm_idx_mutex); 1977 1978 } 1979 1980 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1981 { 1982 u32 i, j, k; 1983 u32 mask; 1984 1985 mutex_lock(&adev->grbm_idx_mutex); 1986 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1987 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1988 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1989 for (k = 0; k < adev->usec_timeout; k++) { 1990 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1991 break; 1992 udelay(1); 1993 } 1994 if (k == adev->usec_timeout) { 1995 gfx_v9_0_select_se_sh(adev, 0xffffffff, 1996 0xffffffff, 0xffffffff); 1997 mutex_unlock(&adev->grbm_idx_mutex); 1998 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 1999 i, j); 2000 return; 2001 } 2002 } 2003 } 2004 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2005 mutex_unlock(&adev->grbm_idx_mutex); 2006 2007 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2008 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2009 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2010 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2011 for (k = 0; k < adev->usec_timeout; k++) { 2012 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2013 break; 2014 udelay(1); 2015 } 2016 } 2017 2018 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2019 bool enable) 2020 { 2021 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2022 2023 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2024 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2025 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2026 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2027 2028 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2029 } 2030 2031 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2032 { 2033 /* csib */ 2034 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2035 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2036 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2037 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2038 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2039 adev->gfx.rlc.clear_state_size); 2040 } 2041 2042 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2043 int indirect_offset, 2044 int list_size, 2045 int *unique_indirect_regs, 2046 int unique_indirect_reg_count, 2047 int *indirect_start_offsets, 2048 int *indirect_start_offsets_count, 2049 int max_start_offsets_count) 2050 { 2051 int idx; 2052 2053 for (; indirect_offset < list_size; indirect_offset++) { 2054 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2055 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2056 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2057 2058 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2059 indirect_offset += 2; 2060 2061 /* look for the matching indice */ 2062 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2063 if (unique_indirect_regs[idx] == 2064 register_list_format[indirect_offset] || 2065 !unique_indirect_regs[idx]) 2066 break; 2067 } 2068 2069 BUG_ON(idx >= unique_indirect_reg_count); 2070 2071 if (!unique_indirect_regs[idx]) 2072 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2073 2074 indirect_offset++; 2075 } 2076 } 2077 } 2078 2079 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2080 { 2081 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2082 int unique_indirect_reg_count = 0; 2083 2084 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2085 int indirect_start_offsets_count = 0; 2086 2087 int list_size = 0; 2088 int i = 0, j = 0; 2089 u32 tmp = 0; 2090 2091 u32 *register_list_format = 2092 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2093 if (!register_list_format) 2094 return -ENOMEM; 2095 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 2096 adev->gfx.rlc.reg_list_format_size_bytes); 2097 2098 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2099 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2100 gfx_v9_1_parse_ind_reg_list(register_list_format, 2101 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2102 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2103 unique_indirect_regs, 2104 unique_indirect_reg_count, 2105 indirect_start_offsets, 2106 &indirect_start_offsets_count, 2107 ARRAY_SIZE(indirect_start_offsets)); 2108 2109 /* enable auto inc in case it is disabled */ 2110 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2111 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2112 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2113 2114 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2115 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2116 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2117 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2118 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2119 adev->gfx.rlc.register_restore[i]); 2120 2121 /* load indirect register */ 2122 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2123 adev->gfx.rlc.reg_list_format_start); 2124 2125 /* direct register portion */ 2126 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2127 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2128 register_list_format[i]); 2129 2130 /* indirect register portion */ 2131 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2132 if (register_list_format[i] == 0xFFFFFFFF) { 2133 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2134 continue; 2135 } 2136 2137 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2138 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2139 2140 for (j = 0; j < unique_indirect_reg_count; j++) { 2141 if (register_list_format[i] == unique_indirect_regs[j]) { 2142 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2143 break; 2144 } 2145 } 2146 2147 BUG_ON(j >= unique_indirect_reg_count); 2148 2149 i++; 2150 } 2151 2152 /* set save/restore list size */ 2153 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2154 list_size = list_size >> 1; 2155 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2156 adev->gfx.rlc.reg_restore_list_size); 2157 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2158 2159 /* write the starting offsets to RLC scratch ram */ 2160 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2161 adev->gfx.rlc.starting_offsets_start); 2162 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2163 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2164 indirect_start_offsets[i]); 2165 2166 /* load unique indirect regs*/ 2167 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2168 if (unique_indirect_regs[i] != 0) { 2169 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2170 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2171 unique_indirect_regs[i] & 0x3FFFF); 2172 2173 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2174 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2175 unique_indirect_regs[i] >> 20); 2176 } 2177 } 2178 2179 kfree(register_list_format); 2180 return 0; 2181 } 2182 2183 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2184 { 2185 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2186 } 2187 2188 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2189 bool enable) 2190 { 2191 uint32_t data = 0; 2192 uint32_t default_data = 0; 2193 2194 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2195 if (enable == true) { 2196 /* enable GFXIP control over CGPG */ 2197 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2198 if(default_data != data) 2199 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2200 2201 /* update status */ 2202 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2203 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2204 if(default_data != data) 2205 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2206 } else { 2207 /* restore GFXIP control over GCPG */ 2208 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2209 if(default_data != data) 2210 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2211 } 2212 } 2213 2214 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2215 { 2216 uint32_t data = 0; 2217 2218 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2219 AMD_PG_SUPPORT_GFX_SMG | 2220 AMD_PG_SUPPORT_GFX_DMG)) { 2221 /* init IDLE_POLL_COUNT = 60 */ 2222 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2223 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2224 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2225 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2226 2227 /* init RLC PG Delay */ 2228 data = 0; 2229 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2230 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2231 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2232 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2233 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2234 2235 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2236 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2237 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2238 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2239 2240 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2241 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2242 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2243 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2244 2245 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2246 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2247 2248 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2249 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2250 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2251 2252 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2253 } 2254 } 2255 2256 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2257 bool enable) 2258 { 2259 uint32_t data = 0; 2260 uint32_t default_data = 0; 2261 2262 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2263 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2264 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2265 enable ? 1 : 0); 2266 if (default_data != data) 2267 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2268 } 2269 2270 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2271 bool enable) 2272 { 2273 uint32_t data = 0; 2274 uint32_t default_data = 0; 2275 2276 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2277 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2278 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2279 enable ? 1 : 0); 2280 if(default_data != data) 2281 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2282 } 2283 2284 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2285 bool enable) 2286 { 2287 uint32_t data = 0; 2288 uint32_t default_data = 0; 2289 2290 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2291 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2292 CP_PG_DISABLE, 2293 enable ? 0 : 1); 2294 if(default_data != data) 2295 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2296 } 2297 2298 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2299 bool enable) 2300 { 2301 uint32_t data, default_data; 2302 2303 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2304 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2305 GFX_POWER_GATING_ENABLE, 2306 enable ? 1 : 0); 2307 if(default_data != data) 2308 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2309 } 2310 2311 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2312 bool enable) 2313 { 2314 uint32_t data, default_data; 2315 2316 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2317 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2318 GFX_PIPELINE_PG_ENABLE, 2319 enable ? 1 : 0); 2320 if(default_data != data) 2321 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2322 2323 if (!enable) 2324 /* read any GFX register to wake up GFX */ 2325 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2326 } 2327 2328 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2329 bool enable) 2330 { 2331 uint32_t data, default_data; 2332 2333 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2334 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2335 STATIC_PER_CU_PG_ENABLE, 2336 enable ? 1 : 0); 2337 if(default_data != data) 2338 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2339 } 2340 2341 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2342 bool enable) 2343 { 2344 uint32_t data, default_data; 2345 2346 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2347 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2348 DYN_PER_CU_PG_ENABLE, 2349 enable ? 1 : 0); 2350 if(default_data != data) 2351 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2352 } 2353 2354 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2355 { 2356 gfx_v9_0_init_csb(adev); 2357 2358 /* 2359 * Rlc save restore list is workable since v2_1. 2360 * And it's needed by gfxoff feature. 2361 */ 2362 if (adev->gfx.rlc.is_rlc_v2_1) { 2363 gfx_v9_1_init_rlc_save_restore_list(adev); 2364 gfx_v9_0_enable_save_restore_machine(adev); 2365 } 2366 2367 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2368 AMD_PG_SUPPORT_GFX_SMG | 2369 AMD_PG_SUPPORT_GFX_DMG | 2370 AMD_PG_SUPPORT_CP | 2371 AMD_PG_SUPPORT_GDS | 2372 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2373 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2374 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2375 gfx_v9_0_init_gfx_power_gating(adev); 2376 } 2377 } 2378 2379 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2380 { 2381 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2382 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2383 gfx_v9_0_wait_for_rlc_serdes(adev); 2384 } 2385 2386 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2387 { 2388 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2389 udelay(50); 2390 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2391 udelay(50); 2392 } 2393 2394 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2395 { 2396 #ifdef AMDGPU_RLC_DEBUG_RETRY 2397 u32 rlc_ucode_ver; 2398 #endif 2399 2400 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2401 udelay(50); 2402 2403 /* carrizo do enable cp interrupt after cp inited */ 2404 if (!(adev->flags & AMD_IS_APU)) { 2405 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2406 udelay(50); 2407 } 2408 2409 #ifdef AMDGPU_RLC_DEBUG_RETRY 2410 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2411 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2412 if(rlc_ucode_ver == 0x108) { 2413 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2414 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2415 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2416 * default is 0x9C4 to create a 100us interval */ 2417 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2418 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2419 * to disable the page fault retry interrupts, default is 2420 * 0x100 (256) */ 2421 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2422 } 2423 #endif 2424 } 2425 2426 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2427 { 2428 const struct rlc_firmware_header_v2_0 *hdr; 2429 const __le32 *fw_data; 2430 unsigned i, fw_size; 2431 2432 if (!adev->gfx.rlc_fw) 2433 return -EINVAL; 2434 2435 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2436 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2437 2438 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2439 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2440 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2441 2442 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2443 RLCG_UCODE_LOADING_START_ADDRESS); 2444 for (i = 0; i < fw_size; i++) 2445 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2446 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2447 2448 return 0; 2449 } 2450 2451 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2452 { 2453 int r; 2454 2455 if (amdgpu_sriov_vf(adev)) { 2456 gfx_v9_0_init_csb(adev); 2457 return 0; 2458 } 2459 2460 adev->gfx.rlc.funcs->stop(adev); 2461 2462 /* disable CG */ 2463 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2464 2465 gfx_v9_0_init_pg(adev); 2466 2467 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2468 /* legacy rlc firmware loading */ 2469 r = gfx_v9_0_rlc_load_microcode(adev); 2470 if (r) 2471 return r; 2472 } 2473 2474 switch (adev->asic_type) { 2475 case CHIP_RAVEN: 2476 if (amdgpu_lbpw == 0) 2477 gfx_v9_0_enable_lbpw(adev, false); 2478 else 2479 gfx_v9_0_enable_lbpw(adev, true); 2480 break; 2481 case CHIP_VEGA20: 2482 if (amdgpu_lbpw > 0) 2483 gfx_v9_0_enable_lbpw(adev, true); 2484 else 2485 gfx_v9_0_enable_lbpw(adev, false); 2486 break; 2487 default: 2488 break; 2489 } 2490 2491 adev->gfx.rlc.funcs->start(adev); 2492 2493 return 0; 2494 } 2495 2496 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2497 { 2498 int i; 2499 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2500 2501 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2502 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2503 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 2504 if (!enable) { 2505 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2506 adev->gfx.gfx_ring[i].sched.ready = false; 2507 } 2508 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2509 udelay(50); 2510 } 2511 2512 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2513 { 2514 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2515 const struct gfx_firmware_header_v1_0 *ce_hdr; 2516 const struct gfx_firmware_header_v1_0 *me_hdr; 2517 const __le32 *fw_data; 2518 unsigned i, fw_size; 2519 2520 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2521 return -EINVAL; 2522 2523 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2524 adev->gfx.pfp_fw->data; 2525 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2526 adev->gfx.ce_fw->data; 2527 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2528 adev->gfx.me_fw->data; 2529 2530 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2531 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2532 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2533 2534 gfx_v9_0_cp_gfx_enable(adev, false); 2535 2536 /* PFP */ 2537 fw_data = (const __le32 *) 2538 (adev->gfx.pfp_fw->data + 2539 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2540 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2541 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2542 for (i = 0; i < fw_size; i++) 2543 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2544 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2545 2546 /* CE */ 2547 fw_data = (const __le32 *) 2548 (adev->gfx.ce_fw->data + 2549 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2550 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2551 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2552 for (i = 0; i < fw_size; i++) 2553 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2554 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2555 2556 /* ME */ 2557 fw_data = (const __le32 *) 2558 (adev->gfx.me_fw->data + 2559 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2560 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2561 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2562 for (i = 0; i < fw_size; i++) 2563 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2564 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2565 2566 return 0; 2567 } 2568 2569 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2570 { 2571 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2572 const struct cs_section_def *sect = NULL; 2573 const struct cs_extent_def *ext = NULL; 2574 int r, i, tmp; 2575 2576 /* init the CP */ 2577 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2578 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2579 2580 gfx_v9_0_cp_gfx_enable(adev, true); 2581 2582 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2583 if (r) { 2584 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2585 return r; 2586 } 2587 2588 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2589 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2590 2591 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2592 amdgpu_ring_write(ring, 0x80000000); 2593 amdgpu_ring_write(ring, 0x80000000); 2594 2595 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2596 for (ext = sect->section; ext->extent != NULL; ++ext) { 2597 if (sect->id == SECT_CONTEXT) { 2598 amdgpu_ring_write(ring, 2599 PACKET3(PACKET3_SET_CONTEXT_REG, 2600 ext->reg_count)); 2601 amdgpu_ring_write(ring, 2602 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2603 for (i = 0; i < ext->reg_count; i++) 2604 amdgpu_ring_write(ring, ext->extent[i]); 2605 } 2606 } 2607 } 2608 2609 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2610 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2611 2612 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2613 amdgpu_ring_write(ring, 0); 2614 2615 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2616 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2617 amdgpu_ring_write(ring, 0x8000); 2618 amdgpu_ring_write(ring, 0x8000); 2619 2620 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 2621 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2622 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2623 amdgpu_ring_write(ring, tmp); 2624 amdgpu_ring_write(ring, 0); 2625 2626 amdgpu_ring_commit(ring); 2627 2628 return 0; 2629 } 2630 2631 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2632 { 2633 struct amdgpu_ring *ring; 2634 u32 tmp; 2635 u32 rb_bufsz; 2636 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2637 2638 /* Set the write pointer delay */ 2639 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2640 2641 /* set the RB to use vmid 0 */ 2642 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2643 2644 /* Set ring buffer size */ 2645 ring = &adev->gfx.gfx_ring[0]; 2646 rb_bufsz = order_base_2(ring->ring_size / 8); 2647 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2648 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2649 #ifdef __BIG_ENDIAN 2650 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2651 #endif 2652 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2653 2654 /* Initialize the ring buffer's write pointers */ 2655 ring->wptr = 0; 2656 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2657 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2658 2659 /* set the wb address wether it's enabled or not */ 2660 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2661 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2662 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2663 2664 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2665 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 2666 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 2667 2668 mdelay(1); 2669 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2670 2671 rb_addr = ring->gpu_addr >> 8; 2672 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2673 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2674 2675 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2676 if (ring->use_doorbell) { 2677 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2678 DOORBELL_OFFSET, ring->doorbell_index); 2679 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2680 DOORBELL_EN, 1); 2681 } else { 2682 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 2683 } 2684 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2685 2686 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2687 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2688 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2689 2690 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2691 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2692 2693 2694 /* start the ring */ 2695 gfx_v9_0_cp_gfx_start(adev); 2696 ring->sched.ready = true; 2697 2698 return 0; 2699 } 2700 2701 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2702 { 2703 int i; 2704 2705 if (enable) { 2706 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 2707 } else { 2708 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 2709 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2710 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2711 adev->gfx.compute_ring[i].sched.ready = false; 2712 adev->gfx.kiq.ring.sched.ready = false; 2713 } 2714 udelay(50); 2715 } 2716 2717 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2718 { 2719 const struct gfx_firmware_header_v1_0 *mec_hdr; 2720 const __le32 *fw_data; 2721 unsigned i; 2722 u32 tmp; 2723 2724 if (!adev->gfx.mec_fw) 2725 return -EINVAL; 2726 2727 gfx_v9_0_cp_compute_enable(adev, false); 2728 2729 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2730 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2731 2732 fw_data = (const __le32 *) 2733 (adev->gfx.mec_fw->data + 2734 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2735 tmp = 0; 2736 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2737 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2738 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2739 2740 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2741 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2742 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2743 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2744 2745 /* MEC1 */ 2746 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2747 mec_hdr->jt_offset); 2748 for (i = 0; i < mec_hdr->jt_size; i++) 2749 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2750 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2751 2752 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2753 adev->gfx.mec_fw_version); 2754 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 2755 2756 return 0; 2757 } 2758 2759 /* KIQ functions */ 2760 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2761 { 2762 uint32_t tmp; 2763 struct amdgpu_device *adev = ring->adev; 2764 2765 /* tell RLC which is KIQ queue */ 2766 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2767 tmp &= 0xffffff00; 2768 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2769 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2770 tmp |= 0x80; 2771 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2772 } 2773 2774 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2775 { 2776 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2777 uint64_t queue_mask = 0; 2778 int r, i; 2779 2780 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2781 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2782 continue; 2783 2784 /* This situation may be hit in the future if a new HW 2785 * generation exposes more than 64 queues. If so, the 2786 * definition of queue_mask needs updating */ 2787 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 2788 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 2789 break; 2790 } 2791 2792 queue_mask |= (1ull << i); 2793 } 2794 2795 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 2796 if (r) { 2797 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2798 return r; 2799 } 2800 2801 /* set resources */ 2802 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 2803 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 2804 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 2805 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 2806 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 2807 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 2808 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 2809 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 2810 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 2811 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2812 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2813 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 2814 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2815 2816 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 2817 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 2818 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 2819 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 2820 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 2821 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 2822 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2823 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 2824 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2825 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 2826 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2827 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2828 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2829 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2830 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2831 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2832 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2833 } 2834 2835 r = amdgpu_ring_test_helper(kiq_ring); 2836 if (r) 2837 DRM_ERROR("KCQ enable failed\n"); 2838 2839 return r; 2840 } 2841 2842 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2843 { 2844 struct amdgpu_device *adev = ring->adev; 2845 struct v9_mqd *mqd = ring->mqd_ptr; 2846 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2847 uint32_t tmp; 2848 2849 mqd->header = 0xC0310800; 2850 mqd->compute_pipelinestat_enable = 0x00000001; 2851 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2852 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2853 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2854 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2855 mqd->compute_misc_reserved = 0x00000003; 2856 2857 mqd->dynamic_cu_mask_addr_lo = 2858 lower_32_bits(ring->mqd_gpu_addr 2859 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2860 mqd->dynamic_cu_mask_addr_hi = 2861 upper_32_bits(ring->mqd_gpu_addr 2862 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2863 2864 eop_base_addr = ring->eop_gpu_addr >> 8; 2865 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2866 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2867 2868 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2869 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2870 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2871 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2872 2873 mqd->cp_hqd_eop_control = tmp; 2874 2875 /* enable doorbell? */ 2876 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2877 2878 if (ring->use_doorbell) { 2879 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2880 DOORBELL_OFFSET, ring->doorbell_index); 2881 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2882 DOORBELL_EN, 1); 2883 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2884 DOORBELL_SOURCE, 0); 2885 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2886 DOORBELL_HIT, 0); 2887 } else { 2888 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2889 DOORBELL_EN, 0); 2890 } 2891 2892 mqd->cp_hqd_pq_doorbell_control = tmp; 2893 2894 /* disable the queue if it's active */ 2895 ring->wptr = 0; 2896 mqd->cp_hqd_dequeue_request = 0; 2897 mqd->cp_hqd_pq_rptr = 0; 2898 mqd->cp_hqd_pq_wptr_lo = 0; 2899 mqd->cp_hqd_pq_wptr_hi = 0; 2900 2901 /* set the pointer to the MQD */ 2902 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 2903 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2904 2905 /* set MQD vmid to 0 */ 2906 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 2907 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2908 mqd->cp_mqd_control = tmp; 2909 2910 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2911 hqd_gpu_addr = ring->gpu_addr >> 8; 2912 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2913 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2914 2915 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2916 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 2917 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2918 (order_base_2(ring->ring_size / 4) - 1)); 2919 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2920 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 2921 #ifdef __BIG_ENDIAN 2922 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 2923 #endif 2924 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2925 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 2926 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2927 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2928 mqd->cp_hqd_pq_control = tmp; 2929 2930 /* set the wb address whether it's enabled or not */ 2931 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2932 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2933 mqd->cp_hqd_pq_rptr_report_addr_hi = 2934 upper_32_bits(wb_gpu_addr) & 0xffff; 2935 2936 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2937 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2938 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2939 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2940 2941 tmp = 0; 2942 /* enable the doorbell if requested */ 2943 if (ring->use_doorbell) { 2944 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2945 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2946 DOORBELL_OFFSET, ring->doorbell_index); 2947 2948 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2949 DOORBELL_EN, 1); 2950 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2951 DOORBELL_SOURCE, 0); 2952 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2953 DOORBELL_HIT, 0); 2954 } 2955 2956 mqd->cp_hqd_pq_doorbell_control = tmp; 2957 2958 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2959 ring->wptr = 0; 2960 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 2961 2962 /* set the vmid for the queue */ 2963 mqd->cp_hqd_vmid = 0; 2964 2965 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2966 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2967 mqd->cp_hqd_persistent_state = tmp; 2968 2969 /* set MIN_IB_AVAIL_SIZE */ 2970 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2971 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2972 mqd->cp_hqd_ib_control = tmp; 2973 2974 /* activate the queue */ 2975 mqd->cp_hqd_active = 1; 2976 2977 return 0; 2978 } 2979 2980 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2981 { 2982 struct amdgpu_device *adev = ring->adev; 2983 struct v9_mqd *mqd = ring->mqd_ptr; 2984 int j; 2985 2986 /* disable wptr polling */ 2987 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2988 2989 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2990 mqd->cp_hqd_eop_base_addr_lo); 2991 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2992 mqd->cp_hqd_eop_base_addr_hi); 2993 2994 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2995 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 2996 mqd->cp_hqd_eop_control); 2997 2998 /* enable doorbell? */ 2999 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3000 mqd->cp_hqd_pq_doorbell_control); 3001 3002 /* disable the queue if it's active */ 3003 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3004 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3005 for (j = 0; j < adev->usec_timeout; j++) { 3006 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3007 break; 3008 udelay(1); 3009 } 3010 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3011 mqd->cp_hqd_dequeue_request); 3012 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3013 mqd->cp_hqd_pq_rptr); 3014 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3015 mqd->cp_hqd_pq_wptr_lo); 3016 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3017 mqd->cp_hqd_pq_wptr_hi); 3018 } 3019 3020 /* set the pointer to the MQD */ 3021 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3022 mqd->cp_mqd_base_addr_lo); 3023 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3024 mqd->cp_mqd_base_addr_hi); 3025 3026 /* set MQD vmid to 0 */ 3027 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3028 mqd->cp_mqd_control); 3029 3030 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3031 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3032 mqd->cp_hqd_pq_base_lo); 3033 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3034 mqd->cp_hqd_pq_base_hi); 3035 3036 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3037 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3038 mqd->cp_hqd_pq_control); 3039 3040 /* set the wb address whether it's enabled or not */ 3041 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3042 mqd->cp_hqd_pq_rptr_report_addr_lo); 3043 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3044 mqd->cp_hqd_pq_rptr_report_addr_hi); 3045 3046 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3047 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3048 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3049 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3050 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3051 3052 /* enable the doorbell if requested */ 3053 if (ring->use_doorbell) { 3054 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3055 (adev->doorbell_index.kiq * 2) << 2); 3056 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3057 (adev->doorbell_index.userqueue_end * 2) << 2); 3058 } 3059 3060 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3061 mqd->cp_hqd_pq_doorbell_control); 3062 3063 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3064 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3065 mqd->cp_hqd_pq_wptr_lo); 3066 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3067 mqd->cp_hqd_pq_wptr_hi); 3068 3069 /* set the vmid for the queue */ 3070 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3071 3072 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3073 mqd->cp_hqd_persistent_state); 3074 3075 /* activate the queue */ 3076 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3077 mqd->cp_hqd_active); 3078 3079 if (ring->use_doorbell) 3080 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3081 3082 return 0; 3083 } 3084 3085 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3086 { 3087 struct amdgpu_device *adev = ring->adev; 3088 int j; 3089 3090 /* disable the queue if it's active */ 3091 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3092 3093 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3094 3095 for (j = 0; j < adev->usec_timeout; j++) { 3096 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3097 break; 3098 udelay(1); 3099 } 3100 3101 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3102 DRM_DEBUG("KIQ dequeue request failed.\n"); 3103 3104 /* Manual disable if dequeue request times out */ 3105 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3106 } 3107 3108 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3109 0); 3110 } 3111 3112 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3113 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3114 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3115 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3116 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3117 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3118 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3119 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3120 3121 return 0; 3122 } 3123 3124 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3125 { 3126 struct amdgpu_device *adev = ring->adev; 3127 struct v9_mqd *mqd = ring->mqd_ptr; 3128 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3129 3130 gfx_v9_0_kiq_setting(ring); 3131 3132 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3133 /* reset MQD to a clean status */ 3134 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3135 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3136 3137 /* reset ring buffer */ 3138 ring->wptr = 0; 3139 amdgpu_ring_clear_ring(ring); 3140 3141 mutex_lock(&adev->srbm_mutex); 3142 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3143 gfx_v9_0_kiq_init_register(ring); 3144 soc15_grbm_select(adev, 0, 0, 0, 0); 3145 mutex_unlock(&adev->srbm_mutex); 3146 } else { 3147 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3148 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3149 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3150 mutex_lock(&adev->srbm_mutex); 3151 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3152 gfx_v9_0_mqd_init(ring); 3153 gfx_v9_0_kiq_init_register(ring); 3154 soc15_grbm_select(adev, 0, 0, 0, 0); 3155 mutex_unlock(&adev->srbm_mutex); 3156 3157 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3158 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3159 } 3160 3161 return 0; 3162 } 3163 3164 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3165 { 3166 struct amdgpu_device *adev = ring->adev; 3167 struct v9_mqd *mqd = ring->mqd_ptr; 3168 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3169 3170 if (!adev->in_gpu_reset && !adev->in_suspend) { 3171 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3172 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3173 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3174 mutex_lock(&adev->srbm_mutex); 3175 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3176 gfx_v9_0_mqd_init(ring); 3177 soc15_grbm_select(adev, 0, 0, 0, 0); 3178 mutex_unlock(&adev->srbm_mutex); 3179 3180 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3181 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3182 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3183 /* reset MQD to a clean status */ 3184 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3185 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3186 3187 /* reset ring buffer */ 3188 ring->wptr = 0; 3189 amdgpu_ring_clear_ring(ring); 3190 } else { 3191 amdgpu_ring_clear_ring(ring); 3192 } 3193 3194 return 0; 3195 } 3196 3197 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3198 { 3199 struct amdgpu_ring *ring; 3200 int r; 3201 3202 ring = &adev->gfx.kiq.ring; 3203 3204 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3205 if (unlikely(r != 0)) 3206 return r; 3207 3208 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3209 if (unlikely(r != 0)) 3210 return r; 3211 3212 gfx_v9_0_kiq_init_queue(ring); 3213 amdgpu_bo_kunmap(ring->mqd_obj); 3214 ring->mqd_ptr = NULL; 3215 amdgpu_bo_unreserve(ring->mqd_obj); 3216 ring->sched.ready = true; 3217 return 0; 3218 } 3219 3220 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3221 { 3222 struct amdgpu_ring *ring = NULL; 3223 int r = 0, i; 3224 3225 gfx_v9_0_cp_compute_enable(adev, true); 3226 3227 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3228 ring = &adev->gfx.compute_ring[i]; 3229 3230 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3231 if (unlikely(r != 0)) 3232 goto done; 3233 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3234 if (!r) { 3235 r = gfx_v9_0_kcq_init_queue(ring); 3236 amdgpu_bo_kunmap(ring->mqd_obj); 3237 ring->mqd_ptr = NULL; 3238 } 3239 amdgpu_bo_unreserve(ring->mqd_obj); 3240 if (r) 3241 goto done; 3242 } 3243 3244 r = gfx_v9_0_kiq_kcq_enable(adev); 3245 done: 3246 return r; 3247 } 3248 3249 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3250 { 3251 int r, i; 3252 struct amdgpu_ring *ring; 3253 3254 if (!(adev->flags & AMD_IS_APU)) 3255 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3256 3257 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3258 /* legacy firmware loading */ 3259 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3260 if (r) 3261 return r; 3262 3263 r = gfx_v9_0_cp_compute_load_microcode(adev); 3264 if (r) 3265 return r; 3266 } 3267 3268 r = gfx_v9_0_kiq_resume(adev); 3269 if (r) 3270 return r; 3271 3272 r = gfx_v9_0_cp_gfx_resume(adev); 3273 if (r) 3274 return r; 3275 3276 r = gfx_v9_0_kcq_resume(adev); 3277 if (r) 3278 return r; 3279 3280 ring = &adev->gfx.gfx_ring[0]; 3281 r = amdgpu_ring_test_helper(ring); 3282 if (r) 3283 return r; 3284 3285 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3286 ring = &adev->gfx.compute_ring[i]; 3287 amdgpu_ring_test_helper(ring); 3288 } 3289 3290 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3291 3292 return 0; 3293 } 3294 3295 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3296 { 3297 gfx_v9_0_cp_gfx_enable(adev, enable); 3298 gfx_v9_0_cp_compute_enable(adev, enable); 3299 } 3300 3301 static int gfx_v9_0_hw_init(void *handle) 3302 { 3303 int r; 3304 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3305 3306 gfx_v9_0_init_golden_registers(adev); 3307 3308 gfx_v9_0_constants_init(adev); 3309 3310 r = gfx_v9_0_csb_vram_pin(adev); 3311 if (r) 3312 return r; 3313 3314 r = adev->gfx.rlc.funcs->resume(adev); 3315 if (r) 3316 return r; 3317 3318 r = gfx_v9_0_cp_resume(adev); 3319 if (r) 3320 return r; 3321 3322 r = gfx_v9_0_ngg_en(adev); 3323 if (r) 3324 return r; 3325 3326 return r; 3327 } 3328 3329 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3330 { 3331 int r, i; 3332 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3333 3334 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3335 if (r) 3336 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3337 3338 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3339 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3340 3341 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3342 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3343 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3344 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3345 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3346 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3347 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3348 amdgpu_ring_write(kiq_ring, 0); 3349 amdgpu_ring_write(kiq_ring, 0); 3350 amdgpu_ring_write(kiq_ring, 0); 3351 } 3352 r = amdgpu_ring_test_helper(kiq_ring); 3353 if (r) 3354 DRM_ERROR("KCQ disable failed\n"); 3355 3356 return r; 3357 } 3358 3359 static int gfx_v9_0_hw_fini(void *handle) 3360 { 3361 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3362 3363 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3364 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3365 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3366 3367 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3368 gfx_v9_0_kcq_disable(adev); 3369 3370 if (amdgpu_sriov_vf(adev)) { 3371 gfx_v9_0_cp_gfx_enable(adev, false); 3372 /* must disable polling for SRIOV when hw finished, otherwise 3373 * CPC engine may still keep fetching WB address which is already 3374 * invalid after sw finished and trigger DMAR reading error in 3375 * hypervisor side. 3376 */ 3377 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3378 return 0; 3379 } 3380 3381 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3382 * otherwise KIQ is hanging when binding back 3383 */ 3384 if (!adev->in_gpu_reset && !adev->in_suspend) { 3385 mutex_lock(&adev->srbm_mutex); 3386 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3387 adev->gfx.kiq.ring.pipe, 3388 adev->gfx.kiq.ring.queue, 0); 3389 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3390 soc15_grbm_select(adev, 0, 0, 0, 0); 3391 mutex_unlock(&adev->srbm_mutex); 3392 } 3393 3394 gfx_v9_0_cp_enable(adev, false); 3395 adev->gfx.rlc.funcs->stop(adev); 3396 3397 gfx_v9_0_csb_vram_unpin(adev); 3398 3399 return 0; 3400 } 3401 3402 static int gfx_v9_0_suspend(void *handle) 3403 { 3404 return gfx_v9_0_hw_fini(handle); 3405 } 3406 3407 static int gfx_v9_0_resume(void *handle) 3408 { 3409 return gfx_v9_0_hw_init(handle); 3410 } 3411 3412 static bool gfx_v9_0_is_idle(void *handle) 3413 { 3414 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3415 3416 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3417 GRBM_STATUS, GUI_ACTIVE)) 3418 return false; 3419 else 3420 return true; 3421 } 3422 3423 static int gfx_v9_0_wait_for_idle(void *handle) 3424 { 3425 unsigned i; 3426 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3427 3428 for (i = 0; i < adev->usec_timeout; i++) { 3429 if (gfx_v9_0_is_idle(handle)) 3430 return 0; 3431 udelay(1); 3432 } 3433 return -ETIMEDOUT; 3434 } 3435 3436 static int gfx_v9_0_soft_reset(void *handle) 3437 { 3438 u32 grbm_soft_reset = 0; 3439 u32 tmp; 3440 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3441 3442 /* GRBM_STATUS */ 3443 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3444 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3445 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3446 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3447 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3448 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3449 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3450 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3451 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3452 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3453 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3454 } 3455 3456 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3457 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3458 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3459 } 3460 3461 /* GRBM_STATUS2 */ 3462 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3463 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3464 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3465 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3466 3467 3468 if (grbm_soft_reset) { 3469 /* stop the rlc */ 3470 adev->gfx.rlc.funcs->stop(adev); 3471 3472 /* Disable GFX parsing/prefetching */ 3473 gfx_v9_0_cp_gfx_enable(adev, false); 3474 3475 /* Disable MEC parsing/prefetching */ 3476 gfx_v9_0_cp_compute_enable(adev, false); 3477 3478 if (grbm_soft_reset) { 3479 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3480 tmp |= grbm_soft_reset; 3481 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3482 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3483 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3484 3485 udelay(50); 3486 3487 tmp &= ~grbm_soft_reset; 3488 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3489 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3490 } 3491 3492 /* Wait a little for things to settle down */ 3493 udelay(50); 3494 } 3495 return 0; 3496 } 3497 3498 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3499 { 3500 uint64_t clock; 3501 3502 mutex_lock(&adev->gfx.gpu_clock_mutex); 3503 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3504 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3505 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3506 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3507 return clock; 3508 } 3509 3510 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3511 uint32_t vmid, 3512 uint32_t gds_base, uint32_t gds_size, 3513 uint32_t gws_base, uint32_t gws_size, 3514 uint32_t oa_base, uint32_t oa_size) 3515 { 3516 struct amdgpu_device *adev = ring->adev; 3517 3518 /* GDS Base */ 3519 gfx_v9_0_write_data_to_reg(ring, 0, false, 3520 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3521 gds_base); 3522 3523 /* GDS Size */ 3524 gfx_v9_0_write_data_to_reg(ring, 0, false, 3525 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3526 gds_size); 3527 3528 /* GWS */ 3529 gfx_v9_0_write_data_to_reg(ring, 0, false, 3530 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3531 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3532 3533 /* OA */ 3534 gfx_v9_0_write_data_to_reg(ring, 0, false, 3535 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3536 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3537 } 3538 3539 static const u32 vgpr_init_compute_shader[] = 3540 { 3541 0xb07c0000, 0xbe8000ff, 3542 0x000000f8, 0xbf110800, 3543 0x7e000280, 0x7e020280, 3544 0x7e040280, 0x7e060280, 3545 0x7e080280, 0x7e0a0280, 3546 0x7e0c0280, 0x7e0e0280, 3547 0x80808800, 0xbe803200, 3548 0xbf84fff5, 0xbf9c0000, 3549 0xd28c0001, 0x0001007f, 3550 0xd28d0001, 0x0002027e, 3551 0x10020288, 0xb8810904, 3552 0xb7814000, 0xd1196a01, 3553 0x00000301, 0xbe800087, 3554 0xbefc00c1, 0xd89c4000, 3555 0x00020201, 0xd89cc080, 3556 0x00040401, 0x320202ff, 3557 0x00000800, 0x80808100, 3558 0xbf84fff8, 0x7e020280, 3559 0xbf810000, 0x00000000, 3560 }; 3561 3562 static const u32 sgpr_init_compute_shader[] = 3563 { 3564 0xb07c0000, 0xbe8000ff, 3565 0x0000005f, 0xbee50080, 3566 0xbe812c65, 0xbe822c65, 3567 0xbe832c65, 0xbe842c65, 3568 0xbe852c65, 0xb77c0005, 3569 0x80808500, 0xbf84fff8, 3570 0xbe800080, 0xbf810000, 3571 }; 3572 3573 static const struct soc15_reg_entry vgpr_init_regs[] = { 3574 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3584 }; 3585 3586 static const struct soc15_reg_entry sgpr_init_regs[] = { 3587 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3588 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3589 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3591 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3592 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3593 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3594 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3595 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3596 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3597 }; 3598 3599 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3600 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3601 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3602 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3603 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3604 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3605 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3606 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3607 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3608 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3609 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3610 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3611 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3612 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3613 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3614 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3615 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3616 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3617 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3618 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3619 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3620 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3621 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3622 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 3623 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 3624 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 3625 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 3626 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 3627 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 3628 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 3629 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 3630 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 3631 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 3632 }; 3633 3634 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 3635 { 3636 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3637 int i, r; 3638 3639 r = amdgpu_ring_alloc(ring, 7); 3640 if (r) { 3641 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 3642 ring->name, r); 3643 return r; 3644 } 3645 3646 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 3647 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 3648 3649 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 3650 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 3651 PACKET3_DMA_DATA_DST_SEL(1) | 3652 PACKET3_DMA_DATA_SRC_SEL(2) | 3653 PACKET3_DMA_DATA_ENGINE(0))); 3654 amdgpu_ring_write(ring, 0); 3655 amdgpu_ring_write(ring, 0); 3656 amdgpu_ring_write(ring, 0); 3657 amdgpu_ring_write(ring, 0); 3658 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 3659 adev->gds.gds_size); 3660 3661 amdgpu_ring_commit(ring); 3662 3663 for (i = 0; i < adev->usec_timeout; i++) { 3664 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 3665 break; 3666 udelay(1); 3667 } 3668 3669 if (i >= adev->usec_timeout) 3670 r = -ETIMEDOUT; 3671 3672 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 3673 3674 return r; 3675 } 3676 3677 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 3678 { 3679 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3680 struct amdgpu_ib ib; 3681 struct dma_fence *f = NULL; 3682 int r, i, j, k; 3683 unsigned total_size, vgpr_offset, sgpr_offset; 3684 u64 gpu_addr; 3685 3686 /* only support when RAS is enabled */ 3687 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3688 return 0; 3689 3690 /* bail if the compute ring is not ready */ 3691 if (!ring->sched.ready) 3692 return 0; 3693 3694 total_size = 3695 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3696 total_size += 3697 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3698 total_size = ALIGN(total_size, 256); 3699 vgpr_offset = total_size; 3700 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 3701 sgpr_offset = total_size; 3702 total_size += sizeof(sgpr_init_compute_shader); 3703 3704 /* allocate an indirect buffer to put the commands in */ 3705 memset(&ib, 0, sizeof(ib)); 3706 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 3707 if (r) { 3708 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 3709 return r; 3710 } 3711 3712 /* load the compute shaders */ 3713 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 3714 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 3715 3716 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 3717 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 3718 3719 /* init the ib length to 0 */ 3720 ib.length_dw = 0; 3721 3722 /* VGPR */ 3723 /* write the register state for the compute dispatch */ 3724 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 3725 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3726 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 3727 - PACKET3_SET_SH_REG_START; 3728 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 3729 } 3730 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3731 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 3732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3733 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 3734 - PACKET3_SET_SH_REG_START; 3735 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3736 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3737 3738 /* write dispatch packet */ 3739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3740 ib.ptr[ib.length_dw++] = 128; /* x */ 3741 ib.ptr[ib.length_dw++] = 1; /* y */ 3742 ib.ptr[ib.length_dw++] = 1; /* z */ 3743 ib.ptr[ib.length_dw++] = 3744 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3745 3746 /* write CS partial flush packet */ 3747 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3748 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3749 3750 /* SGPR */ 3751 /* write the register state for the compute dispatch */ 3752 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 3753 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3754 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 3755 - PACKET3_SET_SH_REG_START; 3756 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 3757 } 3758 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3759 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 3760 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3761 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 3762 - PACKET3_SET_SH_REG_START; 3763 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3764 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3765 3766 /* write dispatch packet */ 3767 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3768 ib.ptr[ib.length_dw++] = 128; /* x */ 3769 ib.ptr[ib.length_dw++] = 1; /* y */ 3770 ib.ptr[ib.length_dw++] = 1; /* z */ 3771 ib.ptr[ib.length_dw++] = 3772 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3773 3774 /* write CS partial flush packet */ 3775 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3776 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3777 3778 /* shedule the ib on the ring */ 3779 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 3780 if (r) { 3781 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 3782 goto fail; 3783 } 3784 3785 /* wait for the GPU to finish processing the IB */ 3786 r = dma_fence_wait(f, false); 3787 if (r) { 3788 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 3789 goto fail; 3790 } 3791 3792 /* read back registers to clear the counters */ 3793 mutex_lock(&adev->grbm_idx_mutex); 3794 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 3795 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 3796 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 3797 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 3798 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 3799 } 3800 } 3801 } 3802 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 3803 mutex_unlock(&adev->grbm_idx_mutex); 3804 3805 fail: 3806 amdgpu_ib_free(adev, &ib, NULL); 3807 dma_fence_put(f); 3808 3809 return r; 3810 } 3811 3812 static int gfx_v9_0_early_init(void *handle) 3813 { 3814 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3815 3816 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 3817 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3818 gfx_v9_0_set_ring_funcs(adev); 3819 gfx_v9_0_set_irq_funcs(adev); 3820 gfx_v9_0_set_gds_init(adev); 3821 gfx_v9_0_set_rlc_funcs(adev); 3822 3823 return 0; 3824 } 3825 3826 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 3827 struct amdgpu_iv_entry *entry); 3828 3829 static int gfx_v9_0_ecc_late_init(void *handle) 3830 { 3831 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3832 struct ras_common_if **ras_if = &adev->gfx.ras_if; 3833 struct ras_ih_if ih_info = { 3834 .cb = gfx_v9_0_process_ras_data_cb, 3835 }; 3836 struct ras_fs_if fs_info = { 3837 .sysfs_name = "gfx_err_count", 3838 .debugfs_name = "gfx_err_inject", 3839 }; 3840 struct ras_common_if ras_block = { 3841 .block = AMDGPU_RAS_BLOCK__GFX, 3842 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 3843 .sub_block_index = 0, 3844 .name = "gfx", 3845 }; 3846 int r; 3847 3848 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 3849 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 3850 return 0; 3851 } 3852 3853 r = gfx_v9_0_do_edc_gds_workarounds(adev); 3854 if (r) 3855 return r; 3856 3857 /* requires IBs so do in late init after IB pool is initialized */ 3858 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 3859 if (r) 3860 return r; 3861 3862 /* handle resume path. */ 3863 if (*ras_if) { 3864 /* resend ras TA enable cmd during resume. 3865 * prepare to handle failure. 3866 */ 3867 ih_info.head = **ras_if; 3868 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3869 if (r) { 3870 if (r == -EAGAIN) { 3871 /* request a gpu reset. will run again. */ 3872 amdgpu_ras_request_reset_on_boot(adev, 3873 AMDGPU_RAS_BLOCK__GFX); 3874 return 0; 3875 } 3876 /* fail to enable ras, cleanup all. */ 3877 goto irq; 3878 } 3879 /* enable successfully. continue. */ 3880 goto resume; 3881 } 3882 3883 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 3884 if (!*ras_if) 3885 return -ENOMEM; 3886 3887 **ras_if = ras_block; 3888 3889 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3890 if (r) { 3891 if (r == -EAGAIN) { 3892 amdgpu_ras_request_reset_on_boot(adev, 3893 AMDGPU_RAS_BLOCK__GFX); 3894 r = 0; 3895 } 3896 goto feature; 3897 } 3898 3899 ih_info.head = **ras_if; 3900 fs_info.head = **ras_if; 3901 3902 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 3903 if (r) 3904 goto interrupt; 3905 3906 amdgpu_ras_debugfs_create(adev, &fs_info); 3907 3908 r = amdgpu_ras_sysfs_create(adev, &fs_info); 3909 if (r) 3910 goto sysfs; 3911 resume: 3912 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 3913 if (r) 3914 goto irq; 3915 3916 return 0; 3917 irq: 3918 amdgpu_ras_sysfs_remove(adev, *ras_if); 3919 sysfs: 3920 amdgpu_ras_debugfs_remove(adev, *ras_if); 3921 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 3922 interrupt: 3923 amdgpu_ras_feature_enable(adev, *ras_if, 0); 3924 feature: 3925 kfree(*ras_if); 3926 *ras_if = NULL; 3927 return r; 3928 } 3929 3930 static int gfx_v9_0_late_init(void *handle) 3931 { 3932 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3933 int r; 3934 3935 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3936 if (r) 3937 return r; 3938 3939 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3940 if (r) 3941 return r; 3942 3943 r = gfx_v9_0_ecc_late_init(handle); 3944 if (r) 3945 return r; 3946 3947 return 0; 3948 } 3949 3950 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 3951 { 3952 uint32_t rlc_setting; 3953 3954 /* if RLC is not enabled, do nothing */ 3955 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3956 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3957 return false; 3958 3959 return true; 3960 } 3961 3962 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 3963 { 3964 uint32_t data; 3965 unsigned i; 3966 3967 data = RLC_SAFE_MODE__CMD_MASK; 3968 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3969 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3970 3971 /* wait for RLC_SAFE_MODE */ 3972 for (i = 0; i < adev->usec_timeout; i++) { 3973 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3974 break; 3975 udelay(1); 3976 } 3977 } 3978 3979 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 3980 { 3981 uint32_t data; 3982 3983 data = RLC_SAFE_MODE__CMD_MASK; 3984 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3985 } 3986 3987 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3988 bool enable) 3989 { 3990 amdgpu_gfx_rlc_enter_safe_mode(adev); 3991 3992 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3993 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3994 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 3995 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 3996 } else { 3997 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 3998 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 3999 } 4000 4001 amdgpu_gfx_rlc_exit_safe_mode(adev); 4002 } 4003 4004 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4005 bool enable) 4006 { 4007 /* TODO: double check if we need to perform under safe mode */ 4008 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4009 4010 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4011 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4012 else 4013 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4014 4015 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4016 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4017 else 4018 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4019 4020 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4021 } 4022 4023 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4024 bool enable) 4025 { 4026 uint32_t data, def; 4027 4028 amdgpu_gfx_rlc_enter_safe_mode(adev); 4029 4030 /* It is disabled by HW by default */ 4031 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4032 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4033 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4034 4035 if (adev->asic_type != CHIP_VEGA12) 4036 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4037 4038 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4039 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4040 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4041 4042 /* only for Vega10 & Raven1 */ 4043 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4044 4045 if (def != data) 4046 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4047 4048 /* MGLS is a global flag to control all MGLS in GFX */ 4049 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4050 /* 2 - RLC memory Light sleep */ 4051 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4052 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4053 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4054 if (def != data) 4055 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4056 } 4057 /* 3 - CP memory Light sleep */ 4058 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4059 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4060 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4061 if (def != data) 4062 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4063 } 4064 } 4065 } else { 4066 /* 1 - MGCG_OVERRIDE */ 4067 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4068 4069 if (adev->asic_type != CHIP_VEGA12) 4070 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4071 4072 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4073 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4074 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4075 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4076 4077 if (def != data) 4078 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4079 4080 /* 2 - disable MGLS in RLC */ 4081 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4082 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4083 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4084 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4085 } 4086 4087 /* 3 - disable MGLS in CP */ 4088 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4089 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4090 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4091 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4092 } 4093 } 4094 4095 amdgpu_gfx_rlc_exit_safe_mode(adev); 4096 } 4097 4098 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4099 bool enable) 4100 { 4101 uint32_t data, def; 4102 4103 amdgpu_gfx_rlc_enter_safe_mode(adev); 4104 4105 /* Enable 3D CGCG/CGLS */ 4106 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4107 /* write cmd to clear cgcg/cgls ov */ 4108 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4109 /* unset CGCG override */ 4110 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4111 /* update CGCG and CGLS override bits */ 4112 if (def != data) 4113 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4114 4115 /* enable 3Dcgcg FSM(0x0000363f) */ 4116 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4117 4118 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4119 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4120 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4121 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4122 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4123 if (def != data) 4124 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4125 4126 /* set IDLE_POLL_COUNT(0x00900100) */ 4127 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4128 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4129 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4130 if (def != data) 4131 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4132 } else { 4133 /* Disable CGCG/CGLS */ 4134 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4135 /* disable cgcg, cgls should be disabled */ 4136 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4137 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4138 /* disable cgcg and cgls in FSM */ 4139 if (def != data) 4140 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4141 } 4142 4143 amdgpu_gfx_rlc_exit_safe_mode(adev); 4144 } 4145 4146 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4147 bool enable) 4148 { 4149 uint32_t def, data; 4150 4151 amdgpu_gfx_rlc_enter_safe_mode(adev); 4152 4153 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4154 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4155 /* unset CGCG override */ 4156 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4157 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4158 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4159 else 4160 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4161 /* update CGCG and CGLS override bits */ 4162 if (def != data) 4163 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4164 4165 /* enable cgcg FSM(0x0000363F) */ 4166 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4167 4168 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4169 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4170 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4171 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4172 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4173 if (def != data) 4174 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4175 4176 /* set IDLE_POLL_COUNT(0x00900100) */ 4177 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4178 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4179 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4180 if (def != data) 4181 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4182 } else { 4183 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4184 /* reset CGCG/CGLS bits */ 4185 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4186 /* disable cgcg and cgls in FSM */ 4187 if (def != data) 4188 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4189 } 4190 4191 amdgpu_gfx_rlc_exit_safe_mode(adev); 4192 } 4193 4194 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4195 bool enable) 4196 { 4197 if (enable) { 4198 /* CGCG/CGLS should be enabled after MGCG/MGLS 4199 * === MGCG + MGLS === 4200 */ 4201 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4202 /* === CGCG /CGLS for GFX 3D Only === */ 4203 gfx_v9_0_update_3d_clock_gating(adev, enable); 4204 /* === CGCG + CGLS === */ 4205 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4206 } else { 4207 /* CGCG/CGLS should be disabled before MGCG/MGLS 4208 * === CGCG + CGLS === 4209 */ 4210 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4211 /* === CGCG /CGLS for GFX 3D Only === */ 4212 gfx_v9_0_update_3d_clock_gating(adev, enable); 4213 /* === MGCG + MGLS === */ 4214 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4215 } 4216 return 0; 4217 } 4218 4219 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4220 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4221 .set_safe_mode = gfx_v9_0_set_safe_mode, 4222 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4223 .init = gfx_v9_0_rlc_init, 4224 .get_csb_size = gfx_v9_0_get_csb_size, 4225 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4226 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4227 .resume = gfx_v9_0_rlc_resume, 4228 .stop = gfx_v9_0_rlc_stop, 4229 .reset = gfx_v9_0_rlc_reset, 4230 .start = gfx_v9_0_rlc_start 4231 }; 4232 4233 static int gfx_v9_0_set_powergating_state(void *handle, 4234 enum amd_powergating_state state) 4235 { 4236 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4237 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4238 4239 switch (adev->asic_type) { 4240 case CHIP_RAVEN: 4241 if (!enable) { 4242 amdgpu_gfx_off_ctrl(adev, false); 4243 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4244 } 4245 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4246 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4247 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4248 } else { 4249 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4250 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4251 } 4252 4253 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4254 gfx_v9_0_enable_cp_power_gating(adev, true); 4255 else 4256 gfx_v9_0_enable_cp_power_gating(adev, false); 4257 4258 /* update gfx cgpg state */ 4259 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4260 4261 /* update mgcg state */ 4262 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4263 4264 if (enable) 4265 amdgpu_gfx_off_ctrl(adev, true); 4266 break; 4267 case CHIP_VEGA12: 4268 if (!enable) { 4269 amdgpu_gfx_off_ctrl(adev, false); 4270 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4271 } else { 4272 amdgpu_gfx_off_ctrl(adev, true); 4273 } 4274 break; 4275 default: 4276 break; 4277 } 4278 4279 return 0; 4280 } 4281 4282 static int gfx_v9_0_set_clockgating_state(void *handle, 4283 enum amd_clockgating_state state) 4284 { 4285 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4286 4287 if (amdgpu_sriov_vf(adev)) 4288 return 0; 4289 4290 switch (adev->asic_type) { 4291 case CHIP_VEGA10: 4292 case CHIP_VEGA12: 4293 case CHIP_VEGA20: 4294 case CHIP_RAVEN: 4295 gfx_v9_0_update_gfx_clock_gating(adev, 4296 state == AMD_CG_STATE_GATE ? true : false); 4297 break; 4298 default: 4299 break; 4300 } 4301 return 0; 4302 } 4303 4304 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4305 { 4306 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4307 int data; 4308 4309 if (amdgpu_sriov_vf(adev)) 4310 *flags = 0; 4311 4312 /* AMD_CG_SUPPORT_GFX_MGCG */ 4313 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4314 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4315 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4316 4317 /* AMD_CG_SUPPORT_GFX_CGCG */ 4318 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4319 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4320 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4321 4322 /* AMD_CG_SUPPORT_GFX_CGLS */ 4323 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4324 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4325 4326 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4327 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4328 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4329 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4330 4331 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4332 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4333 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4334 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4335 4336 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4337 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4338 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4339 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4340 4341 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4342 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4343 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4344 } 4345 4346 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4347 { 4348 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4349 } 4350 4351 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4352 { 4353 struct amdgpu_device *adev = ring->adev; 4354 u64 wptr; 4355 4356 /* XXX check if swapping is necessary on BE */ 4357 if (ring->use_doorbell) { 4358 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4359 } else { 4360 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4361 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4362 } 4363 4364 return wptr; 4365 } 4366 4367 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4368 { 4369 struct amdgpu_device *adev = ring->adev; 4370 4371 if (ring->use_doorbell) { 4372 /* XXX check if swapping is necessary on BE */ 4373 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4374 WDOORBELL64(ring->doorbell_index, ring->wptr); 4375 } else { 4376 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4377 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4378 } 4379 } 4380 4381 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4382 { 4383 struct amdgpu_device *adev = ring->adev; 4384 u32 ref_and_mask, reg_mem_engine; 4385 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4386 4387 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4388 switch (ring->me) { 4389 case 1: 4390 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4391 break; 4392 case 2: 4393 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4394 break; 4395 default: 4396 return; 4397 } 4398 reg_mem_engine = 0; 4399 } else { 4400 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4401 reg_mem_engine = 1; /* pfp */ 4402 } 4403 4404 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4405 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4406 adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4407 ref_and_mask, ref_and_mask, 0x20); 4408 } 4409 4410 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4411 struct amdgpu_job *job, 4412 struct amdgpu_ib *ib, 4413 uint32_t flags) 4414 { 4415 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4416 u32 header, control = 0; 4417 4418 if (ib->flags & AMDGPU_IB_FLAG_CE) 4419 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4420 else 4421 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4422 4423 control |= ib->length_dw | (vmid << 24); 4424 4425 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4426 control |= INDIRECT_BUFFER_PRE_ENB(1); 4427 4428 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4429 gfx_v9_0_ring_emit_de_meta(ring); 4430 } 4431 4432 amdgpu_ring_write(ring, header); 4433 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4434 amdgpu_ring_write(ring, 4435 #ifdef __BIG_ENDIAN 4436 (2 << 0) | 4437 #endif 4438 lower_32_bits(ib->gpu_addr)); 4439 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4440 amdgpu_ring_write(ring, control); 4441 } 4442 4443 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4444 struct amdgpu_job *job, 4445 struct amdgpu_ib *ib, 4446 uint32_t flags) 4447 { 4448 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4449 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4450 4451 /* Currently, there is a high possibility to get wave ID mismatch 4452 * between ME and GDS, leading to a hw deadlock, because ME generates 4453 * different wave IDs than the GDS expects. This situation happens 4454 * randomly when at least 5 compute pipes use GDS ordered append. 4455 * The wave IDs generated by ME are also wrong after suspend/resume. 4456 * Those are probably bugs somewhere else in the kernel driver. 4457 * 4458 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4459 * GDS to 0 for this ring (me/pipe). 4460 */ 4461 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4462 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4463 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4464 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4465 } 4466 4467 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4468 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4469 amdgpu_ring_write(ring, 4470 #ifdef __BIG_ENDIAN 4471 (2 << 0) | 4472 #endif 4473 lower_32_bits(ib->gpu_addr)); 4474 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4475 amdgpu_ring_write(ring, control); 4476 } 4477 4478 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4479 u64 seq, unsigned flags) 4480 { 4481 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4482 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4483 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4484 4485 /* RELEASE_MEM - flush caches, send int */ 4486 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4487 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4488 EOP_TC_NC_ACTION_EN) : 4489 (EOP_TCL1_ACTION_EN | 4490 EOP_TC_ACTION_EN | 4491 EOP_TC_WB_ACTION_EN | 4492 EOP_TC_MD_ACTION_EN)) | 4493 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4494 EVENT_INDEX(5))); 4495 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 4496 4497 /* 4498 * the address should be Qword aligned if 64bit write, Dword 4499 * aligned if only send 32bit data low (discard data high) 4500 */ 4501 if (write64bit) 4502 BUG_ON(addr & 0x7); 4503 else 4504 BUG_ON(addr & 0x3); 4505 amdgpu_ring_write(ring, lower_32_bits(addr)); 4506 amdgpu_ring_write(ring, upper_32_bits(addr)); 4507 amdgpu_ring_write(ring, lower_32_bits(seq)); 4508 amdgpu_ring_write(ring, upper_32_bits(seq)); 4509 amdgpu_ring_write(ring, 0); 4510 } 4511 4512 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4513 { 4514 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4515 uint32_t seq = ring->fence_drv.sync_seq; 4516 uint64_t addr = ring->fence_drv.gpu_addr; 4517 4518 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4519 lower_32_bits(addr), upper_32_bits(addr), 4520 seq, 0xffffffff, 4); 4521 } 4522 4523 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4524 unsigned vmid, uint64_t pd_addr) 4525 { 4526 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4527 4528 /* compute doesn't have PFP */ 4529 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4530 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4531 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4532 amdgpu_ring_write(ring, 0x0); 4533 } 4534 } 4535 4536 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4537 { 4538 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4539 } 4540 4541 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4542 { 4543 u64 wptr; 4544 4545 /* XXX check if swapping is necessary on BE */ 4546 if (ring->use_doorbell) 4547 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4548 else 4549 BUG(); 4550 return wptr; 4551 } 4552 4553 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4554 bool acquire) 4555 { 4556 struct amdgpu_device *adev = ring->adev; 4557 int pipe_num, tmp, reg; 4558 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4559 4560 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4561 4562 /* first me only has 2 entries, GFX and HP3D */ 4563 if (ring->me > 0) 4564 pipe_num -= 2; 4565 4566 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4567 tmp = RREG32(reg); 4568 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4569 WREG32(reg, tmp); 4570 } 4571 4572 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4573 struct amdgpu_ring *ring, 4574 bool acquire) 4575 { 4576 int i, pipe; 4577 bool reserve; 4578 struct amdgpu_ring *iring; 4579 4580 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4581 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4582 if (acquire) 4583 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4584 else 4585 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4586 4587 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4588 /* Clear all reservations - everyone reacquires all resources */ 4589 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4590 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4591 true); 4592 4593 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4594 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4595 true); 4596 } else { 4597 /* Lower all pipes without a current reservation */ 4598 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4599 iring = &adev->gfx.gfx_ring[i]; 4600 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4601 iring->me, 4602 iring->pipe, 4603 0); 4604 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4605 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4606 } 4607 4608 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4609 iring = &adev->gfx.compute_ring[i]; 4610 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4611 iring->me, 4612 iring->pipe, 4613 0); 4614 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4615 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4616 } 4617 } 4618 4619 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4620 } 4621 4622 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4623 struct amdgpu_ring *ring, 4624 bool acquire) 4625 { 4626 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4627 uint32_t queue_priority = acquire ? 0xf : 0x0; 4628 4629 mutex_lock(&adev->srbm_mutex); 4630 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4631 4632 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4633 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4634 4635 soc15_grbm_select(adev, 0, 0, 0, 0); 4636 mutex_unlock(&adev->srbm_mutex); 4637 } 4638 4639 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4640 enum drm_sched_priority priority) 4641 { 4642 struct amdgpu_device *adev = ring->adev; 4643 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4644 4645 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4646 return; 4647 4648 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4649 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4650 } 4651 4652 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4653 { 4654 struct amdgpu_device *adev = ring->adev; 4655 4656 /* XXX check if swapping is necessary on BE */ 4657 if (ring->use_doorbell) { 4658 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4659 WDOORBELL64(ring->doorbell_index, ring->wptr); 4660 } else{ 4661 BUG(); /* only DOORBELL method supported on gfx9 now */ 4662 } 4663 } 4664 4665 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4666 u64 seq, unsigned int flags) 4667 { 4668 struct amdgpu_device *adev = ring->adev; 4669 4670 /* we only allocate 32bit for each seq wb address */ 4671 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4672 4673 /* write fence seq to the "addr" */ 4674 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4675 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4676 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4677 amdgpu_ring_write(ring, lower_32_bits(addr)); 4678 amdgpu_ring_write(ring, upper_32_bits(addr)); 4679 amdgpu_ring_write(ring, lower_32_bits(seq)); 4680 4681 if (flags & AMDGPU_FENCE_FLAG_INT) { 4682 /* set register to trigger INT */ 4683 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4684 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4685 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4686 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4687 amdgpu_ring_write(ring, 0); 4688 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4689 } 4690 } 4691 4692 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4693 { 4694 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4695 amdgpu_ring_write(ring, 0); 4696 } 4697 4698 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 4699 { 4700 struct v9_ce_ib_state ce_payload = {0}; 4701 uint64_t csa_addr; 4702 int cnt; 4703 4704 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4705 csa_addr = amdgpu_csa_vaddr(ring->adev); 4706 4707 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4708 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4709 WRITE_DATA_DST_SEL(8) | 4710 WR_CONFIRM) | 4711 WRITE_DATA_CACHE_POLICY(0)); 4712 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4713 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4714 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 4715 } 4716 4717 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 4718 { 4719 struct v9_de_ib_state de_payload = {0}; 4720 uint64_t csa_addr, gds_addr; 4721 int cnt; 4722 4723 csa_addr = amdgpu_csa_vaddr(ring->adev); 4724 gds_addr = csa_addr + 4096; 4725 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 4726 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4727 4728 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4729 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4730 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4731 WRITE_DATA_DST_SEL(8) | 4732 WR_CONFIRM) | 4733 WRITE_DATA_CACHE_POLICY(0)); 4734 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4735 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4736 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 4737 } 4738 4739 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4740 { 4741 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4742 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 4743 } 4744 4745 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4746 { 4747 uint32_t dw2 = 0; 4748 4749 if (amdgpu_sriov_vf(ring->adev)) 4750 gfx_v9_0_ring_emit_ce_meta(ring); 4751 4752 gfx_v9_0_ring_emit_tmz(ring, true); 4753 4754 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4755 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4756 /* set load_global_config & load_global_uconfig */ 4757 dw2 |= 0x8001; 4758 /* set load_cs_sh_regs */ 4759 dw2 |= 0x01000000; 4760 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4761 dw2 |= 0x10002; 4762 4763 /* set load_ce_ram if preamble presented */ 4764 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4765 dw2 |= 0x10000000; 4766 } else { 4767 /* still load_ce_ram if this is the first time preamble presented 4768 * although there is no context switch happens. 4769 */ 4770 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4771 dw2 |= 0x10000000; 4772 } 4773 4774 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4775 amdgpu_ring_write(ring, dw2); 4776 amdgpu_ring_write(ring, 0); 4777 } 4778 4779 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4780 { 4781 unsigned ret; 4782 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4783 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4784 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4785 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4786 ret = ring->wptr & ring->buf_mask; 4787 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4788 return ret; 4789 } 4790 4791 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4792 { 4793 unsigned cur; 4794 BUG_ON(offset > ring->buf_mask); 4795 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4796 4797 cur = (ring->wptr & ring->buf_mask) - 1; 4798 if (likely(cur > offset)) 4799 ring->ring[offset] = cur - offset; 4800 else 4801 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 4802 } 4803 4804 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 4805 { 4806 struct amdgpu_device *adev = ring->adev; 4807 4808 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4809 amdgpu_ring_write(ring, 0 | /* src: register*/ 4810 (5 << 8) | /* dst: memory */ 4811 (1 << 20)); /* write confirm */ 4812 amdgpu_ring_write(ring, reg); 4813 amdgpu_ring_write(ring, 0); 4814 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4815 adev->virt.reg_val_offs * 4)); 4816 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4817 adev->virt.reg_val_offs * 4)); 4818 } 4819 4820 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4821 uint32_t val) 4822 { 4823 uint32_t cmd = 0; 4824 4825 switch (ring->funcs->type) { 4826 case AMDGPU_RING_TYPE_GFX: 4827 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4828 break; 4829 case AMDGPU_RING_TYPE_KIQ: 4830 cmd = (1 << 16); /* no inc addr */ 4831 break; 4832 default: 4833 cmd = WR_CONFIRM; 4834 break; 4835 } 4836 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4837 amdgpu_ring_write(ring, cmd); 4838 amdgpu_ring_write(ring, reg); 4839 amdgpu_ring_write(ring, 0); 4840 amdgpu_ring_write(ring, val); 4841 } 4842 4843 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4844 uint32_t val, uint32_t mask) 4845 { 4846 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4847 } 4848 4849 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 4850 uint32_t reg0, uint32_t reg1, 4851 uint32_t ref, uint32_t mask) 4852 { 4853 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4854 struct amdgpu_device *adev = ring->adev; 4855 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4856 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 4857 4858 if (fw_version_ok) 4859 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 4860 ref, mask, 0x20); 4861 else 4862 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 4863 ref, mask); 4864 } 4865 4866 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 4867 { 4868 struct amdgpu_device *adev = ring->adev; 4869 uint32_t value = 0; 4870 4871 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 4872 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 4873 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 4874 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 4875 WREG32(mmSQ_CMD, value); 4876 } 4877 4878 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4879 enum amdgpu_interrupt_state state) 4880 { 4881 switch (state) { 4882 case AMDGPU_IRQ_STATE_DISABLE: 4883 case AMDGPU_IRQ_STATE_ENABLE: 4884 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4885 TIME_STAMP_INT_ENABLE, 4886 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4887 break; 4888 default: 4889 break; 4890 } 4891 } 4892 4893 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4894 int me, int pipe, 4895 enum amdgpu_interrupt_state state) 4896 { 4897 u32 mec_int_cntl, mec_int_cntl_reg; 4898 4899 /* 4900 * amdgpu controls only the first MEC. That's why this function only 4901 * handles the setting of interrupts for this specific MEC. All other 4902 * pipes' interrupts are set by amdkfd. 4903 */ 4904 4905 if (me == 1) { 4906 switch (pipe) { 4907 case 0: 4908 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4909 break; 4910 case 1: 4911 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4912 break; 4913 case 2: 4914 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4915 break; 4916 case 3: 4917 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4918 break; 4919 default: 4920 DRM_DEBUG("invalid pipe %d\n", pipe); 4921 return; 4922 } 4923 } else { 4924 DRM_DEBUG("invalid me %d\n", me); 4925 return; 4926 } 4927 4928 switch (state) { 4929 case AMDGPU_IRQ_STATE_DISABLE: 4930 mec_int_cntl = RREG32(mec_int_cntl_reg); 4931 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4932 TIME_STAMP_INT_ENABLE, 0); 4933 WREG32(mec_int_cntl_reg, mec_int_cntl); 4934 break; 4935 case AMDGPU_IRQ_STATE_ENABLE: 4936 mec_int_cntl = RREG32(mec_int_cntl_reg); 4937 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4938 TIME_STAMP_INT_ENABLE, 1); 4939 WREG32(mec_int_cntl_reg, mec_int_cntl); 4940 break; 4941 default: 4942 break; 4943 } 4944 } 4945 4946 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4947 struct amdgpu_irq_src *source, 4948 unsigned type, 4949 enum amdgpu_interrupt_state state) 4950 { 4951 switch (state) { 4952 case AMDGPU_IRQ_STATE_DISABLE: 4953 case AMDGPU_IRQ_STATE_ENABLE: 4954 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4955 PRIV_REG_INT_ENABLE, 4956 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4957 break; 4958 default: 4959 break; 4960 } 4961 4962 return 0; 4963 } 4964 4965 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4966 struct amdgpu_irq_src *source, 4967 unsigned type, 4968 enum amdgpu_interrupt_state state) 4969 { 4970 switch (state) { 4971 case AMDGPU_IRQ_STATE_DISABLE: 4972 case AMDGPU_IRQ_STATE_ENABLE: 4973 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4974 PRIV_INSTR_INT_ENABLE, 4975 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4976 default: 4977 break; 4978 } 4979 4980 return 0; 4981 } 4982 4983 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 4984 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4985 CP_ECC_ERROR_INT_ENABLE, 1) 4986 4987 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 4988 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4989 CP_ECC_ERROR_INT_ENABLE, 0) 4990 4991 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 4992 struct amdgpu_irq_src *source, 4993 unsigned type, 4994 enum amdgpu_interrupt_state state) 4995 { 4996 switch (state) { 4997 case AMDGPU_IRQ_STATE_DISABLE: 4998 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4999 CP_ECC_ERROR_INT_ENABLE, 0); 5000 DISABLE_ECC_ON_ME_PIPE(1, 0); 5001 DISABLE_ECC_ON_ME_PIPE(1, 1); 5002 DISABLE_ECC_ON_ME_PIPE(1, 2); 5003 DISABLE_ECC_ON_ME_PIPE(1, 3); 5004 break; 5005 5006 case AMDGPU_IRQ_STATE_ENABLE: 5007 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5008 CP_ECC_ERROR_INT_ENABLE, 1); 5009 ENABLE_ECC_ON_ME_PIPE(1, 0); 5010 ENABLE_ECC_ON_ME_PIPE(1, 1); 5011 ENABLE_ECC_ON_ME_PIPE(1, 2); 5012 ENABLE_ECC_ON_ME_PIPE(1, 3); 5013 break; 5014 default: 5015 break; 5016 } 5017 5018 return 0; 5019 } 5020 5021 5022 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5023 struct amdgpu_irq_src *src, 5024 unsigned type, 5025 enum amdgpu_interrupt_state state) 5026 { 5027 switch (type) { 5028 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5029 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5030 break; 5031 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5032 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5033 break; 5034 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5035 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5036 break; 5037 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5038 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5039 break; 5040 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5041 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5042 break; 5043 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5044 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5045 break; 5046 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5047 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5048 break; 5049 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5050 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5051 break; 5052 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5053 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5054 break; 5055 default: 5056 break; 5057 } 5058 return 0; 5059 } 5060 5061 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5062 struct amdgpu_irq_src *source, 5063 struct amdgpu_iv_entry *entry) 5064 { 5065 int i; 5066 u8 me_id, pipe_id, queue_id; 5067 struct amdgpu_ring *ring; 5068 5069 DRM_DEBUG("IH: CP EOP\n"); 5070 me_id = (entry->ring_id & 0x0c) >> 2; 5071 pipe_id = (entry->ring_id & 0x03) >> 0; 5072 queue_id = (entry->ring_id & 0x70) >> 4; 5073 5074 switch (me_id) { 5075 case 0: 5076 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5077 break; 5078 case 1: 5079 case 2: 5080 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5081 ring = &adev->gfx.compute_ring[i]; 5082 /* Per-queue interrupt is supported for MEC starting from VI. 5083 * The interrupt can only be enabled/disabled per pipe instead of per queue. 5084 */ 5085 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5086 amdgpu_fence_process(ring); 5087 } 5088 break; 5089 } 5090 return 0; 5091 } 5092 5093 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5094 struct amdgpu_iv_entry *entry) 5095 { 5096 u8 me_id, pipe_id, queue_id; 5097 struct amdgpu_ring *ring; 5098 int i; 5099 5100 me_id = (entry->ring_id & 0x0c) >> 2; 5101 pipe_id = (entry->ring_id & 0x03) >> 0; 5102 queue_id = (entry->ring_id & 0x70) >> 4; 5103 5104 switch (me_id) { 5105 case 0: 5106 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5107 break; 5108 case 1: 5109 case 2: 5110 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5111 ring = &adev->gfx.compute_ring[i]; 5112 if (ring->me == me_id && ring->pipe == pipe_id && 5113 ring->queue == queue_id) 5114 drm_sched_fault(&ring->sched); 5115 } 5116 break; 5117 } 5118 } 5119 5120 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5121 struct amdgpu_irq_src *source, 5122 struct amdgpu_iv_entry *entry) 5123 { 5124 DRM_ERROR("Illegal register access in command stream\n"); 5125 gfx_v9_0_fault(adev, entry); 5126 return 0; 5127 } 5128 5129 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5130 struct amdgpu_irq_src *source, 5131 struct amdgpu_iv_entry *entry) 5132 { 5133 DRM_ERROR("Illegal instruction in command stream\n"); 5134 gfx_v9_0_fault(adev, entry); 5135 return 0; 5136 } 5137 5138 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 5139 struct amdgpu_iv_entry *entry) 5140 { 5141 /* TODO ue will trigger an interrupt. */ 5142 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 5143 amdgpu_ras_reset_gpu(adev, 0); 5144 return AMDGPU_RAS_UE; 5145 } 5146 5147 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, 5148 struct amdgpu_irq_src *source, 5149 struct amdgpu_iv_entry *entry) 5150 { 5151 struct ras_common_if *ras_if = adev->gfx.ras_if; 5152 struct ras_dispatch_if ih_data = { 5153 .entry = entry, 5154 }; 5155 5156 if (!ras_if) 5157 return 0; 5158 5159 ih_data.head = *ras_if; 5160 5161 DRM_ERROR("CP ECC ERROR IRQ\n"); 5162 amdgpu_ras_interrupt_dispatch(adev, &ih_data); 5163 return 0; 5164 } 5165 5166 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 5167 .name = "gfx_v9_0", 5168 .early_init = gfx_v9_0_early_init, 5169 .late_init = gfx_v9_0_late_init, 5170 .sw_init = gfx_v9_0_sw_init, 5171 .sw_fini = gfx_v9_0_sw_fini, 5172 .hw_init = gfx_v9_0_hw_init, 5173 .hw_fini = gfx_v9_0_hw_fini, 5174 .suspend = gfx_v9_0_suspend, 5175 .resume = gfx_v9_0_resume, 5176 .is_idle = gfx_v9_0_is_idle, 5177 .wait_for_idle = gfx_v9_0_wait_for_idle, 5178 .soft_reset = gfx_v9_0_soft_reset, 5179 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 5180 .set_powergating_state = gfx_v9_0_set_powergating_state, 5181 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 5182 }; 5183 5184 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 5185 .type = AMDGPU_RING_TYPE_GFX, 5186 .align_mask = 0xff, 5187 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 5188 .support_64bit_ptrs = true, 5189 .vmhub = AMDGPU_GFXHUB, 5190 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 5191 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 5192 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 5193 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5194 5 + /* COND_EXEC */ 5195 7 + /* PIPELINE_SYNC */ 5196 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 5197 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 5198 2 + /* VM_FLUSH */ 5199 8 + /* FENCE for VM_FLUSH */ 5200 20 + /* GDS switch */ 5201 4 + /* double SWITCH_BUFFER, 5202 the first COND_EXEC jump to the place just 5203 prior to this double SWITCH_BUFFER */ 5204 5 + /* COND_EXEC */ 5205 7 + /* HDP_flush */ 5206 4 + /* VGT_flush */ 5207 14 + /* CE_META */ 5208 31 + /* DE_META */ 5209 3 + /* CNTX_CTRL */ 5210 5 + /* HDP_INVL */ 5211 8 + 8 + /* FENCE x2 */ 5212 2, /* SWITCH_BUFFER */ 5213 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 5214 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 5215 .emit_fence = gfx_v9_0_ring_emit_fence, 5216 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 5217 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 5218 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 5219 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 5220 .test_ring = gfx_v9_0_ring_test_ring, 5221 .test_ib = gfx_v9_0_ring_test_ib, 5222 .insert_nop = amdgpu_ring_insert_nop, 5223 .pad_ib = amdgpu_ring_generic_pad_ib, 5224 .emit_switch_buffer = gfx_v9_ring_emit_sb, 5225 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 5226 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 5227 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 5228 .emit_tmz = gfx_v9_0_ring_emit_tmz, 5229 .emit_wreg = gfx_v9_0_ring_emit_wreg, 5230 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 5231 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 5232 .soft_recovery = gfx_v9_0_ring_soft_recovery, 5233 }; 5234 5235 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 5236 .type = AMDGPU_RING_TYPE_COMPUTE, 5237 .align_mask = 0xff, 5238 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 5239 .support_64bit_ptrs = true, 5240 .vmhub = AMDGPU_GFXHUB, 5241 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 5242 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 5243 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 5244 .emit_frame_size = 5245 20 + /* gfx_v9_0_ring_emit_gds_switch */ 5246 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5247 5 + /* hdp invalidate */ 5248 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 5249 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 5250 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 5251 2 + /* gfx_v9_0_ring_emit_vm_flush */ 5252 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 5253 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 5254 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 5255 .emit_fence = gfx_v9_0_ring_emit_fence, 5256 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 5257 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 5258 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 5259 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 5260 .test_ring = gfx_v9_0_ring_test_ring, 5261 .test_ib = gfx_v9_0_ring_test_ib, 5262 .insert_nop = amdgpu_ring_insert_nop, 5263 .pad_ib = amdgpu_ring_generic_pad_ib, 5264 .set_priority = gfx_v9_0_ring_set_priority_compute, 5265 .emit_wreg = gfx_v9_0_ring_emit_wreg, 5266 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 5267 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 5268 }; 5269 5270 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 5271 .type = AMDGPU_RING_TYPE_KIQ, 5272 .align_mask = 0xff, 5273 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 5274 .support_64bit_ptrs = true, 5275 .vmhub = AMDGPU_GFXHUB, 5276 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 5277 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 5278 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 5279 .emit_frame_size = 5280 20 + /* gfx_v9_0_ring_emit_gds_switch */ 5281 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5282 5 + /* hdp invalidate */ 5283 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 5284 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 5285 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 5286 2 + /* gfx_v9_0_ring_emit_vm_flush */ 5287 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 5288 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 5289 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 5290 .test_ring = gfx_v9_0_ring_test_ring, 5291 .insert_nop = amdgpu_ring_insert_nop, 5292 .pad_ib = amdgpu_ring_generic_pad_ib, 5293 .emit_rreg = gfx_v9_0_ring_emit_rreg, 5294 .emit_wreg = gfx_v9_0_ring_emit_wreg, 5295 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 5296 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 5297 }; 5298 5299 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 5300 { 5301 int i; 5302 5303 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 5304 5305 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 5306 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 5307 5308 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5309 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 5310 } 5311 5312 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 5313 .set = gfx_v9_0_set_eop_interrupt_state, 5314 .process = gfx_v9_0_eop_irq, 5315 }; 5316 5317 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 5318 .set = gfx_v9_0_set_priv_reg_fault_state, 5319 .process = gfx_v9_0_priv_reg_irq, 5320 }; 5321 5322 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 5323 .set = gfx_v9_0_set_priv_inst_fault_state, 5324 .process = gfx_v9_0_priv_inst_irq, 5325 }; 5326 5327 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 5328 .set = gfx_v9_0_set_cp_ecc_error_state, 5329 .process = gfx_v9_0_cp_ecc_error_irq, 5330 }; 5331 5332 5333 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 5334 { 5335 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 5336 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 5337 5338 adev->gfx.priv_reg_irq.num_types = 1; 5339 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 5340 5341 adev->gfx.priv_inst_irq.num_types = 1; 5342 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 5343 5344 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 5345 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 5346 } 5347 5348 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 5349 { 5350 switch (adev->asic_type) { 5351 case CHIP_VEGA10: 5352 case CHIP_VEGA12: 5353 case CHIP_VEGA20: 5354 case CHIP_RAVEN: 5355 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 5356 break; 5357 default: 5358 break; 5359 } 5360 } 5361 5362 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 5363 { 5364 /* init asci gds info */ 5365 switch (adev->asic_type) { 5366 case CHIP_VEGA10: 5367 case CHIP_VEGA12: 5368 case CHIP_VEGA20: 5369 adev->gds.gds_size = 0x10000; 5370 break; 5371 case CHIP_RAVEN: 5372 adev->gds.gds_size = 0x1000; 5373 break; 5374 default: 5375 adev->gds.gds_size = 0x10000; 5376 break; 5377 } 5378 5379 switch (adev->asic_type) { 5380 case CHIP_VEGA10: 5381 case CHIP_VEGA20: 5382 adev->gds.gds_compute_max_wave_id = 0x7ff; 5383 break; 5384 case CHIP_VEGA12: 5385 adev->gds.gds_compute_max_wave_id = 0x27f; 5386 break; 5387 case CHIP_RAVEN: 5388 if (adev->rev_id >= 0x8) 5389 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 5390 else 5391 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 5392 break; 5393 default: 5394 /* this really depends on the chip */ 5395 adev->gds.gds_compute_max_wave_id = 0x7ff; 5396 break; 5397 } 5398 5399 adev->gds.gws_size = 64; 5400 adev->gds.oa_size = 16; 5401 } 5402 5403 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 5404 u32 bitmap) 5405 { 5406 u32 data; 5407 5408 if (!bitmap) 5409 return; 5410 5411 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 5412 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 5413 5414 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 5415 } 5416 5417 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 5418 { 5419 u32 data, mask; 5420 5421 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 5422 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 5423 5424 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 5425 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 5426 5427 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 5428 5429 return (~data) & mask; 5430 } 5431 5432 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 5433 struct amdgpu_cu_info *cu_info) 5434 { 5435 int i, j, k, counter, active_cu_number = 0; 5436 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 5437 unsigned disable_masks[4 * 2]; 5438 5439 if (!adev || !cu_info) 5440 return -EINVAL; 5441 5442 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 5443 5444 mutex_lock(&adev->grbm_idx_mutex); 5445 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 5446 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 5447 mask = 1; 5448 ao_bitmap = 0; 5449 counter = 0; 5450 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 5451 if (i < 4 && j < 2) 5452 gfx_v9_0_set_user_cu_inactive_bitmap( 5453 adev, disable_masks[i * 2 + j]); 5454 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 5455 cu_info->bitmap[i][j] = bitmap; 5456 5457 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 5458 if (bitmap & mask) { 5459 if (counter < adev->gfx.config.max_cu_per_sh) 5460 ao_bitmap |= mask; 5461 counter ++; 5462 } 5463 mask <<= 1; 5464 } 5465 active_cu_number += counter; 5466 if (i < 2 && j < 2) 5467 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 5468 cu_info->ao_cu_bitmap[i][j] = ao_bitmap; 5469 } 5470 } 5471 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5472 mutex_unlock(&adev->grbm_idx_mutex); 5473 5474 cu_info->number = active_cu_number; 5475 cu_info->ao_cu_mask = ao_cu_mask; 5476 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 5477 5478 return 0; 5479 } 5480 5481 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 5482 { 5483 .type = AMD_IP_BLOCK_TYPE_GFX, 5484 .major = 9, 5485 .minor = 0, 5486 .rev = 0, 5487 .funcs = &gfx_v9_0_ip_funcs, 5488 }; 5489