/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 98 99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 106 107 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 108 { 109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 110 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 111 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 112 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 115 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 116 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 117 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 120 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 121 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 122 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 124 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 125 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 126 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 127 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 129 }; 130 131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 132 { 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 148 
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
		    ((adev->gfx.rlc_fw_version != 106 &&
		      adev->gfx.rlc_fw_version < 531) ||
		     (adev->gfx.rlc_fw_version == 53815) ||
		     (adev->gfx.rlc_feature_version < 1) ||
		     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/**
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			 adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
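		/* MEC2 firmware is optional here; the failure is cleared and the
		 * driver falls back to running compute on MEC1 only (later code
		 * checks adev->gfx.mec2_fw before using it).
		 */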
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}

	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			  AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
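	/* One GFX9_MEC_HPD_SIZE (4KB) HPD/EOP slot is reserved per enabled
	 * compute ring in a single VRAM buffer below.
	 */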
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};

static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	int err;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VEGA12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case CHIP_VEGA20:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	case CHIP_RAVEN:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		if (adev->rev_id >= 8)
			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
		else
			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

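	/* Cache the remaining GB_ADDR_CONFIG fields so later code can use the
	 * decoded values instead of re-reading the register.
	 */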
	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}

static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}

static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}

static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1523 goto err; 1524 } 1525 1526 out: 1527 adev->gfx.ngg.init = true; 1528 return 0; 1529 err: 1530 gfx_v9_0_ngg_fini(adev); 1531 return r; 1532 } 1533 1534 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 1535 { 1536 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 1537 int r; 1538 u32 data, base; 1539 1540 if (!amdgpu_ngg) 1541 return 0; 1542 1543 /* Program buffer size */ 1544 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 1545 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 1546 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 1547 adev->gfx.ngg.buf[NGG_POS].size >> 8); 1548 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 1549 1550 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 1551 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 1552 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 1553 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 1554 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 1555 1556 /* Program buffer base address */ 1557 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1558 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 1559 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 1560 1561 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1562 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 1563 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 1564 1565 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1566 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 1567 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 1568 1569 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1570 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 1571 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 1572 1573 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1574 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 1575 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 1576 1577 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1578 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 1579 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 1580 1581 /* Clear GDS reserved memory */ 1582 r = amdgpu_ring_alloc(ring, 17); 1583 if (r) { 1584 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 1585 ring->name, r); 1586 return r; 1587 } 1588 1589 gfx_v9_0_write_data_to_reg(ring, 0, false, 1590 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 1591 (adev->gds.gds_size + 1592 adev->gfx.ngg.gds_reserve_size)); 1593 1594 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1595 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 1596 PACKET3_DMA_DATA_DST_SEL(1) | 1597 PACKET3_DMA_DATA_SRC_SEL(2))); 1598 amdgpu_ring_write(ring, 0); 1599 amdgpu_ring_write(ring, 0); 1600 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 1601 amdgpu_ring_write(ring, 0); 1602 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 1603 adev->gfx.ngg.gds_reserve_size); 1604 1605 gfx_v9_0_write_data_to_reg(ring, 0, false, 1606 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 1607 1608 amdgpu_ring_commit(ring); 1609 1610 return 0; 1611 } 1612 1613 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1614 int mec, int pipe, int queue) 1615 { 1616 int r; 1617 unsigned irq_type; 1618 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1619 1620 ring = &adev->gfx.compute_ring[ring_id]; 1621 1622 /* mec0 is me1 */ 1623 ring->me = mec + 1; 1624 ring->pipe = pipe; 1625 ring->queue = queue; 1626 1627 ring->ring_obj = NULL; 1628 ring->use_doorbell = true; 1629 
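	/*
	 * Each compute queue gets its own doorbell slot starting at
	 * doorbell_index.mec_ring0.  The index kept in adev appears to be
	 * in 64-bit doorbell units, so it is converted to a dword offset
	 * here by shifting left by one.
	 */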
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1630 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1631 + (ring_id * GFX9_MEC_HPD_SIZE); 1632 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1633 1634 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1635 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1636 + ring->pipe; 1637 1638 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1639 r = amdgpu_ring_init(adev, ring, 1024, 1640 &adev->gfx.eop_irq, irq_type); 1641 if (r) 1642 return r; 1643 1644 1645 return 0; 1646 } 1647 1648 static int gfx_v9_0_sw_init(void *handle) 1649 { 1650 int i, j, k, r, ring_id; 1651 struct amdgpu_ring *ring; 1652 struct amdgpu_kiq *kiq; 1653 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1654 1655 switch (adev->asic_type) { 1656 case CHIP_VEGA10: 1657 case CHIP_VEGA12: 1658 case CHIP_VEGA20: 1659 case CHIP_RAVEN: 1660 adev->gfx.mec.num_mec = 2; 1661 break; 1662 default: 1663 adev->gfx.mec.num_mec = 1; 1664 break; 1665 } 1666 1667 adev->gfx.mec.num_pipe_per_mec = 4; 1668 adev->gfx.mec.num_queue_per_pipe = 8; 1669 1670 /* EOP Event */ 1671 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 1672 if (r) 1673 return r; 1674 1675 /* Privileged reg */ 1676 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 1677 &adev->gfx.priv_reg_irq); 1678 if (r) 1679 return r; 1680 1681 /* Privileged inst */ 1682 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 1683 &adev->gfx.priv_inst_irq); 1684 if (r) 1685 return r; 1686 1687 /* ECC error */ 1688 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 1689 &adev->gfx.cp_ecc_error_irq); 1690 if (r) 1691 return r; 1692 1693 /* FUE error */ 1694 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 1695 &adev->gfx.cp_ecc_error_irq); 1696 if (r) 1697 return r; 1698 1699 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1700 1701 gfx_v9_0_scratch_init(adev); 1702 1703 r = gfx_v9_0_init_microcode(adev); 1704 if (r) { 1705 DRM_ERROR("Failed to load gfx firmware!\n"); 1706 return r; 1707 } 1708 1709 r = adev->gfx.rlc.funcs->init(adev); 1710 if (r) { 1711 DRM_ERROR("Failed to init rlc BOs!\n"); 1712 return r; 1713 } 1714 1715 r = gfx_v9_0_mec_init(adev); 1716 if (r) { 1717 DRM_ERROR("Failed to init MEC BOs!\n"); 1718 return r; 1719 } 1720 1721 /* set up the gfx ring */ 1722 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1723 ring = &adev->gfx.gfx_ring[i]; 1724 ring->ring_obj = NULL; 1725 if (!i) 1726 sprintf(ring->name, "gfx"); 1727 else 1728 sprintf(ring->name, "gfx_%d", i); 1729 ring->use_doorbell = true; 1730 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1731 r = amdgpu_ring_init(adev, ring, 1024, 1732 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 1733 if (r) 1734 return r; 1735 } 1736 1737 /* set up the compute queues - allocate horizontally across pipes */ 1738 ring_id = 0; 1739 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1740 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1741 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1742 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1743 continue; 1744 1745 r = gfx_v9_0_compute_ring_init(adev, 1746 ring_id, 1747 i, k, j); 1748 if (r) 1749 return r; 1750 1751 ring_id++; 1752 } 1753 } 1754 } 1755 1756 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1757 if (r) { 
1758 DRM_ERROR("Failed to init KIQ BOs!\n"); 1759 return r; 1760 } 1761 1762 kiq = &adev->gfx.kiq; 1763 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1764 if (r) 1765 return r; 1766 1767 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1768 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 1769 if (r) 1770 return r; 1771 1772 adev->gfx.ce_ram_size = 0x8000; 1773 1774 r = gfx_v9_0_gpu_early_init(adev); 1775 if (r) 1776 return r; 1777 1778 r = gfx_v9_0_ngg_init(adev); 1779 if (r) 1780 return r; 1781 1782 return 0; 1783 } 1784 1785 1786 static int gfx_v9_0_sw_fini(void *handle) 1787 { 1788 int i; 1789 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1790 1791 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 1792 adev->gfx.ras_if) { 1793 struct ras_common_if *ras_if = adev->gfx.ras_if; 1794 struct ras_ih_if ih_info = { 1795 .head = *ras_if, 1796 }; 1797 1798 amdgpu_ras_debugfs_remove(adev, ras_if); 1799 amdgpu_ras_sysfs_remove(adev, ras_if); 1800 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 1801 amdgpu_ras_feature_enable(adev, ras_if, 0); 1802 kfree(ras_if); 1803 } 1804 1805 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1806 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1807 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1808 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1809 1810 amdgpu_gfx_mqd_sw_fini(adev); 1811 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1812 amdgpu_gfx_kiq_fini(adev); 1813 1814 gfx_v9_0_mec_fini(adev); 1815 gfx_v9_0_ngg_fini(adev); 1816 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1817 if (adev->asic_type == CHIP_RAVEN) { 1818 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 1819 &adev->gfx.rlc.cp_table_gpu_addr, 1820 (void **)&adev->gfx.rlc.cp_table_ptr); 1821 } 1822 gfx_v9_0_free_microcode(adev); 1823 1824 return 0; 1825 } 1826 1827 1828 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1829 { 1830 /* TODO */ 1831 } 1832 1833 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1834 { 1835 u32 data; 1836 1837 if (instance == 0xffffffff) 1838 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1839 else 1840 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1841 1842 if (se_num == 0xffffffff) 1843 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1844 else 1845 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1846 1847 if (sh_num == 0xffffffff) 1848 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1849 else 1850 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1851 1852 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 1853 } 1854 1855 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1856 { 1857 u32 data, mask; 1858 1859 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1860 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1861 1862 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1863 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1864 1865 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1866 adev->gfx.config.max_sh_per_se); 1867 1868 return (~data) & mask; 1869 } 1870 1871 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1872 { 1873 int i, j; 1874 u32 data; 1875 u32 active_rbs = 0; 1876 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1877 adev->gfx.config.max_sh_per_se; 1878 1879 
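	/*
	 * Walk every shader engine / shader array, read which render
	 * backends (RBs) are active there, and pack the per-SH bitmaps
	 * into one global mask.  grbm_idx_mutex serializes the
	 * GRBM_GFX_INDEX selection used by gfx_v9_0_select_se_sh()
	 * against other paths that reprogram it.
	 */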
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs.
*/ 1928 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1929 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 1930 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 1931 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 1932 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 1933 } 1934 } 1935 1936 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 1937 { 1938 u32 tmp; 1939 int i; 1940 1941 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1942 1943 gfx_v9_0_tiling_mode_table_init(adev); 1944 1945 gfx_v9_0_setup_rb(adev); 1946 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1947 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 1948 1949 /* XXX SH_MEM regs */ 1950 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1951 mutex_lock(&adev->srbm_mutex); 1952 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { 1953 soc15_grbm_select(adev, 0, 0, 0, i); 1954 /* CP and shaders */ 1955 if (i == 0) { 1956 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1957 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1958 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1959 !!amdgpu_noretry); 1960 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1961 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 1962 } else { 1963 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1964 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1965 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1966 !!amdgpu_noretry); 1967 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1968 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1969 (adev->gmc.private_aperture_start >> 48)); 1970 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1971 (adev->gmc.shared_aperture_start >> 48)); 1972 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 1973 } 1974 } 1975 soc15_grbm_select(adev, 0, 0, 0, 0); 1976 1977 mutex_unlock(&adev->srbm_mutex); 1978 1979 gfx_v9_0_init_compute_vmid(adev); 1980 } 1981 1982 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1983 { 1984 u32 i, j, k; 1985 u32 mask; 1986 1987 mutex_lock(&adev->grbm_idx_mutex); 1988 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1989 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1990 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1991 for (k = 0; k < adev->usec_timeout; k++) { 1992 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1993 break; 1994 udelay(1); 1995 } 1996 if (k == adev->usec_timeout) { 1997 gfx_v9_0_select_se_sh(adev, 0xffffffff, 1998 0xffffffff, 0xffffffff); 1999 mutex_unlock(&adev->grbm_idx_mutex); 2000 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2001 i, j); 2002 return; 2003 } 2004 } 2005 } 2006 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2007 mutex_unlock(&adev->grbm_idx_mutex); 2008 2009 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2010 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2011 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2012 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2013 for (k = 0; k < adev->usec_timeout; k++) { 2014 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2015 break; 2016 udelay(1); 2017 } 2018 } 2019 2020 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2021 bool enable) 2022 { 2023 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2024 2025 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2026 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
				    1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
		   adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
		   adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
		   adev->gfx.rlc.clear_state_size);
}

static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
					int indirect_offset,
					int list_size,
					int *unique_indirect_regs,
					int unique_indirect_reg_count,
					int *indirect_start_offsets,
					int *indirect_start_offsets_count,
					int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			indirect_offset += 2;

			/* look for the matching index */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			BUG_ON(idx >= unique_indirect_reg_count);

			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}

static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
{
	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
	int unique_indirect_reg_count = 0;

	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
	int indirect_start_offsets_count = 0;

	int list_size = 0;
	int i = 0, j = 0;
	u32 tmp = 0;

	u32 *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	/* setup unique_indirect_regs array and indirect_start_offsets array */
	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
	gfx_v9_1_parse_ind_reg_list(register_list_format,
				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indirect_regs,
				    unique_indirect_reg_count,
				    indirect_start_offsets,
				    &indirect_start_offsets_count,
				    ARRAY_SIZE(indirect_start_offsets));

	/* enable auto inc in case it is disabled */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);

	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
	       RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
		       adev->gfx.rlc.register_restore[i]);

	/* load indirect register */
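	/*
	 * The register_list_format stream written below consists of the
	 * direct-register portion followed by indirect entries, where each
	 * indirect entry is a (register, value) pair terminated by
	 * 0xFFFFFFFF.  When an indirect register is emitted, its offset is
	 * replaced by its index into unique_indirect_regs, presumably so
	 * the RLC can resolve it through the SRM_INDEX_CNTL_ADDR/DATA
	 * registers programmed at the end of this function.
	 */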
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2124 adev->gfx.rlc.reg_list_format_start); 2125 2126 /* direct register portion */ 2127 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2128 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2129 register_list_format[i]); 2130 2131 /* indirect register portion */ 2132 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2133 if (register_list_format[i] == 0xFFFFFFFF) { 2134 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2135 continue; 2136 } 2137 2138 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2139 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2140 2141 for (j = 0; j < unique_indirect_reg_count; j++) { 2142 if (register_list_format[i] == unique_indirect_regs[j]) { 2143 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2144 break; 2145 } 2146 } 2147 2148 BUG_ON(j >= unique_indirect_reg_count); 2149 2150 i++; 2151 } 2152 2153 /* set save/restore list size */ 2154 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2155 list_size = list_size >> 1; 2156 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2157 adev->gfx.rlc.reg_restore_list_size); 2158 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2159 2160 /* write the starting offsets to RLC scratch ram */ 2161 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2162 adev->gfx.rlc.starting_offsets_start); 2163 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2164 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2165 indirect_start_offsets[i]); 2166 2167 /* load unique indirect regs*/ 2168 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2169 if (unique_indirect_regs[i] != 0) { 2170 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2171 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2172 unique_indirect_regs[i] & 0x3FFFF); 2173 2174 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2175 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2176 unique_indirect_regs[i] >> 20); 2177 } 2178 } 2179 2180 kfree(register_list_format); 2181 return 0; 2182 } 2183 2184 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2185 { 2186 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2187 } 2188 2189 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2190 bool enable) 2191 { 2192 uint32_t data = 0; 2193 uint32_t default_data = 0; 2194 2195 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2196 if (enable == true) { 2197 /* enable GFXIP control over CGPG */ 2198 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2199 if(default_data != data) 2200 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2201 2202 /* update status */ 2203 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2204 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2205 if(default_data != data) 2206 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2207 } else { 2208 /* restore GFXIP control over GCPG */ 2209 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2210 if(default_data != data) 2211 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2212 } 2213 } 2214 2215 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2216 { 2217 uint32_t data = 0; 2218 2219 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2220 AMD_PG_SUPPORT_GFX_SMG | 2221 AMD_PG_SUPPORT_GFX_DMG)) { 2222 /* init IDLE_POLL_COUNT = 60 
*/ 2223 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2224 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2225 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2226 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2227 2228 /* init RLC PG Delay */ 2229 data = 0; 2230 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2231 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2232 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2233 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2234 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2235 2236 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2237 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2238 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2239 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2240 2241 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2242 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2243 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2244 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2245 2246 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2247 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2248 2249 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2250 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2251 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2252 2253 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2254 } 2255 } 2256 2257 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2258 bool enable) 2259 { 2260 uint32_t data = 0; 2261 uint32_t default_data = 0; 2262 2263 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2264 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2265 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2266 enable ? 1 : 0); 2267 if (default_data != data) 2268 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2269 } 2270 2271 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2272 bool enable) 2273 { 2274 uint32_t data = 0; 2275 uint32_t default_data = 0; 2276 2277 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2278 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2279 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2280 enable ? 1 : 0); 2281 if(default_data != data) 2282 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2283 } 2284 2285 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2286 bool enable) 2287 { 2288 uint32_t data = 0; 2289 uint32_t default_data = 0; 2290 2291 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2292 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2293 CP_PG_DISABLE, 2294 enable ? 0 : 1); 2295 if(default_data != data) 2296 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2297 } 2298 2299 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2300 bool enable) 2301 { 2302 uint32_t data, default_data; 2303 2304 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2305 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2306 GFX_POWER_GATING_ENABLE, 2307 enable ? 
1 : 0); 2308 if(default_data != data) 2309 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2310 } 2311 2312 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2313 bool enable) 2314 { 2315 uint32_t data, default_data; 2316 2317 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2318 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2319 GFX_PIPELINE_PG_ENABLE, 2320 enable ? 1 : 0); 2321 if(default_data != data) 2322 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2323 2324 if (!enable) 2325 /* read any GFX register to wake up GFX */ 2326 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2327 } 2328 2329 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2330 bool enable) 2331 { 2332 uint32_t data, default_data; 2333 2334 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2335 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2336 STATIC_PER_CU_PG_ENABLE, 2337 enable ? 1 : 0); 2338 if(default_data != data) 2339 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2340 } 2341 2342 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2343 bool enable) 2344 { 2345 uint32_t data, default_data; 2346 2347 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2348 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2349 DYN_PER_CU_PG_ENABLE, 2350 enable ? 1 : 0); 2351 if(default_data != data) 2352 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2353 } 2354 2355 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2356 { 2357 gfx_v9_0_init_csb(adev); 2358 2359 /* 2360 * Rlc save restore list is workable since v2_1. 2361 * And it's needed by gfxoff feature. 2362 */ 2363 if (adev->gfx.rlc.is_rlc_v2_1) { 2364 gfx_v9_1_init_rlc_save_restore_list(adev); 2365 gfx_v9_0_enable_save_restore_machine(adev); 2366 } 2367 2368 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2369 AMD_PG_SUPPORT_GFX_SMG | 2370 AMD_PG_SUPPORT_GFX_DMG | 2371 AMD_PG_SUPPORT_CP | 2372 AMD_PG_SUPPORT_GDS | 2373 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2374 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2375 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2376 gfx_v9_0_init_gfx_power_gating(adev); 2377 } 2378 } 2379 2380 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2381 { 2382 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2383 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2384 gfx_v9_0_wait_for_rlc_serdes(adev); 2385 } 2386 2387 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2388 { 2389 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2390 udelay(50); 2391 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2392 udelay(50); 2393 } 2394 2395 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2396 { 2397 #ifdef AMDGPU_RLC_DEBUG_RETRY 2398 u32 rlc_ucode_ver; 2399 #endif 2400 2401 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2402 udelay(50); 2403 2404 /* carrizo do enable cp interrupt after cp inited */ 2405 if (!(adev->flags & AMD_IS_APU)) { 2406 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2407 udelay(50); 2408 } 2409 2410 #ifdef AMDGPU_RLC_DEBUG_RETRY 2411 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2412 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2413 if(rlc_ucode_ver == 0x108) { 2414 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2415 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2416 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2417 * default is 0x9C4 to create a 100us interval */ 2418 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2419 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2420 * to disable the page fault retry interrupts, default is 2421 * 0x100 (256) */ 2422 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2423 } 2424 #endif 2425 } 2426 2427 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2428 { 2429 const struct rlc_firmware_header_v2_0 *hdr; 2430 const __le32 *fw_data; 2431 unsigned i, fw_size; 2432 2433 if (!adev->gfx.rlc_fw) 2434 return -EINVAL; 2435 2436 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2437 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2438 2439 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2440 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2441 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2442 2443 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2444 RLCG_UCODE_LOADING_START_ADDRESS); 2445 for (i = 0; i < fw_size; i++) 2446 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2447 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2448 2449 return 0; 2450 } 2451 2452 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2453 { 2454 int r; 2455 2456 if (amdgpu_sriov_vf(adev)) { 2457 gfx_v9_0_init_csb(adev); 2458 return 0; 2459 } 2460 2461 adev->gfx.rlc.funcs->stop(adev); 2462 2463 /* disable CG */ 2464 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2465 2466 gfx_v9_0_init_pg(adev); 2467 2468 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2469 /* legacy rlc firmware loading */ 2470 r = gfx_v9_0_rlc_load_microcode(adev); 2471 if (r) 2472 return r; 2473 } 2474 2475 switch (adev->asic_type) { 2476 case CHIP_RAVEN: 2477 if (amdgpu_lbpw == 0) 2478 gfx_v9_0_enable_lbpw(adev, false); 2479 else 2480 gfx_v9_0_enable_lbpw(adev, true); 2481 break; 2482 case CHIP_VEGA20: 2483 if (amdgpu_lbpw > 0) 2484 gfx_v9_0_enable_lbpw(adev, true); 2485 else 2486 gfx_v9_0_enable_lbpw(adev, false); 2487 break; 2488 default: 2489 break; 2490 } 2491 2492 adev->gfx.rlc.funcs->start(adev); 2493 2494 return 0; 2495 } 2496 2497 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2498 { 2499 int i; 2500 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2501 2502 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2503 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2504 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 2505 if (!enable) { 2506 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2507 adev->gfx.gfx_ring[i].sched.ready = false; 2508 } 2509 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2510 udelay(50); 2511 } 2512 2513 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2514 { 2515 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2516 const struct gfx_firmware_header_v1_0 *ce_hdr; 2517 const struct gfx_firmware_header_v1_0 *me_hdr; 2518 const __le32 *fw_data; 2519 unsigned i, fw_size; 2520 2521 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2522 return -EINVAL; 2523 2524 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2525 adev->gfx.pfp_fw->data; 2526 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2527 adev->gfx.ce_fw->data; 2528 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2529 adev->gfx.me_fw->data; 2530 2531 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2532 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2533 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2534 2535 gfx_v9_0_cp_gfx_enable(adev, false); 2536 2537 /* PFP */ 2538 fw_data = (const __le32 *) 2539 (adev->gfx.pfp_fw->data + 2540 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2541 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2542 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2543 for (i = 0; i < fw_size; i++) 2544 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2545 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2546 2547 /* CE */ 2548 fw_data = (const __le32 *) 2549 (adev->gfx.ce_fw->data + 2550 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2551 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2552 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2553 for (i = 0; i < fw_size; i++) 2554 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2555 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2556 2557 /* ME */ 2558 fw_data = (const __le32 *) 2559 (adev->gfx.me_fw->data + 2560 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2561 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2562 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2563 for (i = 0; i < fw_size; i++) 2564 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2565 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2566 2567 return 0; 2568 } 2569 2570 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2571 { 2572 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2573 const struct cs_section_def *sect = NULL; 2574 const struct cs_extent_def *ext = NULL; 2575 int r, i, tmp; 2576 2577 /* init the CP */ 2578 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2579 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2580 2581 gfx_v9_0_cp_gfx_enable(adev, true); 2582 2583 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2584 if (r) { 2585 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2586 return r; 2587 } 2588 2589 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2590 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2591 2592 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2593 amdgpu_ring_write(ring, 0x80000000); 2594 amdgpu_ring_write(ring, 0x80000000); 2595 2596 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2597 for (ext = sect->section; ext->extent != NULL; ++ext) { 2598 if (sect->id == SECT_CONTEXT) { 2599 amdgpu_ring_write(ring, 2600 PACKET3(PACKET3_SET_CONTEXT_REG, 2601 
					   ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);

	/* start the ring */
	gfx_v9_0_cp_gfx_start(adev);
	ring->sched.ready = true;
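	/*
	 * From this point the GPU scheduler may submit work to the gfx
	 * ring; gfx_v9_0_cp_resume() runs a ring test afterwards to
	 * confirm the CP actually consumes it.
	 */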
2698 2699 return 0; 2700 } 2701 2702 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2703 { 2704 int i; 2705 2706 if (enable) { 2707 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 2708 } else { 2709 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 2710 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2711 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2712 adev->gfx.compute_ring[i].sched.ready = false; 2713 adev->gfx.kiq.ring.sched.ready = false; 2714 } 2715 udelay(50); 2716 } 2717 2718 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2719 { 2720 const struct gfx_firmware_header_v1_0 *mec_hdr; 2721 const __le32 *fw_data; 2722 unsigned i; 2723 u32 tmp; 2724 2725 if (!adev->gfx.mec_fw) 2726 return -EINVAL; 2727 2728 gfx_v9_0_cp_compute_enable(adev, false); 2729 2730 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2731 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2732 2733 fw_data = (const __le32 *) 2734 (adev->gfx.mec_fw->data + 2735 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2736 tmp = 0; 2737 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2738 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2739 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2740 2741 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2742 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2743 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2744 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2745 2746 /* MEC1 */ 2747 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2748 mec_hdr->jt_offset); 2749 for (i = 0; i < mec_hdr->jt_size; i++) 2750 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2751 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2752 2753 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2754 adev->gfx.mec_fw_version); 2755 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 2756 2757 return 0; 2758 } 2759 2760 /* KIQ functions */ 2761 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2762 { 2763 uint32_t tmp; 2764 struct amdgpu_device *adev = ring->adev; 2765 2766 /* tell RLC which is KIQ queue */ 2767 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2768 tmp &= 0xffffff00; 2769 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2770 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2771 tmp |= 0x80; 2772 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2773 } 2774 2775 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2776 { 2777 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2778 uint64_t queue_mask = 0; 2779 int r, i; 2780 2781 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2782 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2783 continue; 2784 2785 /* This situation may be hit in the future if a new HW 2786 * generation exposes more than 64 queues. 
If so, the 2787 * definition of queue_mask needs updating */ 2788 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 2789 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 2790 break; 2791 } 2792 2793 queue_mask |= (1ull << i); 2794 } 2795 2796 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 2797 if (r) { 2798 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2799 return r; 2800 } 2801 2802 /* set resources */ 2803 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 2804 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 2805 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 2806 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 2807 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 2808 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 2809 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 2810 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 2811 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 2812 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2813 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2814 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 2815 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2816 2817 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 2818 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 2819 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 2820 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 2821 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 2822 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 2823 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2824 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 2825 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2826 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 2827 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2828 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2829 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2830 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2831 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2832 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2833 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2834 } 2835 2836 r = amdgpu_ring_test_helper(kiq_ring); 2837 if (r) 2838 DRM_ERROR("KCQ enable failed\n"); 2839 2840 return r; 2841 } 2842 2843 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2844 { 2845 struct amdgpu_device *adev = ring->adev; 2846 struct v9_mqd *mqd = ring->mqd_ptr; 2847 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2848 uint32_t tmp; 2849 2850 mqd->header = 0xC0310800; 2851 mqd->compute_pipelinestat_enable = 0x00000001; 2852 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2853 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2854 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2855 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2856 mqd->compute_misc_reserved = 0x00000003; 2857 2858 mqd->dynamic_cu_mask_addr_lo = 2859 lower_32_bits(ring->mqd_gpu_addr 2860 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2861 mqd->dynamic_cu_mask_addr_hi = 2862 upper_32_bits(ring->mqd_gpu_addr 2863 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2864 2865 eop_base_addr = ring->eop_gpu_addr >> 8; 2866 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2867 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2868 2869 /* set the EOP size, 
register value is 2^(EOP_SIZE+1) dwords */ 2870 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2871 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2872 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2873 2874 mqd->cp_hqd_eop_control = tmp; 2875 2876 /* enable doorbell? */ 2877 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2878 2879 if (ring->use_doorbell) { 2880 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2881 DOORBELL_OFFSET, ring->doorbell_index); 2882 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2883 DOORBELL_EN, 1); 2884 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2885 DOORBELL_SOURCE, 0); 2886 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2887 DOORBELL_HIT, 0); 2888 } else { 2889 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2890 DOORBELL_EN, 0); 2891 } 2892 2893 mqd->cp_hqd_pq_doorbell_control = tmp; 2894 2895 /* disable the queue if it's active */ 2896 ring->wptr = 0; 2897 mqd->cp_hqd_dequeue_request = 0; 2898 mqd->cp_hqd_pq_rptr = 0; 2899 mqd->cp_hqd_pq_wptr_lo = 0; 2900 mqd->cp_hqd_pq_wptr_hi = 0; 2901 2902 /* set the pointer to the MQD */ 2903 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 2904 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2905 2906 /* set MQD vmid to 0 */ 2907 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 2908 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2909 mqd->cp_mqd_control = tmp; 2910 2911 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2912 hqd_gpu_addr = ring->gpu_addr >> 8; 2913 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2914 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2915 2916 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2917 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 2918 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2919 (order_base_2(ring->ring_size / 4) - 1)); 2920 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2921 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 2922 #ifdef __BIG_ENDIAN 2923 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 2924 #endif 2925 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2926 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 2927 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2928 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2929 mqd->cp_hqd_pq_control = tmp; 2930 2931 /* set the wb address whether it's enabled or not */ 2932 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2933 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2934 mqd->cp_hqd_pq_rptr_report_addr_hi = 2935 upper_32_bits(wb_gpu_addr) & 0xffff; 2936 2937 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2938 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2939 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2940 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2941 2942 tmp = 0; 2943 /* enable the doorbell if requested */ 2944 if (ring->use_doorbell) { 2945 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2946 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2947 DOORBELL_OFFSET, ring->doorbell_index); 2948 2949 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2950 DOORBELL_EN, 1); 2951 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2952 DOORBELL_SOURCE, 0); 2953 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2954 DOORBELL_HIT, 0); 2955 } 2956 2957 mqd->cp_hqd_pq_doorbell_control = tmp; 2958 2959 /* reset read 
and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2960 ring->wptr = 0; 2961 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 2962 2963 /* set the vmid for the queue */ 2964 mqd->cp_hqd_vmid = 0; 2965 2966 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2967 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2968 mqd->cp_hqd_persistent_state = tmp; 2969 2970 /* set MIN_IB_AVAIL_SIZE */ 2971 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2972 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2973 mqd->cp_hqd_ib_control = tmp; 2974 2975 /* activate the queue */ 2976 mqd->cp_hqd_active = 1; 2977 2978 return 0; 2979 } 2980 2981 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2982 { 2983 struct amdgpu_device *adev = ring->adev; 2984 struct v9_mqd *mqd = ring->mqd_ptr; 2985 int j; 2986 2987 /* disable wptr polling */ 2988 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2989 2990 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2991 mqd->cp_hqd_eop_base_addr_lo); 2992 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2993 mqd->cp_hqd_eop_base_addr_hi); 2994 2995 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2996 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 2997 mqd->cp_hqd_eop_control); 2998 2999 /* enable doorbell? */ 3000 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3001 mqd->cp_hqd_pq_doorbell_control); 3002 3003 /* disable the queue if it's active */ 3004 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3005 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3006 for (j = 0; j < adev->usec_timeout; j++) { 3007 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3008 break; 3009 udelay(1); 3010 } 3011 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3012 mqd->cp_hqd_dequeue_request); 3013 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3014 mqd->cp_hqd_pq_rptr); 3015 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3016 mqd->cp_hqd_pq_wptr_lo); 3017 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3018 mqd->cp_hqd_pq_wptr_hi); 3019 } 3020 3021 /* set the pointer to the MQD */ 3022 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3023 mqd->cp_mqd_base_addr_lo); 3024 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3025 mqd->cp_mqd_base_addr_hi); 3026 3027 /* set MQD vmid to 0 */ 3028 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3029 mqd->cp_mqd_control); 3030 3031 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3032 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3033 mqd->cp_hqd_pq_base_lo); 3034 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3035 mqd->cp_hqd_pq_base_hi); 3036 3037 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3038 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3039 mqd->cp_hqd_pq_control); 3040 3041 /* set the wb address whether it's enabled or not */ 3042 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3043 mqd->cp_hqd_pq_rptr_report_addr_lo); 3044 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3045 mqd->cp_hqd_pq_rptr_report_addr_hi); 3046 3047 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3048 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3049 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3050 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3051 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3052 3053 /* enable the doorbell if requested */ 3054 if (ring->use_doorbell) { 3055 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3056 (adev->doorbell_index.kiq * 2) << 2); 3057 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3058 
(adev->doorbell_index.userqueue_end * 2) << 2); 3059 } 3060 3061 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3062 mqd->cp_hqd_pq_doorbell_control); 3063 3064 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3065 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3066 mqd->cp_hqd_pq_wptr_lo); 3067 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3068 mqd->cp_hqd_pq_wptr_hi); 3069 3070 /* set the vmid for the queue */ 3071 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3072 3073 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3074 mqd->cp_hqd_persistent_state); 3075 3076 /* activate the queue */ 3077 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3078 mqd->cp_hqd_active); 3079 3080 if (ring->use_doorbell) 3081 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3082 3083 return 0; 3084 } 3085 3086 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3087 { 3088 struct amdgpu_device *adev = ring->adev; 3089 int j; 3090 3091 /* disable the queue if it's active */ 3092 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3093 3094 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3095 3096 for (j = 0; j < adev->usec_timeout; j++) { 3097 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3098 break; 3099 udelay(1); 3100 } 3101 3102 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3103 DRM_DEBUG("KIQ dequeue request failed.\n"); 3104 3105 /* Manual disable if dequeue request times out */ 3106 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3107 } 3108 3109 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3110 0); 3111 } 3112 3113 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3114 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3115 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3116 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3117 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3118 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3119 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3120 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3121 3122 return 0; 3123 } 3124 3125 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3126 { 3127 struct amdgpu_device *adev = ring->adev; 3128 struct v9_mqd *mqd = ring->mqd_ptr; 3129 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3130 3131 gfx_v9_0_kiq_setting(ring); 3132 3133 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3134 /* reset MQD to a clean status */ 3135 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3136 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3137 3138 /* reset ring buffer */ 3139 ring->wptr = 0; 3140 amdgpu_ring_clear_ring(ring); 3141 3142 mutex_lock(&adev->srbm_mutex); 3143 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3144 gfx_v9_0_kiq_init_register(ring); 3145 soc15_grbm_select(adev, 0, 0, 0, 0); 3146 mutex_unlock(&adev->srbm_mutex); 3147 } else { 3148 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3149 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3150 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3151 mutex_lock(&adev->srbm_mutex); 3152 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3153 gfx_v9_0_mqd_init(ring); 3154 gfx_v9_0_kiq_init_register(ring); 3155 soc15_grbm_select(adev, 0, 0, 0, 0); 3156 mutex_unlock(&adev->srbm_mutex); 3157 3158 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3159 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3160 } 3161 3162 return 0; 3163 } 3164 3165 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring 
*ring) 3166 { 3167 struct amdgpu_device *adev = ring->adev; 3168 struct v9_mqd *mqd = ring->mqd_ptr; 3169 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3170 3171 if (!adev->in_gpu_reset && !adev->in_suspend) { 3172 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3173 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3174 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3175 mutex_lock(&adev->srbm_mutex); 3176 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3177 gfx_v9_0_mqd_init(ring); 3178 soc15_grbm_select(adev, 0, 0, 0, 0); 3179 mutex_unlock(&adev->srbm_mutex); 3180 3181 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3182 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3183 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3184 /* reset MQD to a clean status */ 3185 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3186 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3187 3188 /* reset ring buffer */ 3189 ring->wptr = 0; 3190 amdgpu_ring_clear_ring(ring); 3191 } else { 3192 amdgpu_ring_clear_ring(ring); 3193 } 3194 3195 return 0; 3196 } 3197 3198 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3199 { 3200 struct amdgpu_ring *ring; 3201 int r; 3202 3203 ring = &adev->gfx.kiq.ring; 3204 3205 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3206 if (unlikely(r != 0)) 3207 return r; 3208 3209 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3210 if (unlikely(r != 0)) 3211 return r; 3212 3213 gfx_v9_0_kiq_init_queue(ring); 3214 amdgpu_bo_kunmap(ring->mqd_obj); 3215 ring->mqd_ptr = NULL; 3216 amdgpu_bo_unreserve(ring->mqd_obj); 3217 ring->sched.ready = true; 3218 return 0; 3219 } 3220 3221 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3222 { 3223 struct amdgpu_ring *ring = NULL; 3224 int r = 0, i; 3225 3226 gfx_v9_0_cp_compute_enable(adev, true); 3227 3228 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3229 ring = &adev->gfx.compute_ring[i]; 3230 3231 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3232 if (unlikely(r != 0)) 3233 goto done; 3234 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3235 if (!r) { 3236 r = gfx_v9_0_kcq_init_queue(ring); 3237 amdgpu_bo_kunmap(ring->mqd_obj); 3238 ring->mqd_ptr = NULL; 3239 } 3240 amdgpu_bo_unreserve(ring->mqd_obj); 3241 if (r) 3242 goto done; 3243 } 3244 3245 r = gfx_v9_0_kiq_kcq_enable(adev); 3246 done: 3247 return r; 3248 } 3249 3250 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3251 { 3252 int r, i; 3253 struct amdgpu_ring *ring; 3254 3255 if (!(adev->flags & AMD_IS_APU)) 3256 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3257 3258 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3259 /* legacy firmware loading */ 3260 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3261 if (r) 3262 return r; 3263 3264 r = gfx_v9_0_cp_compute_load_microcode(adev); 3265 if (r) 3266 return r; 3267 } 3268 3269 r = gfx_v9_0_kiq_resume(adev); 3270 if (r) 3271 return r; 3272 3273 r = gfx_v9_0_cp_gfx_resume(adev); 3274 if (r) 3275 return r; 3276 3277 r = gfx_v9_0_kcq_resume(adev); 3278 if (r) 3279 return r; 3280 3281 ring = &adev->gfx.gfx_ring[0]; 3282 r = amdgpu_ring_test_helper(ring); 3283 if (r) 3284 return r; 3285 3286 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3287 ring = &adev->gfx.compute_ring[i]; 3288 amdgpu_ring_test_helper(ring); 3289 } 3290 3291 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3292 3293 return 0; 3294 } 3295 3296 static void gfx_v9_0_cp_enable(struct amdgpu_device 
*adev, bool enable) 3297 { 3298 gfx_v9_0_cp_gfx_enable(adev, enable); 3299 gfx_v9_0_cp_compute_enable(adev, enable); 3300 } 3301 3302 static int gfx_v9_0_hw_init(void *handle) 3303 { 3304 int r; 3305 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3306 3307 gfx_v9_0_init_golden_registers(adev); 3308 3309 gfx_v9_0_constants_init(adev); 3310 3311 r = gfx_v9_0_csb_vram_pin(adev); 3312 if (r) 3313 return r; 3314 3315 r = adev->gfx.rlc.funcs->resume(adev); 3316 if (r) 3317 return r; 3318 3319 r = gfx_v9_0_cp_resume(adev); 3320 if (r) 3321 return r; 3322 3323 r = gfx_v9_0_ngg_en(adev); 3324 if (r) 3325 return r; 3326 3327 return r; 3328 } 3329 3330 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3331 { 3332 int r, i; 3333 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3334 3335 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3336 if (r) 3337 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3338 3339 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3340 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3341 3342 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3343 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3344 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3345 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3346 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3347 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3348 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3349 amdgpu_ring_write(kiq_ring, 0); 3350 amdgpu_ring_write(kiq_ring, 0); 3351 amdgpu_ring_write(kiq_ring, 0); 3352 } 3353 r = amdgpu_ring_test_helper(kiq_ring); 3354 if (r) 3355 DRM_ERROR("KCQ disable failed\n"); 3356 3357 return r; 3358 } 3359 3360 static int gfx_v9_0_hw_fini(void *handle) 3361 { 3362 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3363 3364 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3365 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3366 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3367 3368 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3369 gfx_v9_0_kcq_disable(adev); 3370 3371 if (amdgpu_sriov_vf(adev)) { 3372 gfx_v9_0_cp_gfx_enable(adev, false); 3373 /* must disable polling for SRIOV when hw finished, otherwise 3374 * CPC engine may still keep fetching WB address which is already 3375 * invalid after sw finished and trigger DMAR reading error in 3376 * hypervisor side. 
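		 * Clearing CP_PQ_WPTR_POLL_CNTL.EN just below is what stops that
		 * polling before we return.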
3377 */ 3378 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3379 return 0; 3380 } 3381 3382 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3383 * otherwise KIQ is hanging when binding back 3384 */ 3385 if (!adev->in_gpu_reset && !adev->in_suspend) { 3386 mutex_lock(&adev->srbm_mutex); 3387 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3388 adev->gfx.kiq.ring.pipe, 3389 adev->gfx.kiq.ring.queue, 0); 3390 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3391 soc15_grbm_select(adev, 0, 0, 0, 0); 3392 mutex_unlock(&adev->srbm_mutex); 3393 } 3394 3395 gfx_v9_0_cp_enable(adev, false); 3396 adev->gfx.rlc.funcs->stop(adev); 3397 3398 gfx_v9_0_csb_vram_unpin(adev); 3399 3400 return 0; 3401 } 3402 3403 static int gfx_v9_0_suspend(void *handle) 3404 { 3405 return gfx_v9_0_hw_fini(handle); 3406 } 3407 3408 static int gfx_v9_0_resume(void *handle) 3409 { 3410 return gfx_v9_0_hw_init(handle); 3411 } 3412 3413 static bool gfx_v9_0_is_idle(void *handle) 3414 { 3415 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3416 3417 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3418 GRBM_STATUS, GUI_ACTIVE)) 3419 return false; 3420 else 3421 return true; 3422 } 3423 3424 static int gfx_v9_0_wait_for_idle(void *handle) 3425 { 3426 unsigned i; 3427 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3428 3429 for (i = 0; i < adev->usec_timeout; i++) { 3430 if (gfx_v9_0_is_idle(handle)) 3431 return 0; 3432 udelay(1); 3433 } 3434 return -ETIMEDOUT; 3435 } 3436 3437 static int gfx_v9_0_soft_reset(void *handle) 3438 { 3439 u32 grbm_soft_reset = 0; 3440 u32 tmp; 3441 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3442 3443 /* GRBM_STATUS */ 3444 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3445 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3446 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3447 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3448 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3449 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3450 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3451 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3452 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3453 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3454 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3455 } 3456 3457 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3458 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3459 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3460 } 3461 3462 /* GRBM_STATUS2 */ 3463 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3464 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3465 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3466 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3467 3468 3469 if (grbm_soft_reset) { 3470 /* stop the rlc */ 3471 adev->gfx.rlc.funcs->stop(adev); 3472 3473 /* Disable GFX parsing/prefetching */ 3474 gfx_v9_0_cp_gfx_enable(adev, false); 3475 3476 /* Disable MEC parsing/prefetching */ 3477 gfx_v9_0_cp_compute_enable(adev, false); 3478 3479 if (grbm_soft_reset) { 3480 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3481 tmp |= grbm_soft_reset; 3482 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3483 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3484 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3485 3486 udelay(50); 3487 3488 tmp &= ~grbm_soft_reset; 3489 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3490 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3491 } 3492 3493 /* Wait a little for things to settle down */ 3494 
udelay(50); 3495 } 3496 return 0; 3497 } 3498 3499 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3500 { 3501 uint64_t clock; 3502 3503 mutex_lock(&adev->gfx.gpu_clock_mutex); 3504 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3505 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3506 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3507 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3508 return clock; 3509 } 3510 3511 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3512 uint32_t vmid, 3513 uint32_t gds_base, uint32_t gds_size, 3514 uint32_t gws_base, uint32_t gws_size, 3515 uint32_t oa_base, uint32_t oa_size) 3516 { 3517 struct amdgpu_device *adev = ring->adev; 3518 3519 /* GDS Base */ 3520 gfx_v9_0_write_data_to_reg(ring, 0, false, 3521 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3522 gds_base); 3523 3524 /* GDS Size */ 3525 gfx_v9_0_write_data_to_reg(ring, 0, false, 3526 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3527 gds_size); 3528 3529 /* GWS */ 3530 gfx_v9_0_write_data_to_reg(ring, 0, false, 3531 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3532 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3533 3534 /* OA */ 3535 gfx_v9_0_write_data_to_reg(ring, 0, false, 3536 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3537 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3538 } 3539 3540 static const u32 vgpr_init_compute_shader[] = 3541 { 3542 0xb07c0000, 0xbe8000ff, 3543 0x000000f8, 0xbf110800, 3544 0x7e000280, 0x7e020280, 3545 0x7e040280, 0x7e060280, 3546 0x7e080280, 0x7e0a0280, 3547 0x7e0c0280, 0x7e0e0280, 3548 0x80808800, 0xbe803200, 3549 0xbf84fff5, 0xbf9c0000, 3550 0xd28c0001, 0x0001007f, 3551 0xd28d0001, 0x0002027e, 3552 0x10020288, 0xb8810904, 3553 0xb7814000, 0xd1196a01, 3554 0x00000301, 0xbe800087, 3555 0xbefc00c1, 0xd89c4000, 3556 0x00020201, 0xd89cc080, 3557 0x00040401, 0x320202ff, 3558 0x00000800, 0x80808100, 3559 0xbf84fff8, 0x7e020280, 3560 0xbf810000, 0x00000000, 3561 }; 3562 3563 static const u32 sgpr_init_compute_shader[] = 3564 { 3565 0xb07c0000, 0xbe8000ff, 3566 0x0000005f, 0xbee50080, 3567 0xbe812c65, 0xbe822c65, 3568 0xbe832c65, 0xbe842c65, 3569 0xbe852c65, 0xb77c0005, 3570 0x80808500, 0xbf84fff8, 3571 0xbe800080, 0xbf810000, 3572 }; 3573 3574 static const struct soc15_reg_entry vgpr_init_regs[] = { 3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3585 }; 3586 3587 static const struct soc15_reg_entry sgpr_init_regs[] = { 3588 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3589 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3591 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 
0xffffffff }, 3592 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3593 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3594 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3595 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3596 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3597 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3598 }; 3599 3600 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3601 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3602 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3603 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3604 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3605 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3606 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3607 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3608 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3609 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3610 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3611 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3612 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3613 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3614 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3615 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3616 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3617 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3618 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3619 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3620 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3621 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3622 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3623 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 3624 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 3625 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 3626 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 3627 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 3628 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 3629 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 3630 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 3631 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 3632 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 3633 }; 3634 3635 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 3636 { 3637 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3638 int i, r; 3639 3640 r = amdgpu_ring_alloc(ring, 7); 3641 if (r) { 3642 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 3643 ring->name, r); 3644 return r; 3645 } 3646 3647 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 3648 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 3649 3650 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 3651 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 3652 PACKET3_DMA_DATA_DST_SEL(1) | 3653 PACKET3_DMA_DATA_SRC_SEL(2) | 3654 PACKET3_DMA_DATA_ENGINE(0))); 3655 amdgpu_ring_write(ring, 0); 3656 amdgpu_ring_write(ring, 0); 3657 amdgpu_ring_write(ring, 0); 3658 amdgpu_ring_write(ring, 0); 3659 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 3660 adev->gds.gds_size); 3661 3662 amdgpu_ring_commit(ring); 3663 3664 for (i = 0; i < adev->usec_timeout; i++) { 3665 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 3666 break; 3667 udelay(1); 3668 } 3669 3670 if (i >= 
adev->usec_timeout) 3671 r = -ETIMEDOUT; 3672 3673 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 3674 3675 return r; 3676 } 3677 3678 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 3679 { 3680 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3681 struct amdgpu_ib ib; 3682 struct dma_fence *f = NULL; 3683 int r, i, j, k; 3684 unsigned total_size, vgpr_offset, sgpr_offset; 3685 u64 gpu_addr; 3686 3687 /* only support when RAS is enabled */ 3688 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3689 return 0; 3690 3691 /* bail if the compute ring is not ready */ 3692 if (!ring->sched.ready) 3693 return 0; 3694 3695 total_size = 3696 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3697 total_size += 3698 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3699 total_size = ALIGN(total_size, 256); 3700 vgpr_offset = total_size; 3701 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 3702 sgpr_offset = total_size; 3703 total_size += sizeof(sgpr_init_compute_shader); 3704 3705 /* allocate an indirect buffer to put the commands in */ 3706 memset(&ib, 0, sizeof(ib)); 3707 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 3708 if (r) { 3709 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 3710 return r; 3711 } 3712 3713 /* load the compute shaders */ 3714 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 3715 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 3716 3717 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 3718 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 3719 3720 /* init the ib length to 0 */ 3721 ib.length_dw = 0; 3722 3723 /* VGPR */ 3724 /* write the register state for the compute dispatch */ 3725 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 3726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3727 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 3728 - PACKET3_SET_SH_REG_START; 3729 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 3730 } 3731 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3732 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 3733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3734 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 3735 - PACKET3_SET_SH_REG_START; 3736 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3737 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3738 3739 /* write dispatch packet */ 3740 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3741 ib.ptr[ib.length_dw++] = 128; /* x */ 3742 ib.ptr[ib.length_dw++] = 1; /* y */ 3743 ib.ptr[ib.length_dw++] = 1; /* z */ 3744 ib.ptr[ib.length_dw++] = 3745 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3746 3747 /* write CS partial flush packet */ 3748 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3749 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3750 3751 /* SGPR */ 3752 /* write the register state for the compute dispatch */ 3753 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 3754 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3755 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 3756 - PACKET3_SET_SH_REG_START; 3757 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 3758 } 3759 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3760 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 3761 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3762 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
			mmCOMPUTE_PGM_LO) - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 128; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
			}
		}
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
		struct amdgpu_iv_entry *entry);

static int gfx_v9_0_ecc_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct ras_common_if **ras_if = &adev->gfx.ras_if;
	struct ras_ih_if ih_info = {
		.cb = gfx_v9_0_process_ras_data_cb,
	};
	struct ras_fs_if fs_info = {
		.sysfs_name = "gfx_err_count",
		.debugfs_name = "gfx_err_inject",
	};
	struct ras_common_if ras_block = {
		.block = AMDGPU_RAS_BLOCK__GFX,
		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		.sub_block_index = 0,
		.name = "gfx",
	};
	int r;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
		return 0;
	}

	r = gfx_v9_0_do_edc_gds_workarounds(adev);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* handle resume path. */
	if (*ras_if) {
		/* resend ras TA enable cmd during resume.
		 * prepare to handle failure.
		 */
		ih_info.head = **ras_if;
		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
		if (r) {
			if (r == -EAGAIN) {
				/* request a gpu reset. will run again.
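				 * gfx_v9_0_ecc_late_init will be invoked again
				 * once that reset has completed.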
*/ 3873 amdgpu_ras_request_reset_on_boot(adev, 3874 AMDGPU_RAS_BLOCK__GFX); 3875 return 0; 3876 } 3877 /* fail to enable ras, cleanup all. */ 3878 goto irq; 3879 } 3880 /* enable successfully. continue. */ 3881 goto resume; 3882 } 3883 3884 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 3885 if (!*ras_if) 3886 return -ENOMEM; 3887 3888 **ras_if = ras_block; 3889 3890 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3891 if (r) { 3892 if (r == -EAGAIN) { 3893 amdgpu_ras_request_reset_on_boot(adev, 3894 AMDGPU_RAS_BLOCK__GFX); 3895 r = 0; 3896 } 3897 goto feature; 3898 } 3899 3900 ih_info.head = **ras_if; 3901 fs_info.head = **ras_if; 3902 3903 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 3904 if (r) 3905 goto interrupt; 3906 3907 amdgpu_ras_debugfs_create(adev, &fs_info); 3908 3909 r = amdgpu_ras_sysfs_create(adev, &fs_info); 3910 if (r) 3911 goto sysfs; 3912 resume: 3913 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 3914 if (r) 3915 goto irq; 3916 3917 return 0; 3918 irq: 3919 amdgpu_ras_sysfs_remove(adev, *ras_if); 3920 sysfs: 3921 amdgpu_ras_debugfs_remove(adev, *ras_if); 3922 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 3923 interrupt: 3924 amdgpu_ras_feature_enable(adev, *ras_if, 0); 3925 feature: 3926 kfree(*ras_if); 3927 *ras_if = NULL; 3928 return r; 3929 } 3930 3931 static int gfx_v9_0_late_init(void *handle) 3932 { 3933 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3934 int r; 3935 3936 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3937 if (r) 3938 return r; 3939 3940 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3941 if (r) 3942 return r; 3943 3944 r = gfx_v9_0_ecc_late_init(handle); 3945 if (r) 3946 return r; 3947 3948 return 0; 3949 } 3950 3951 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 3952 { 3953 uint32_t rlc_setting; 3954 3955 /* if RLC is not enabled, do nothing */ 3956 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3957 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3958 return false; 3959 3960 return true; 3961 } 3962 3963 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 3964 { 3965 uint32_t data; 3966 unsigned i; 3967 3968 data = RLC_SAFE_MODE__CMD_MASK; 3969 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3970 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3971 3972 /* wait for RLC_SAFE_MODE */ 3973 for (i = 0; i < adev->usec_timeout; i++) { 3974 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3975 break; 3976 udelay(1); 3977 } 3978 } 3979 3980 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 3981 { 3982 uint32_t data; 3983 3984 data = RLC_SAFE_MODE__CMD_MASK; 3985 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3986 } 3987 3988 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3989 bool enable) 3990 { 3991 amdgpu_gfx_rlc_enter_safe_mode(adev); 3992 3993 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3994 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3995 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 3996 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 3997 } else { 3998 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 3999 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4000 } 4001 4002 amdgpu_gfx_rlc_exit_safe_mode(adev); 4003 } 4004 4005 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4006 bool enable) 4007 { 4008 /* TODO: double check if we need to perform under safe mode */ 4009 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4010 4011 if 
((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4012 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4013 else 4014 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4015 4016 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4017 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4018 else 4019 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4020 4021 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4022 } 4023 4024 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4025 bool enable) 4026 { 4027 uint32_t data, def; 4028 4029 amdgpu_gfx_rlc_enter_safe_mode(adev); 4030 4031 /* It is disabled by HW by default */ 4032 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4033 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4034 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4035 4036 if (adev->asic_type != CHIP_VEGA12) 4037 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4038 4039 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4040 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4041 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4042 4043 /* only for Vega10 & Raven1 */ 4044 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4045 4046 if (def != data) 4047 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4048 4049 /* MGLS is a global flag to control all MGLS in GFX */ 4050 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4051 /* 2 - RLC memory Light sleep */ 4052 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4053 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4054 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4055 if (def != data) 4056 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4057 } 4058 /* 3 - CP memory Light sleep */ 4059 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4060 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4061 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4062 if (def != data) 4063 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4064 } 4065 } 4066 } else { 4067 /* 1 - MGCG_OVERRIDE */ 4068 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4069 4070 if (adev->asic_type != CHIP_VEGA12) 4071 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4072 4073 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4074 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4075 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4076 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4077 4078 if (def != data) 4079 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4080 4081 /* 2 - disable MGLS in RLC */ 4082 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4083 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4084 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4085 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4086 } 4087 4088 /* 3 - disable MGLS in CP */ 4089 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4090 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4091 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4092 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4093 } 4094 } 4095 4096 amdgpu_gfx_rlc_exit_safe_mode(adev); 4097 } 4098 4099 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4100 bool enable) 4101 { 4102 uint32_t data, def; 4103 4104 amdgpu_gfx_rlc_enter_safe_mode(adev); 4105 4106 /* Enable 3D CGCG/CGLS */ 4107 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4108 /* write cmd to clear cgcg/cgls ov */ 4109 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4110 
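		/*
		 * All of the clock-gating helpers in this file follow the same
		 * read-modify-conditional-write idiom on their control register.
		 * A minimal sketch of the pattern, with mmSOME_CG_REG and
		 * SOME_OVERRIDE_MASK as placeholder names rather than real GC
		 * registers:
		 *
		 *	def = data = RREG32_SOC15(GC, 0, mmSOME_CG_REG);
		 *	data &= ~SOME_OVERRIDE_MASK;	// or |= to force the override
		 *	if (def != data)		// write back only when a bit changed
		 *		WREG32_SOC15(GC, 0, mmSOME_CG_REG, data);
		 */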
/* unset CGCG override */ 4111 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4112 /* update CGCG and CGLS override bits */ 4113 if (def != data) 4114 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4115 4116 /* enable 3Dcgcg FSM(0x0000363f) */ 4117 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4118 4119 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4120 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4121 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4122 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4123 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4124 if (def != data) 4125 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4126 4127 /* set IDLE_POLL_COUNT(0x00900100) */ 4128 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4129 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4130 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4131 if (def != data) 4132 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4133 } else { 4134 /* Disable CGCG/CGLS */ 4135 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4136 /* disable cgcg, cgls should be disabled */ 4137 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4138 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4139 /* disable cgcg and cgls in FSM */ 4140 if (def != data) 4141 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4142 } 4143 4144 amdgpu_gfx_rlc_exit_safe_mode(adev); 4145 } 4146 4147 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4148 bool enable) 4149 { 4150 uint32_t def, data; 4151 4152 amdgpu_gfx_rlc_enter_safe_mode(adev); 4153 4154 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4155 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4156 /* unset CGCG override */ 4157 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4158 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4159 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4160 else 4161 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4162 /* update CGCG and CGLS override bits */ 4163 if (def != data) 4164 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4165 4166 /* enable cgcg FSM(0x0000363F) */ 4167 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4168 4169 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4170 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4171 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4172 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4173 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4174 if (def != data) 4175 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4176 4177 /* set IDLE_POLL_COUNT(0x00900100) */ 4178 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4179 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4180 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4181 if (def != data) 4182 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4183 } else { 4184 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4185 /* reset CGCG/CGLS bits */ 4186 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4187 /* disable cgcg and cgls in FSM */ 4188 if (def != data) 4189 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4190 } 4191 4192 amdgpu_gfx_rlc_exit_safe_mode(adev); 4193 } 4194 4195 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4196 bool enable) 4197 { 4198 if (enable) { 4199 /* CGCG/CGLS should be enabled after MGCG/MGLS 4200 * === MGCG + MGLS === 4201 */ 4202 
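		/* (the disable branch below calls the same three helpers in the
		 * reverse order: CGCG/CGLS first, then GFX3D CGCG, then MGCG/MGLS)
		 */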
gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4203 /* === CGCG /CGLS for GFX 3D Only === */ 4204 gfx_v9_0_update_3d_clock_gating(adev, enable); 4205 /* === CGCG + CGLS === */ 4206 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4207 } else { 4208 /* CGCG/CGLS should be disabled before MGCG/MGLS 4209 * === CGCG + CGLS === 4210 */ 4211 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4212 /* === CGCG /CGLS for GFX 3D Only === */ 4213 gfx_v9_0_update_3d_clock_gating(adev, enable); 4214 /* === MGCG + MGLS === */ 4215 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4216 } 4217 return 0; 4218 } 4219 4220 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4221 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4222 .set_safe_mode = gfx_v9_0_set_safe_mode, 4223 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4224 .init = gfx_v9_0_rlc_init, 4225 .get_csb_size = gfx_v9_0_get_csb_size, 4226 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4227 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4228 .resume = gfx_v9_0_rlc_resume, 4229 .stop = gfx_v9_0_rlc_stop, 4230 .reset = gfx_v9_0_rlc_reset, 4231 .start = gfx_v9_0_rlc_start 4232 }; 4233 4234 static int gfx_v9_0_set_powergating_state(void *handle, 4235 enum amd_powergating_state state) 4236 { 4237 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4238 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4239 4240 switch (adev->asic_type) { 4241 case CHIP_RAVEN: 4242 if (!enable) { 4243 amdgpu_gfx_off_ctrl(adev, false); 4244 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4245 } 4246 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4247 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4248 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4249 } else { 4250 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4251 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4252 } 4253 4254 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4255 gfx_v9_0_enable_cp_power_gating(adev, true); 4256 else 4257 gfx_v9_0_enable_cp_power_gating(adev, false); 4258 4259 /* update gfx cgpg state */ 4260 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4261 4262 /* update mgcg state */ 4263 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4264 4265 if (enable) 4266 amdgpu_gfx_off_ctrl(adev, true); 4267 break; 4268 case CHIP_VEGA12: 4269 if (!enable) { 4270 amdgpu_gfx_off_ctrl(adev, false); 4271 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4272 } else { 4273 amdgpu_gfx_off_ctrl(adev, true); 4274 } 4275 break; 4276 default: 4277 break; 4278 } 4279 4280 return 0; 4281 } 4282 4283 static int gfx_v9_0_set_clockgating_state(void *handle, 4284 enum amd_clockgating_state state) 4285 { 4286 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4287 4288 if (amdgpu_sriov_vf(adev)) 4289 return 0; 4290 4291 switch (adev->asic_type) { 4292 case CHIP_VEGA10: 4293 case CHIP_VEGA12: 4294 case CHIP_VEGA20: 4295 case CHIP_RAVEN: 4296 gfx_v9_0_update_gfx_clock_gating(adev, 4297 state == AMD_CG_STATE_GATE ? 
true : false); 4298 break; 4299 default: 4300 break; 4301 } 4302 return 0; 4303 } 4304 4305 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4306 { 4307 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4308 int data; 4309 4310 if (amdgpu_sriov_vf(adev)) 4311 *flags = 0; 4312 4313 /* AMD_CG_SUPPORT_GFX_MGCG */ 4314 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4315 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4316 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4317 4318 /* AMD_CG_SUPPORT_GFX_CGCG */ 4319 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4320 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4321 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4322 4323 /* AMD_CG_SUPPORT_GFX_CGLS */ 4324 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4325 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4326 4327 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4328 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4329 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4330 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4331 4332 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4333 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4334 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4335 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4336 4337 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4338 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4339 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4340 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4341 4342 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4343 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4344 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4345 } 4346 4347 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4348 { 4349 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4350 } 4351 4352 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4353 { 4354 struct amdgpu_device *adev = ring->adev; 4355 u64 wptr; 4356 4357 /* XXX check if swapping is necessary on BE */ 4358 if (ring->use_doorbell) { 4359 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4360 } else { 4361 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4362 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4363 } 4364 4365 return wptr; 4366 } 4367 4368 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4369 { 4370 struct amdgpu_device *adev = ring->adev; 4371 4372 if (ring->use_doorbell) { 4373 /* XXX check if swapping is necessary on BE */ 4374 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4375 WDOORBELL64(ring->doorbell_index, ring->wptr); 4376 } else { 4377 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4378 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4379 } 4380 } 4381 4382 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4383 { 4384 struct amdgpu_device *adev = ring->adev; 4385 u32 ref_and_mask, reg_mem_engine; 4386 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4387 4388 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4389 switch (ring->me) { 4390 case 1: 4391 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4392 break; 4393 case 2: 4394 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4395 break; 4396 default: 4397 return; 4398 } 4399 reg_mem_engine = 0; 4400 } else { 4401 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4402 reg_mem_engine = 1; /* pfp */ 4403 } 4404 4405 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4406 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4407 
adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4408 ref_and_mask, ref_and_mask, 0x20); 4409 } 4410 4411 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4412 struct amdgpu_job *job, 4413 struct amdgpu_ib *ib, 4414 uint32_t flags) 4415 { 4416 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4417 u32 header, control = 0; 4418 4419 if (ib->flags & AMDGPU_IB_FLAG_CE) 4420 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4421 else 4422 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4423 4424 control |= ib->length_dw | (vmid << 24); 4425 4426 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4427 control |= INDIRECT_BUFFER_PRE_ENB(1); 4428 4429 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4430 gfx_v9_0_ring_emit_de_meta(ring); 4431 } 4432 4433 amdgpu_ring_write(ring, header); 4434 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4435 amdgpu_ring_write(ring, 4436 #ifdef __BIG_ENDIAN 4437 (2 << 0) | 4438 #endif 4439 lower_32_bits(ib->gpu_addr)); 4440 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4441 amdgpu_ring_write(ring, control); 4442 } 4443 4444 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4445 struct amdgpu_job *job, 4446 struct amdgpu_ib *ib, 4447 uint32_t flags) 4448 { 4449 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4450 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4451 4452 /* Currently, there is a high possibility to get wave ID mismatch 4453 * between ME and GDS, leading to a hw deadlock, because ME generates 4454 * different wave IDs than the GDS expects. This situation happens 4455 * randomly when at least 5 compute pipes use GDS ordered append. 4456 * The wave IDs generated by ME are also wrong after suspend/resume. 4457 * Those are probably bugs somewhere else in the kernel driver. 4458 * 4459 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4460 * GDS to 0 for this ring (me/pipe). 4461 */ 4462 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4463 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4464 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4465 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4466 } 4467 4468 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4469 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4470 amdgpu_ring_write(ring, 4471 #ifdef __BIG_ENDIAN 4472 (2 << 0) | 4473 #endif 4474 lower_32_bits(ib->gpu_addr)); 4475 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4476 amdgpu_ring_write(ring, control); 4477 } 4478 4479 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4480 u64 seq, unsigned flags) 4481 { 4482 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4483 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4484 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4485 4486 /* RELEASE_MEM - flush caches, send int */ 4487 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4488 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4489 EOP_TC_NC_ACTION_EN) : 4490 (EOP_TCL1_ACTION_EN | 4491 EOP_TC_ACTION_EN | 4492 EOP_TC_WB_ACTION_EN | 4493 EOP_TC_MD_ACTION_EN)) | 4494 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4495 EVENT_INDEX(5))); 4496 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4497 4498 /* 4499 * the address should be Qword aligned if 64bit write, Dword 4500 * aligned if only send 32bit data low (discard data high) 4501 */ 4502 if (write64bit) 4503 BUG_ON(addr & 0x7); 4504 else 4505 BUG_ON(addr & 0x3); 4506 amdgpu_ring_write(ring, lower_32_bits(addr)); 4507 amdgpu_ring_write(ring, upper_32_bits(addr)); 4508 amdgpu_ring_write(ring, lower_32_bits(seq)); 4509 amdgpu_ring_write(ring, upper_32_bits(seq)); 4510 amdgpu_ring_write(ring, 0); 4511 } 4512 4513 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4514 { 4515 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4516 uint32_t seq = ring->fence_drv.sync_seq; 4517 uint64_t addr = ring->fence_drv.gpu_addr; 4518 4519 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4520 lower_32_bits(addr), upper_32_bits(addr), 4521 seq, 0xffffffff, 4); 4522 } 4523 4524 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4525 unsigned vmid, uint64_t pd_addr) 4526 { 4527 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4528 4529 /* compute doesn't have PFP */ 4530 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4531 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4532 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4533 amdgpu_ring_write(ring, 0x0); 4534 } 4535 } 4536 4537 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4538 { 4539 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4540 } 4541 4542 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4543 { 4544 u64 wptr; 4545 4546 /* XXX check if swapping is necessary on BE */ 4547 if (ring->use_doorbell) 4548 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4549 else 4550 BUG(); 4551 return wptr; 4552 } 4553 4554 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4555 bool acquire) 4556 { 4557 struct amdgpu_device *adev = ring->adev; 4558 int pipe_num, tmp, reg; 4559 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4560 4561 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4562 4563 /* first me only has 2 entries, GFX and HP3D */ 4564 if (ring->me > 0) 4565 pipe_num -= 2; 4566 4567 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4568 tmp = RREG32(reg); 4569 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4570 WREG32(reg, tmp); 4571 } 4572 4573 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4574 struct amdgpu_ring *ring, 4575 bool acquire) 4576 { 4577 int i, pipe; 4578 bool reserve; 4579 struct amdgpu_ring *iring; 4580 4581 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4582 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4583 if (acquire) 4584 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4585 else 4586 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4587 4588 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4589 /* Clear all reservations - everyone reacquires all resources */ 4590 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4591 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4592 true); 4593 4594 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4595 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4596 true); 4597 } else { 4598 /* Lower all pipes without a current reservation */ 4599 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4600 iring = &adev->gfx.gfx_ring[i]; 4601 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4602 iring->me, 4603 iring->pipe, 4604 0); 4605 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4606 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4607 } 4608 4609 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4610 iring = &adev->gfx.compute_ring[i]; 4611 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4612 iring->me, 4613 iring->pipe, 4614 0); 4615 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4616 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4617 } 4618 } 4619 4620 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4621 } 4622 4623 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4624 struct amdgpu_ring *ring, 4625 bool acquire) 4626 { 4627 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4628 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4629 4630 mutex_lock(&adev->srbm_mutex); 4631 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4632 4633 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4634 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4635 4636 soc15_grbm_select(adev, 0, 0, 0, 0); 4637 mutex_unlock(&adev->srbm_mutex); 4638 } 4639 4640 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4641 enum drm_sched_priority priority) 4642 { 4643 struct amdgpu_device *adev = ring->adev; 4644 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4645 4646 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4647 return; 4648 4649 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4650 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4651 } 4652 4653 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4654 { 4655 struct amdgpu_device *adev = ring->adev; 4656 4657 /* XXX check if swapping is necessary on BE */ 4658 if (ring->use_doorbell) { 4659 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4660 WDOORBELL64(ring->doorbell_index, ring->wptr); 4661 } else{ 4662 BUG(); /* only DOORBELL method supported on gfx9 now */ 4663 } 4664 } 4665 4666 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4667 u64 seq, unsigned int flags) 4668 { 4669 struct amdgpu_device *adev = ring->adev; 4670 4671 /* we only allocate 32bit for each seq wb address */ 4672 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4673 4674 /* write fence seq to the "addr" */ 4675 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4676 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4677 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4678 amdgpu_ring_write(ring, lower_32_bits(addr)); 4679 amdgpu_ring_write(ring, upper_32_bits(addr)); 4680 amdgpu_ring_write(ring, lower_32_bits(seq)); 4681 4682 if (flags & AMDGPU_FENCE_FLAG_INT) { 4683 /* set register to trigger INT */ 4684 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4685 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4686 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4687 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4688 amdgpu_ring_write(ring, 0); 4689 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4690 } 4691 } 4692 4693 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4694 { 4695 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4696 amdgpu_ring_write(ring, 0); 4697 } 4698 4699 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 4700 { 4701 struct v9_ce_ib_state ce_payload = {0}; 4702 uint64_t csa_addr; 4703 int cnt; 4704 4705 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4706 csa_addr = amdgpu_csa_vaddr(ring->adev); 4707 4708 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4709 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4710 WRITE_DATA_DST_SEL(8) | 4711 WR_CONFIRM) | 4712 WRITE_DATA_CACHE_POLICY(0)); 4713 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4714 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4715 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 4716 } 4717 4718 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 4719 { 4720 struct v9_de_ib_state de_payload = {0}; 4721 uint64_t csa_addr, gds_addr; 4722 int cnt; 4723 4724 csa_addr = amdgpu_csa_vaddr(ring->adev); 4725 gds_addr = csa_addr + 4096; 4726 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 4727 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4728 4729 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4730 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4731 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4732 WRITE_DATA_DST_SEL(8) | 4733 WR_CONFIRM) | 4734 WRITE_DATA_CACHE_POLICY(0)); 4735 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4736 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4737 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 4738 } 4739 4740 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4741 { 4742 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4743 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 4744 } 4745 4746 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4747 { 4748 uint32_t dw2 = 0; 4749 4750 if (amdgpu_sriov_vf(ring->adev)) 4751 gfx_v9_0_ring_emit_ce_meta(ring); 4752 4753 gfx_v9_0_ring_emit_tmz(ring, true); 4754 4755 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4756 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4757 /* set load_global_config & load_global_uconfig */ 4758 dw2 |= 0x8001; 4759 /* set load_cs_sh_regs */ 4760 dw2 |= 0x01000000; 4761 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4762 dw2 |= 0x10002; 4763 4764 /* set load_ce_ram if preamble presented */ 4765 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4766 dw2 |= 0x10000000; 4767 } else { 4768 /* still load_ce_ram if this is the first time preamble presented 4769 * although there is no context switch happens. 4770 */ 4771 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4772 dw2 |= 0x10000000; 4773 } 4774 4775 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4776 amdgpu_ring_write(ring, dw2); 4777 amdgpu_ring_write(ring, 0); 4778 } 4779 4780 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4781 { 4782 unsigned ret; 4783 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4784 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4785 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4786 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4787 ret = ring->wptr & ring->buf_mask; 4788 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4789 return ret; 4790 } 4791 4792 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4793 { 4794 unsigned cur; 4795 BUG_ON(offset > ring->buf_mask); 4796 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4797 4798 cur = (ring->wptr & ring->buf_mask) - 1; 4799 if (likely(cur > offset)) 4800 ring->ring[offset] = cur - offset; 4801 else 4802 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 4803 } 4804 4805 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 4806 { 4807 struct amdgpu_device *adev = ring->adev; 4808 4809 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4810 amdgpu_ring_write(ring, 0 | /* src: register*/ 4811 (5 << 8) | /* dst: memory */ 4812 (1 << 20)); /* write confirm */ 4813 amdgpu_ring_write(ring, reg); 4814 amdgpu_ring_write(ring, 0); 4815 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4816 adev->virt.reg_val_offs * 4)); 4817 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4818 adev->virt.reg_val_offs * 4)); 4819 } 4820 4821 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4822 uint32_t val) 4823 { 4824 uint32_t cmd = 0; 4825 4826 switch (ring->funcs->type) { 4827 case AMDGPU_RING_TYPE_GFX: 4828 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4829 break; 4830 case AMDGPU_RING_TYPE_KIQ: 4831 cmd = (1 << 16); /* no inc addr */ 4832 break; 4833 default: 4834 cmd = WR_CONFIRM; 4835 break; 4836 } 4837 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4838 amdgpu_ring_write(ring, cmd); 4839 amdgpu_ring_write(ring, reg); 4840 amdgpu_ring_write(ring, 0); 4841 amdgpu_ring_write(ring, val); 4842 } 4843 4844 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4845 uint32_t val, uint32_t mask) 4846 { 4847 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4848 } 4849 4850 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 4851 uint32_t reg0, uint32_t reg1, 4852 uint32_t ref, uint32_t mask) 4853 { 4854 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4855 struct amdgpu_device *adev = ring->adev; 4856 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4857 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 4858 4859 if (fw_version_ok) 4860 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 4861 ref, mask, 0x20); 4862 else 4863 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 4864 ref, mask); 4865 } 4866 4867 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 4868 { 4869 struct amdgpu_device *adev = ring->adev; 4870 uint32_t value = 0; 4871 4872 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 4873 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 4874 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 4875 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 4876 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 4877 } 4878 4879 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4880 enum amdgpu_interrupt_state state) 4881 { 4882 switch (state) { 4883 case AMDGPU_IRQ_STATE_DISABLE: 4884 case AMDGPU_IRQ_STATE_ENABLE: 4885 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4886 TIME_STAMP_INT_ENABLE, 4887 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4888 break; 4889 default: 4890 break; 4891 } 4892 } 4893 4894 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4895 int me, int pipe, 4896 enum amdgpu_interrupt_state state) 4897 { 4898 u32 mec_int_cntl, mec_int_cntl_reg; 4899 4900 /* 4901 * amdgpu controls only the first MEC. That's why this function only 4902 * handles the setting of interrupts for this specific MEC. All other 4903 * pipes' interrupts are set by amdkfd. 
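	 * That is why only me == 1 is accepted below; requests for any other
	 * ME are rejected with a debug message.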
4904 */ 4905 4906 if (me == 1) { 4907 switch (pipe) { 4908 case 0: 4909 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4910 break; 4911 case 1: 4912 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4913 break; 4914 case 2: 4915 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4916 break; 4917 case 3: 4918 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4919 break; 4920 default: 4921 DRM_DEBUG("invalid pipe %d\n", pipe); 4922 return; 4923 } 4924 } else { 4925 DRM_DEBUG("invalid me %d\n", me); 4926 return; 4927 } 4928 4929 switch (state) { 4930 case AMDGPU_IRQ_STATE_DISABLE: 4931 mec_int_cntl = RREG32(mec_int_cntl_reg); 4932 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4933 TIME_STAMP_INT_ENABLE, 0); 4934 WREG32(mec_int_cntl_reg, mec_int_cntl); 4935 break; 4936 case AMDGPU_IRQ_STATE_ENABLE: 4937 mec_int_cntl = RREG32(mec_int_cntl_reg); 4938 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4939 TIME_STAMP_INT_ENABLE, 1); 4940 WREG32(mec_int_cntl_reg, mec_int_cntl); 4941 break; 4942 default: 4943 break; 4944 } 4945 } 4946 4947 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4948 struct amdgpu_irq_src *source, 4949 unsigned type, 4950 enum amdgpu_interrupt_state state) 4951 { 4952 switch (state) { 4953 case AMDGPU_IRQ_STATE_DISABLE: 4954 case AMDGPU_IRQ_STATE_ENABLE: 4955 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4956 PRIV_REG_INT_ENABLE, 4957 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4958 break; 4959 default: 4960 break; 4961 } 4962 4963 return 0; 4964 } 4965 4966 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4967 struct amdgpu_irq_src *source, 4968 unsigned type, 4969 enum amdgpu_interrupt_state state) 4970 { 4971 switch (state) { 4972 case AMDGPU_IRQ_STATE_DISABLE: 4973 case AMDGPU_IRQ_STATE_ENABLE: 4974 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4975 PRIV_INSTR_INT_ENABLE, 4976 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 4977 default: 4978 break; 4979 } 4980 4981 return 0; 4982 } 4983 4984 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 4985 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4986 CP_ECC_ERROR_INT_ENABLE, 1) 4987 4988 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 4989 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4990 CP_ECC_ERROR_INT_ENABLE, 0) 4991 4992 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 4993 struct amdgpu_irq_src *source, 4994 unsigned type, 4995 enum amdgpu_interrupt_state state) 4996 { 4997 switch (state) { 4998 case AMDGPU_IRQ_STATE_DISABLE: 4999 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5000 CP_ECC_ERROR_INT_ENABLE, 0); 5001 DISABLE_ECC_ON_ME_PIPE(1, 0); 5002 DISABLE_ECC_ON_ME_PIPE(1, 1); 5003 DISABLE_ECC_ON_ME_PIPE(1, 2); 5004 DISABLE_ECC_ON_ME_PIPE(1, 3); 5005 break; 5006 5007 case AMDGPU_IRQ_STATE_ENABLE: 5008 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5009 CP_ECC_ERROR_INT_ENABLE, 1); 5010 ENABLE_ECC_ON_ME_PIPE(1, 0); 5011 ENABLE_ECC_ON_ME_PIPE(1, 1); 5012 ENABLE_ECC_ON_ME_PIPE(1, 2); 5013 ENABLE_ECC_ON_ME_PIPE(1, 3); 5014 break; 5015 default: 5016 break; 5017 } 5018 5019 return 0; 5020 } 5021 5022 5023 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5024 struct amdgpu_irq_src *src, 5025 unsigned type, 5026 enum amdgpu_interrupt_state state) 5027 { 5028 switch (type) { 5029 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5030 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5031 break; 5032 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5033 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5034 break; 5035 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5036 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5037 break; 5038 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5039 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5040 break; 5041 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5042 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5043 break; 5044 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5045 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5046 break; 5047 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5048 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5049 break; 5050 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5051 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5052 break; 5053 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5054 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5055 break; 5056 default: 5057 break; 5058 } 5059 return 0; 5060 } 5061 5062 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5063 struct amdgpu_irq_src *source, 5064 struct amdgpu_iv_entry *entry) 5065 { 5066 int i; 5067 u8 me_id, pipe_id, queue_id; 5068 struct amdgpu_ring *ring; 5069 5070 DRM_DEBUG("IH: CP EOP\n"); 5071 me_id = (entry->ring_id & 0x0c) >> 2; 5072 pipe_id = (entry->ring_id & 0x03) >> 0; 5073 queue_id = (entry->ring_id & 0x70) >> 4; 5074 5075 switch (me_id) { 5076 case 0: 5077 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5078 break; 5079 case 1: 5080 case 2: 5081 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5082 ring = &adev->gfx.compute_ring[i]; 5083 /* Per-queue interrupt is supported for MEC starting from VI. 5084 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
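			 * The handler therefore still matches me/pipe/queue against
			 * each compute ring before signalling its fence.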
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	/* TODO: ue will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_UE;
}

static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
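/*
 * Note on the ring function tables below: .emit_frame_size is the
 * worst-case number of ring dwords the frame-level helpers may emit for a
 * single submission (the per-entry comments itemize that budget), while
 * .emit_ib_size is the size of one indirect-buffer packet; the IB packets
 * themselves are accounted separately, which is why the gfx table's total
 * is quoted as a maximum "if 16 IBs".
 */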
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jumps to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
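/*
 * The KIQ (kernel interface queue) ring below reuses the compute rptr/wptr
 * helpers but is driven only by the driver itself, so it provides just the
 * fence, register read/write and register-wait emit callbacks and carries
 * no IB, VM-flush or GDS hooks.
 */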
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}
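/*
 * CU bitmap helpers: CC_GC_SHADER_ARRAY_CONFIG holds the hardware
 * (harvest) inactive-CU mask and GC_USER_SHADER_ARRAY_CONFIG the
 * driver-requested one for the currently selected SE/SH.
 * gfx_v9_0_get_cu_active_bitmap() ORs the two inactive masks together and
 * inverts the result under a max_cu_per_sh-wide mask to yield the
 * active-CU bitmap consumed by gfx_v9_0_get_cu_info().
 */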
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};