/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 98 99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 106 107 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 109 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 110 111 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 112 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 113 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 114 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 115 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 116 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 117 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 118 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 119 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 120 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 121 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 122 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 123 124 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 125 { 126 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 127 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 129 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 130 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 131 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 132 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 146 }; 147 148 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 149 { 150 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 151 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 152 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 153 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 154 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
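	/*
	 * GFXOFF stays enabled on Vega and on Raven2/Picasso; on original
	 * Raven it is only kept when the RLC firmware is a known-good
	 * version, otherwise the feature is masked off below.
	 */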
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

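/*
 * Fetch and validate the RLC microcode for @chip_name (including the AM4
 * Picasso and Raven "kicker" variants) and cache the save/restore list
 * metadata from its header for later RLC setup.
 */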
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
				       const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
					      const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}
	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	int r;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	default:
		BUG();
	}

	/* No CPG in Arcturus */
	if (adev->asic_type != CHIP_ARCTURUS) {
		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
		if (r)
			return r;
	}

	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
	if (r)
		return r;

	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
	if (r)
		return r;

	return r;
}

static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			  AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

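/* Read one wave-private register through the SQ_IND_INDEX/SQ_IND_DATA pair. */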
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};

static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	int err;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VEGA12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case CHIP_VEGA20:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	case CHIP_RAVEN:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		if (adev->rev_id >= 8)
			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
		else
			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_ARCTURUS:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
NUM_SHADER_ENGINES); 1526 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1527 REG_GET_FIELD( 1528 adev->gfx.config.gb_addr_config, 1529 GB_ADDR_CONFIG, 1530 PIPE_INTERLEAVE_SIZE)); 1531 1532 return 0; 1533 } 1534 1535 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1536 struct amdgpu_ngg_buf *ngg_buf, 1537 int size_se, 1538 int default_size_se) 1539 { 1540 int r; 1541 1542 if (size_se < 0) { 1543 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1544 return -EINVAL; 1545 } 1546 size_se = size_se ? size_se : default_size_se; 1547 1548 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1549 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1550 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1551 &ngg_buf->bo, 1552 &ngg_buf->gpu_addr, 1553 NULL); 1554 if (r) { 1555 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1556 return r; 1557 } 1558 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1559 1560 return r; 1561 } 1562 1563 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1564 { 1565 int i; 1566 1567 for (i = 0; i < NGG_BUF_MAX; i++) 1568 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1569 &adev->gfx.ngg.buf[i].gpu_addr, 1570 NULL); 1571 1572 memset(&adev->gfx.ngg.buf[0], 0, 1573 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1574 1575 adev->gfx.ngg.init = false; 1576 1577 return 0; 1578 } 1579 1580 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 1581 { 1582 int r; 1583 1584 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 1585 return 0; 1586 1587 /* GDS reserve memory: 64 bytes alignment */ 1588 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 1589 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 1590 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 1591 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 1592 1593 /* Primitive Buffer */ 1594 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 1595 amdgpu_prim_buf_per_se, 1596 64 * 1024); 1597 if (r) { 1598 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 1599 goto err; 1600 } 1601 1602 /* Position Buffer */ 1603 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 1604 amdgpu_pos_buf_per_se, 1605 256 * 1024); 1606 if (r) { 1607 dev_err(adev->dev, "Failed to create Position Buffer\n"); 1608 goto err; 1609 } 1610 1611 /* Control Sideband */ 1612 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 1613 amdgpu_cntl_sb_buf_per_se, 1614 256); 1615 if (r) { 1616 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 1617 goto err; 1618 } 1619 1620 /* Parameter Cache, not created by default */ 1621 if (amdgpu_param_buf_per_se <= 0) 1622 goto out; 1623 1624 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 1625 amdgpu_param_buf_per_se, 1626 512 * 1024); 1627 if (r) { 1628 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 1629 goto err; 1630 } 1631 1632 out: 1633 adev->gfx.ngg.init = true; 1634 return 0; 1635 err: 1636 gfx_v9_0_ngg_fini(adev); 1637 return r; 1638 } 1639 1640 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 1641 { 1642 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 1643 int r; 1644 u32 data, base; 1645 1646 if (!amdgpu_ngg) 1647 return 0; 1648 1649 /* Program buffer size */ 1650 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 1651 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 1652 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 1653 adev->gfx.ngg.buf[NGG_POS].size >> 8); 1654 WREG32_SOC15(GC, 0, 
mmWD_BUF_RESOURCE_1, data);

	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
			  ring->name, r);
		return r;
	}

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
				   (adev->gds.gds_size +
				    adev->gfx.ngg.gds_reserve_size));

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				 PACKET3_DMA_DATA_DST_SEL(1) |
				 PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
				adev->gfx.ngg.gds_reserve_size);

	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX9_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static int
gfx_v9_0_sw_init(void *handle) 1755 { 1756 int i, j, k, r, ring_id; 1757 struct amdgpu_ring *ring; 1758 struct amdgpu_kiq *kiq; 1759 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1760 1761 switch (adev->asic_type) { 1762 case CHIP_VEGA10: 1763 case CHIP_VEGA12: 1764 case CHIP_VEGA20: 1765 case CHIP_RAVEN: 1766 case CHIP_ARCTURUS: 1767 adev->gfx.mec.num_mec = 2; 1768 break; 1769 default: 1770 adev->gfx.mec.num_mec = 1; 1771 break; 1772 } 1773 1774 adev->gfx.mec.num_pipe_per_mec = 4; 1775 adev->gfx.mec.num_queue_per_pipe = 8; 1776 1777 /* EOP Event */ 1778 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 1779 if (r) 1780 return r; 1781 1782 /* Privileged reg */ 1783 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 1784 &adev->gfx.priv_reg_irq); 1785 if (r) 1786 return r; 1787 1788 /* Privileged inst */ 1789 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 1790 &adev->gfx.priv_inst_irq); 1791 if (r) 1792 return r; 1793 1794 /* ECC error */ 1795 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 1796 &adev->gfx.cp_ecc_error_irq); 1797 if (r) 1798 return r; 1799 1800 /* FUE error */ 1801 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 1802 &adev->gfx.cp_ecc_error_irq); 1803 if (r) 1804 return r; 1805 1806 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1807 1808 gfx_v9_0_scratch_init(adev); 1809 1810 r = gfx_v9_0_init_microcode(adev); 1811 if (r) { 1812 DRM_ERROR("Failed to load gfx firmware!\n"); 1813 return r; 1814 } 1815 1816 r = adev->gfx.rlc.funcs->init(adev); 1817 if (r) { 1818 DRM_ERROR("Failed to init rlc BOs!\n"); 1819 return r; 1820 } 1821 1822 r = gfx_v9_0_mec_init(adev); 1823 if (r) { 1824 DRM_ERROR("Failed to init MEC BOs!\n"); 1825 return r; 1826 } 1827 1828 /* set up the gfx ring */ 1829 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1830 ring = &adev->gfx.gfx_ring[i]; 1831 ring->ring_obj = NULL; 1832 if (!i) 1833 sprintf(ring->name, "gfx"); 1834 else 1835 sprintf(ring->name, "gfx_%d", i); 1836 ring->use_doorbell = true; 1837 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1838 r = amdgpu_ring_init(adev, ring, 1024, 1839 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 1840 if (r) 1841 return r; 1842 } 1843 1844 /* set up the compute queues - allocate horizontally across pipes */ 1845 ring_id = 0; 1846 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1847 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1848 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1849 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1850 continue; 1851 1852 r = gfx_v9_0_compute_ring_init(adev, 1853 ring_id, 1854 i, k, j); 1855 if (r) 1856 return r; 1857 1858 ring_id++; 1859 } 1860 } 1861 } 1862 1863 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1864 if (r) { 1865 DRM_ERROR("Failed to init KIQ BOs!\n"); 1866 return r; 1867 } 1868 1869 kiq = &adev->gfx.kiq; 1870 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1871 if (r) 1872 return r; 1873 1874 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1875 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 1876 if (r) 1877 return r; 1878 1879 adev->gfx.ce_ram_size = 0x8000; 1880 1881 r = gfx_v9_0_gpu_early_init(adev); 1882 if (r) 1883 return r; 1884 1885 r = gfx_v9_0_ngg_init(adev); 1886 if (r) 1887 return r; 1888 1889 return 0; 1890 } 1891 1892 
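/*
 * Note: the teardown in gfx_v9_0_sw_fini() below is intended to mirror
 * gfx_v9_0_sw_init() in reverse order: RAS bookkeeping first, then the
 * gfx and compute rings, the MQD/KIQ objects, the MEC and NGG buffers,
 * the clear-state BO (plus the RLC cp table on Raven), and finally the
 * cached microcode.
 */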
1893 static int gfx_v9_0_sw_fini(void *handle) 1894 { 1895 int i; 1896 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1897 1898 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 1899 adev->gfx.ras_if) { 1900 struct ras_common_if *ras_if = adev->gfx.ras_if; 1901 struct ras_ih_if ih_info = { 1902 .head = *ras_if, 1903 }; 1904 1905 amdgpu_ras_debugfs_remove(adev, ras_if); 1906 amdgpu_ras_sysfs_remove(adev, ras_if); 1907 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 1908 amdgpu_ras_feature_enable(adev, ras_if, 0); 1909 kfree(ras_if); 1910 } 1911 1912 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1913 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1914 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1915 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1916 1917 amdgpu_gfx_mqd_sw_fini(adev); 1918 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1919 amdgpu_gfx_kiq_fini(adev); 1920 1921 gfx_v9_0_mec_fini(adev); 1922 gfx_v9_0_ngg_fini(adev); 1923 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1924 if (adev->asic_type == CHIP_RAVEN) { 1925 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 1926 &adev->gfx.rlc.cp_table_gpu_addr, 1927 (void **)&adev->gfx.rlc.cp_table_ptr); 1928 } 1929 gfx_v9_0_free_microcode(adev); 1930 1931 return 0; 1932 } 1933 1934 1935 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1936 { 1937 /* TODO */ 1938 } 1939 1940 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1941 { 1942 u32 data; 1943 1944 if (instance == 0xffffffff) 1945 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1946 else 1947 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1948 1949 if (se_num == 0xffffffff) 1950 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1951 else 1952 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1953 1954 if (sh_num == 0xffffffff) 1955 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1956 else 1957 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1958 1959 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 1960 } 1961 1962 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1963 { 1964 u32 data, mask; 1965 1966 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1967 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1968 1969 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1970 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1971 1972 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1973 adev->gfx.config.max_sh_per_se); 1974 1975 return (~data) & mask; 1976 } 1977 1978 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1979 { 1980 int i, j; 1981 u32 data; 1982 u32 active_rbs = 0; 1983 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1984 adev->gfx.config.max_sh_per_se; 1985 1986 mutex_lock(&adev->grbm_idx_mutex); 1987 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1988 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1989 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1990 data = gfx_v9_0_get_rb_active_bitmap(adev); 1991 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1992 rb_bitmap_width_per_sh); 1993 } 1994 } 1995 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1996 mutex_unlock(&adev->grbm_idx_mutex); 1997 1998 adev->gfx.config.backend_enable_mask = active_rbs; 1999 adev->gfx.config.num_rbs = 
hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	 * access. These should be enabled by FW for target VMIDs.
	 */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v9_0_tiling_mode_table_init(adev);

	gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
					    !!amdgpu_noretry);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
					    !!amdgpu_noretry);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
					    (adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
					    (adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v9_0_init_compute_vmid(adev);
}

static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k <
adev->usec_timeout; k++) { 2099 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2100 break; 2101 udelay(1); 2102 } 2103 if (k == adev->usec_timeout) { 2104 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2105 0xffffffff, 0xffffffff); 2106 mutex_unlock(&adev->grbm_idx_mutex); 2107 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2108 i, j); 2109 return; 2110 } 2111 } 2112 } 2113 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2114 mutex_unlock(&adev->grbm_idx_mutex); 2115 2116 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2117 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2118 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2119 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2120 for (k = 0; k < adev->usec_timeout; k++) { 2121 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2122 break; 2123 udelay(1); 2124 } 2125 } 2126 2127 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2128 bool enable) 2129 { 2130 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2131 2132 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2133 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2134 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2135 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2136 2137 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2138 } 2139 2140 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2141 { 2142 /* csib */ 2143 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2144 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2145 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2146 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2147 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2148 adev->gfx.rlc.clear_state_size); 2149 } 2150 2151 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2152 int indirect_offset, 2153 int list_size, 2154 int *unique_indirect_regs, 2155 int unique_indirect_reg_count, 2156 int *indirect_start_offsets, 2157 int *indirect_start_offsets_count, 2158 int max_start_offsets_count) 2159 { 2160 int idx; 2161 2162 for (; indirect_offset < list_size; indirect_offset++) { 2163 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2164 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2165 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2166 2167 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2168 indirect_offset += 2; 2169 2170 /* look for the matching indice */ 2171 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2172 if (unique_indirect_regs[idx] == 2173 register_list_format[indirect_offset] || 2174 !unique_indirect_regs[idx]) 2175 break; 2176 } 2177 2178 BUG_ON(idx >= unique_indirect_reg_count); 2179 2180 if (!unique_indirect_regs[idx]) 2181 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2182 2183 indirect_offset++; 2184 } 2185 } 2186 } 2187 2188 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2189 { 2190 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2191 int unique_indirect_reg_count = 0; 2192 2193 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2194 int indirect_start_offsets_count = 0; 2195 2196 int list_size = 0; 2197 int i = 0, j = 0; 2198 u32 tmp = 0; 2199 2200 u32 *register_list_format = 2201 
kmemdup(adev->gfx.rlc.register_list_format, 2202 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2203 if (!register_list_format) 2204 return -ENOMEM; 2205 2206 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2207 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2208 gfx_v9_1_parse_ind_reg_list(register_list_format, 2209 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2210 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2211 unique_indirect_regs, 2212 unique_indirect_reg_count, 2213 indirect_start_offsets, 2214 &indirect_start_offsets_count, 2215 ARRAY_SIZE(indirect_start_offsets)); 2216 2217 /* enable auto inc in case it is disabled */ 2218 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2219 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2220 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2221 2222 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2223 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2224 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2225 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2226 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2227 adev->gfx.rlc.register_restore[i]); 2228 2229 /* load indirect register */ 2230 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2231 adev->gfx.rlc.reg_list_format_start); 2232 2233 /* direct register portion */ 2234 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2235 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2236 register_list_format[i]); 2237 2238 /* indirect register portion */ 2239 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2240 if (register_list_format[i] == 0xFFFFFFFF) { 2241 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2242 continue; 2243 } 2244 2245 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2246 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2247 2248 for (j = 0; j < unique_indirect_reg_count; j++) { 2249 if (register_list_format[i] == unique_indirect_regs[j]) { 2250 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2251 break; 2252 } 2253 } 2254 2255 BUG_ON(j >= unique_indirect_reg_count); 2256 2257 i++; 2258 } 2259 2260 /* set save/restore list size */ 2261 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2262 list_size = list_size >> 1; 2263 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2264 adev->gfx.rlc.reg_restore_list_size); 2265 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2266 2267 /* write the starting offsets to RLC scratch ram */ 2268 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2269 adev->gfx.rlc.starting_offsets_start); 2270 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2271 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2272 indirect_start_offsets[i]); 2273 2274 /* load unique indirect regs*/ 2275 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2276 if (unique_indirect_regs[i] != 0) { 2277 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2278 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2279 unique_indirect_regs[i] & 0x3FFFF); 2280 2281 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2282 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2283 unique_indirect_regs[i] >> 20); 2284 } 2285 } 2286 2287 kfree(register_list_format); 2288 return 0; 2289 } 2290 2291 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2292 { 2293 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, 
SRM_ENABLE, 1);
}

static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
	if (enable) {
		/* enable GFXIP control over CGPG */
		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);

		/* update status */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	} else {
		/* restore GFXIP control over CGPG */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	}
}

static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* init IDLE_POLL_COUNT = 60 */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);

		/* init RLC PG Delay */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;

		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);

		pwr_10_0_gfxip_control_over_cgpg(adev, true);
	}
}

static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	data = REG_SET_FIELD(data, RLC_PG_CNTL,
			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
			     enable ?
1 : 0); 2374 if (default_data != data) 2375 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2376 } 2377 2378 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2379 bool enable) 2380 { 2381 uint32_t data = 0; 2382 uint32_t default_data = 0; 2383 2384 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2385 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2386 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2387 enable ? 1 : 0); 2388 if(default_data != data) 2389 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2390 } 2391 2392 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2393 bool enable) 2394 { 2395 uint32_t data = 0; 2396 uint32_t default_data = 0; 2397 2398 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2399 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2400 CP_PG_DISABLE, 2401 enable ? 0 : 1); 2402 if(default_data != data) 2403 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2404 } 2405 2406 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2407 bool enable) 2408 { 2409 uint32_t data, default_data; 2410 2411 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2412 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2413 GFX_POWER_GATING_ENABLE, 2414 enable ? 1 : 0); 2415 if(default_data != data) 2416 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2417 } 2418 2419 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2420 bool enable) 2421 { 2422 uint32_t data, default_data; 2423 2424 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2425 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2426 GFX_PIPELINE_PG_ENABLE, 2427 enable ? 1 : 0); 2428 if(default_data != data) 2429 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2430 2431 if (!enable) 2432 /* read any GFX register to wake up GFX */ 2433 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2434 } 2435 2436 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2437 bool enable) 2438 { 2439 uint32_t data, default_data; 2440 2441 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2442 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2443 STATIC_PER_CU_PG_ENABLE, 2444 enable ? 1 : 0); 2445 if(default_data != data) 2446 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2447 } 2448 2449 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2450 bool enable) 2451 { 2452 uint32_t data, default_data; 2453 2454 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2455 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2456 DYN_PER_CU_PG_ENABLE, 2457 enable ? 1 : 0); 2458 if(default_data != data) 2459 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2460 } 2461 2462 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2463 { 2464 gfx_v9_0_init_csb(adev); 2465 2466 /* 2467 * Rlc save restore list is workable since v2_1. 2468 * And it's needed by gfxoff feature. 
2469 */ 2470 if (adev->gfx.rlc.is_rlc_v2_1) { 2471 gfx_v9_1_init_rlc_save_restore_list(adev); 2472 gfx_v9_0_enable_save_restore_machine(adev); 2473 } 2474 2475 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2476 AMD_PG_SUPPORT_GFX_SMG | 2477 AMD_PG_SUPPORT_GFX_DMG | 2478 AMD_PG_SUPPORT_CP | 2479 AMD_PG_SUPPORT_GDS | 2480 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2481 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2482 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2483 gfx_v9_0_init_gfx_power_gating(adev); 2484 } 2485 } 2486 2487 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2488 { 2489 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2490 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2491 gfx_v9_0_wait_for_rlc_serdes(adev); 2492 } 2493 2494 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2495 { 2496 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2497 udelay(50); 2498 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2499 udelay(50); 2500 } 2501 2502 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2503 { 2504 #ifdef AMDGPU_RLC_DEBUG_RETRY 2505 u32 rlc_ucode_ver; 2506 #endif 2507 2508 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2509 udelay(50); 2510 2511 /* carrizo do enable cp interrupt after cp inited */ 2512 if (!(adev->flags & AMD_IS_APU)) { 2513 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2514 udelay(50); 2515 } 2516 2517 #ifdef AMDGPU_RLC_DEBUG_RETRY 2518 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2519 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2520 if(rlc_ucode_ver == 0x108) { 2521 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2522 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2523 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2524 * default is 0x9C4 to create a 100us interval */ 2525 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2526 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2527 * to disable the page fault retry interrupts, default is 2528 * 0x100 (256) */ 2529 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2530 } 2531 #endif 2532 } 2533 2534 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2535 { 2536 const struct rlc_firmware_header_v2_0 *hdr; 2537 const __le32 *fw_data; 2538 unsigned i, fw_size; 2539 2540 if (!adev->gfx.rlc_fw) 2541 return -EINVAL; 2542 2543 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2544 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2545 2546 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2547 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2548 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2549 2550 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2551 RLCG_UCODE_LOADING_START_ADDRESS); 2552 for (i = 0; i < fw_size; i++) 2553 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2554 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2555 2556 return 0; 2557 } 2558 2559 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2560 { 2561 int r; 2562 2563 if (amdgpu_sriov_vf(adev)) { 2564 gfx_v9_0_init_csb(adev); 2565 return 0; 2566 } 2567 2568 adev->gfx.rlc.funcs->stop(adev); 2569 2570 /* disable CG */ 2571 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2572 2573 gfx_v9_0_init_pg(adev); 2574 2575 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2576 /* legacy rlc firmware loading */ 2577 r = gfx_v9_0_rlc_load_microcode(adev); 2578 if (r) 2579 return r; 2580 } 2581 2582 switch (adev->asic_type) { 2583 case CHIP_RAVEN: 2584 if (amdgpu_lbpw == 0) 2585 
gfx_v9_0_enable_lbpw(adev, false); 2586 else 2587 gfx_v9_0_enable_lbpw(adev, true); 2588 break; 2589 case CHIP_VEGA20: 2590 if (amdgpu_lbpw > 0) 2591 gfx_v9_0_enable_lbpw(adev, true); 2592 else 2593 gfx_v9_0_enable_lbpw(adev, false); 2594 break; 2595 default: 2596 break; 2597 } 2598 2599 adev->gfx.rlc.funcs->start(adev); 2600 2601 return 0; 2602 } 2603 2604 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2605 { 2606 int i; 2607 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2608 2609 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2610 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2611 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 2612 if (!enable) { 2613 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2614 adev->gfx.gfx_ring[i].sched.ready = false; 2615 } 2616 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2617 udelay(50); 2618 } 2619 2620 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2621 { 2622 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2623 const struct gfx_firmware_header_v1_0 *ce_hdr; 2624 const struct gfx_firmware_header_v1_0 *me_hdr; 2625 const __le32 *fw_data; 2626 unsigned i, fw_size; 2627 2628 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2629 return -EINVAL; 2630 2631 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2632 adev->gfx.pfp_fw->data; 2633 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2634 adev->gfx.ce_fw->data; 2635 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2636 adev->gfx.me_fw->data; 2637 2638 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2639 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2640 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2641 2642 gfx_v9_0_cp_gfx_enable(adev, false); 2643 2644 /* PFP */ 2645 fw_data = (const __le32 *) 2646 (adev->gfx.pfp_fw->data + 2647 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2648 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2649 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2650 for (i = 0; i < fw_size; i++) 2651 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2652 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2653 2654 /* CE */ 2655 fw_data = (const __le32 *) 2656 (adev->gfx.ce_fw->data + 2657 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2658 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2659 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2660 for (i = 0; i < fw_size; i++) 2661 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2662 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2663 2664 /* ME */ 2665 fw_data = (const __le32 *) 2666 (adev->gfx.me_fw->data + 2667 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2668 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2669 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2670 for (i = 0; i < fw_size; i++) 2671 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2672 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2673 2674 return 0; 2675 } 2676 2677 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2678 { 2679 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2680 const struct cs_section_def *sect = NULL; 2681 const struct cs_extent_def *ext = NULL; 2682 int r, i, tmp; 2683 2684 /* init the CP */ 2685 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2686 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2687 2688 gfx_v9_0_cp_gfx_enable(adev, true); 2689 2690 r 
= amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2691 if (r) { 2692 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2693 return r; 2694 } 2695 2696 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2697 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2698 2699 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2700 amdgpu_ring_write(ring, 0x80000000); 2701 amdgpu_ring_write(ring, 0x80000000); 2702 2703 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2704 for (ext = sect->section; ext->extent != NULL; ++ext) { 2705 if (sect->id == SECT_CONTEXT) { 2706 amdgpu_ring_write(ring, 2707 PACKET3(PACKET3_SET_CONTEXT_REG, 2708 ext->reg_count)); 2709 amdgpu_ring_write(ring, 2710 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2711 for (i = 0; i < ext->reg_count; i++) 2712 amdgpu_ring_write(ring, ext->extent[i]); 2713 } 2714 } 2715 } 2716 2717 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2718 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2719 2720 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2721 amdgpu_ring_write(ring, 0); 2722 2723 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2724 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2725 amdgpu_ring_write(ring, 0x8000); 2726 amdgpu_ring_write(ring, 0x8000); 2727 2728 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 2729 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2730 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2731 amdgpu_ring_write(ring, tmp); 2732 amdgpu_ring_write(ring, 0); 2733 2734 amdgpu_ring_commit(ring); 2735 2736 return 0; 2737 } 2738 2739 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2740 { 2741 struct amdgpu_ring *ring; 2742 u32 tmp; 2743 u32 rb_bufsz; 2744 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2745 2746 /* Set the write pointer delay */ 2747 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2748 2749 /* set the RB to use vmid 0 */ 2750 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2751 2752 /* Set ring buffer size */ 2753 ring = &adev->gfx.gfx_ring[0]; 2754 rb_bufsz = order_base_2(ring->ring_size / 8); 2755 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2756 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2757 #ifdef __BIG_ENDIAN 2758 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2759 #endif 2760 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2761 2762 /* Initialize the ring buffer's write pointers */ 2763 ring->wptr = 0; 2764 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2765 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2766 2767 /* set the wb address wether it's enabled or not */ 2768 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2769 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2770 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2771 2772 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2773 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 2774 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 2775 2776 mdelay(1); 2777 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2778 2779 rb_addr = ring->gpu_addr >> 8; 2780 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2781 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2782 2783 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2784 if (ring->use_doorbell) { 2785 tmp = 
REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2786 DOORBELL_OFFSET, ring->doorbell_index); 2787 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2788 DOORBELL_EN, 1); 2789 } else { 2790 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 2791 } 2792 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2793 2794 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2795 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2796 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2797 2798 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2799 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2800 2801 2802 /* start the ring */ 2803 gfx_v9_0_cp_gfx_start(adev); 2804 ring->sched.ready = true; 2805 2806 return 0; 2807 } 2808 2809 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2810 { 2811 int i; 2812 2813 if (enable) { 2814 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 2815 } else { 2816 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 2817 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2818 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2819 adev->gfx.compute_ring[i].sched.ready = false; 2820 adev->gfx.kiq.ring.sched.ready = false; 2821 } 2822 udelay(50); 2823 } 2824 2825 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2826 { 2827 const struct gfx_firmware_header_v1_0 *mec_hdr; 2828 const __le32 *fw_data; 2829 unsigned i; 2830 u32 tmp; 2831 2832 if (!adev->gfx.mec_fw) 2833 return -EINVAL; 2834 2835 gfx_v9_0_cp_compute_enable(adev, false); 2836 2837 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2838 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2839 2840 fw_data = (const __le32 *) 2841 (adev->gfx.mec_fw->data + 2842 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2843 tmp = 0; 2844 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2845 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2846 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2847 2848 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2849 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2850 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2851 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2852 2853 /* MEC1 */ 2854 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2855 mec_hdr->jt_offset); 2856 for (i = 0; i < mec_hdr->jt_size; i++) 2857 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2858 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2859 2860 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2861 adev->gfx.mec_fw_version); 2862 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 2863 2864 return 0; 2865 } 2866 2867 /* KIQ functions */ 2868 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2869 { 2870 uint32_t tmp; 2871 struct amdgpu_device *adev = ring->adev; 2872 2873 /* tell RLC which is KIQ queue */ 2874 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2875 tmp &= 0xffffff00; 2876 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2877 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2878 tmp |= 0x80; 2879 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2880 } 2881 2882 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2883 { 2884 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2885 uint64_t queue_mask = 0; 2886 int r, i; 2887 2888 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2889 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2890 continue; 2891 2892 /* This situation may be hit in the future if a new HW 2893 * generation exposes more than 64 queues. If so, the 2894 * definition of queue_mask needs updating */ 2895 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 2896 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 2897 break; 2898 } 2899 2900 queue_mask |= (1ull << i); 2901 } 2902 2903 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 2904 if (r) { 2905 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2906 return r; 2907 } 2908 2909 /* set resources */ 2910 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 2911 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 2912 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 2913 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 2914 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 2915 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 2916 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 2917 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 2918 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 2919 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2920 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2921 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 2922 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2923 2924 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 2925 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 2926 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 2927 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 2928 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 2929 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 2930 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2931 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | 2932 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2933 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 2934 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2935 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2936 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2937 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2938 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2939 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2940 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2941 } 2942 2943 r = amdgpu_ring_test_helper(kiq_ring); 2944 if (r) 2945 DRM_ERROR("KCQ enable failed\n"); 2946 2947 return r; 2948 } 2949 2950 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2951 { 2952 struct amdgpu_device *adev = ring->adev; 2953 struct v9_mqd *mqd = ring->mqd_ptr; 2954 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2955 uint32_t tmp; 2956 2957 mqd->header = 0xC0310800; 2958 mqd->compute_pipelinestat_enable = 0x00000001; 2959 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2960 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2961 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2962 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2963 mqd->compute_misc_reserved = 0x00000003; 2964 2965 mqd->dynamic_cu_mask_addr_lo = 2966 lower_32_bits(ring->mqd_gpu_addr 2967 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2968 mqd->dynamic_cu_mask_addr_hi = 2969 upper_32_bits(ring->mqd_gpu_addr 2970 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2971 2972 eop_base_addr = ring->eop_gpu_addr >> 8; 2973 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2974 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2975 2976 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2977 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2978 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2979 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2980 2981 mqd->cp_hqd_eop_control = tmp; 2982 2983 /* enable doorbell? 
*/ 2984 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2985 2986 if (ring->use_doorbell) { 2987 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2988 DOORBELL_OFFSET, ring->doorbell_index); 2989 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2990 DOORBELL_EN, 1); 2991 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2992 DOORBELL_SOURCE, 0); 2993 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2994 DOORBELL_HIT, 0); 2995 } else { 2996 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2997 DOORBELL_EN, 0); 2998 } 2999 3000 mqd->cp_hqd_pq_doorbell_control = tmp; 3001 3002 /* disable the queue if it's active */ 3003 ring->wptr = 0; 3004 mqd->cp_hqd_dequeue_request = 0; 3005 mqd->cp_hqd_pq_rptr = 0; 3006 mqd->cp_hqd_pq_wptr_lo = 0; 3007 mqd->cp_hqd_pq_wptr_hi = 0; 3008 3009 /* set the pointer to the MQD */ 3010 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3011 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3012 3013 /* set MQD vmid to 0 */ 3014 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3015 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3016 mqd->cp_mqd_control = tmp; 3017 3018 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3019 hqd_gpu_addr = ring->gpu_addr >> 8; 3020 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3021 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3022 3023 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3024 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3025 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3026 (order_base_2(ring->ring_size / 4) - 1)); 3027 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3028 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3029 #ifdef __BIG_ENDIAN 3030 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3031 #endif 3032 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3033 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3034 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3035 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3036 mqd->cp_hqd_pq_control = tmp; 3037 3038 /* set the wb address whether it's enabled or not */ 3039 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3040 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3041 mqd->cp_hqd_pq_rptr_report_addr_hi = 3042 upper_32_bits(wb_gpu_addr) & 0xffff; 3043 3044 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3045 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3046 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3047 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3048 3049 tmp = 0; 3050 /* enable the doorbell if requested */ 3051 if (ring->use_doorbell) { 3052 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3053 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3054 DOORBELL_OFFSET, ring->doorbell_index); 3055 3056 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3057 DOORBELL_EN, 1); 3058 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3059 DOORBELL_SOURCE, 0); 3060 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3061 DOORBELL_HIT, 0); 3062 } 3063 3064 mqd->cp_hqd_pq_doorbell_control = tmp; 3065 3066 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3067 ring->wptr = 0; 3068 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3069 3070 /* set the vmid for the queue */ 3071 mqd->cp_hqd_vmid = 0; 3072 3073 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3074 
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3075 mqd->cp_hqd_persistent_state = tmp; 3076 3077 /* set MIN_IB_AVAIL_SIZE */ 3078 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3079 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3080 mqd->cp_hqd_ib_control = tmp; 3081 3082 /* activate the queue */ 3083 mqd->cp_hqd_active = 1; 3084 3085 return 0; 3086 } 3087 3088 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3089 { 3090 struct amdgpu_device *adev = ring->adev; 3091 struct v9_mqd *mqd = ring->mqd_ptr; 3092 int j; 3093 3094 /* disable wptr polling */ 3095 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3096 3097 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3098 mqd->cp_hqd_eop_base_addr_lo); 3099 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3100 mqd->cp_hqd_eop_base_addr_hi); 3101 3102 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3103 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3104 mqd->cp_hqd_eop_control); 3105 3106 /* enable doorbell? */ 3107 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3108 mqd->cp_hqd_pq_doorbell_control); 3109 3110 /* disable the queue if it's active */ 3111 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3112 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3113 for (j = 0; j < adev->usec_timeout; j++) { 3114 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3115 break; 3116 udelay(1); 3117 } 3118 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3119 mqd->cp_hqd_dequeue_request); 3120 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3121 mqd->cp_hqd_pq_rptr); 3122 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3123 mqd->cp_hqd_pq_wptr_lo); 3124 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3125 mqd->cp_hqd_pq_wptr_hi); 3126 } 3127 3128 /* set the pointer to the MQD */ 3129 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3130 mqd->cp_mqd_base_addr_lo); 3131 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3132 mqd->cp_mqd_base_addr_hi); 3133 3134 /* set MQD vmid to 0 */ 3135 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3136 mqd->cp_mqd_control); 3137 3138 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3139 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3140 mqd->cp_hqd_pq_base_lo); 3141 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3142 mqd->cp_hqd_pq_base_hi); 3143 3144 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3145 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3146 mqd->cp_hqd_pq_control); 3147 3148 /* set the wb address whether it's enabled or not */ 3149 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3150 mqd->cp_hqd_pq_rptr_report_addr_lo); 3151 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3152 mqd->cp_hqd_pq_rptr_report_addr_hi); 3153 3154 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3155 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3156 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3157 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3158 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3159 3160 /* enable the doorbell if requested */ 3161 if (ring->use_doorbell) { 3162 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3163 (adev->doorbell_index.kiq * 2) << 2); 3164 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3165 (adev->doorbell_index.userqueue_end * 2) << 2); 3166 } 3167 3168 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3169 mqd->cp_hqd_pq_doorbell_control); 3170 3171 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3172 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3173 
mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
			 mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
			 mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
			 mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int j;

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);

		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (j == adev->usec_timeout) {
			DRM_DEBUG("KIQ dequeue request failed.\n");

			/* Manual disable if dequeue request times out */
			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
		}

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);

	return 0;
}

static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v9_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
	}

	return 0;
}

static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3281 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3282 mutex_lock(&adev->srbm_mutex); 3283 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3284 gfx_v9_0_mqd_init(ring); 3285 soc15_grbm_select(adev, 0, 0, 0, 0); 3286 mutex_unlock(&adev->srbm_mutex); 3287 3288 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3289 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3290 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3291 /* reset MQD to a clean status */ 3292 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3293 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3294 3295 /* reset ring buffer */ 3296 ring->wptr = 0; 3297 amdgpu_ring_clear_ring(ring); 3298 } else { 3299 amdgpu_ring_clear_ring(ring); 3300 } 3301 3302 return 0; 3303 } 3304 3305 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3306 { 3307 struct amdgpu_ring *ring; 3308 int r; 3309 3310 ring = &adev->gfx.kiq.ring; 3311 3312 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3313 if (unlikely(r != 0)) 3314 return r; 3315 3316 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3317 if (unlikely(r != 0)) 3318 return r; 3319 3320 gfx_v9_0_kiq_init_queue(ring); 3321 amdgpu_bo_kunmap(ring->mqd_obj); 3322 ring->mqd_ptr = NULL; 3323 amdgpu_bo_unreserve(ring->mqd_obj); 3324 ring->sched.ready = true; 3325 return 0; 3326 } 3327 3328 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3329 { 3330 struct amdgpu_ring *ring = NULL; 3331 int r = 0, i; 3332 3333 gfx_v9_0_cp_compute_enable(adev, true); 3334 3335 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3336 ring = &adev->gfx.compute_ring[i]; 3337 3338 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3339 if (unlikely(r != 0)) 3340 goto done; 3341 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3342 if (!r) { 3343 r = gfx_v9_0_kcq_init_queue(ring); 3344 amdgpu_bo_kunmap(ring->mqd_obj); 3345 ring->mqd_ptr = NULL; 3346 } 3347 amdgpu_bo_unreserve(ring->mqd_obj); 3348 if (r) 3349 goto done; 3350 } 3351 3352 r = gfx_v9_0_kiq_kcq_enable(adev); 3353 done: 3354 return r; 3355 } 3356 3357 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3358 { 3359 int r, i; 3360 struct amdgpu_ring *ring; 3361 3362 if (!(adev->flags & AMD_IS_APU)) 3363 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3364 3365 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3366 if (adev->asic_type != CHIP_ARCTURUS) { 3367 /* legacy firmware loading */ 3368 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3369 if (r) 3370 return r; 3371 } 3372 3373 r = gfx_v9_0_cp_compute_load_microcode(adev); 3374 if (r) 3375 return r; 3376 } 3377 3378 r = gfx_v9_0_kiq_resume(adev); 3379 if (r) 3380 return r; 3381 3382 if (adev->asic_type != CHIP_ARCTURUS) { 3383 r = gfx_v9_0_cp_gfx_resume(adev); 3384 if (r) 3385 return r; 3386 } 3387 3388 r = gfx_v9_0_kcq_resume(adev); 3389 if (r) 3390 return r; 3391 3392 if (adev->asic_type != CHIP_ARCTURUS) { 3393 ring = &adev->gfx.gfx_ring[0]; 3394 r = amdgpu_ring_test_helper(ring); 3395 if (r) 3396 return r; 3397 } 3398 3399 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3400 ring = &adev->gfx.compute_ring[i]; 3401 amdgpu_ring_test_helper(ring); 3402 } 3403 3404 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3405 3406 return 0; 3407 } 3408 3409 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3410 { 3411 if (adev->asic_type != CHIP_ARCTURUS) 3412 gfx_v9_0_cp_gfx_enable(adev, enable); 3413 
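    /* compute queues exist on every gfx9 part (including Arcturus, which has
     * no gfx ring), so the compute CP is always toggled here
     */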
gfx_v9_0_cp_compute_enable(adev, enable); 3414 } 3415 3416 static int gfx_v9_0_hw_init(void *handle) 3417 { 3418 int r; 3419 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3420 3421 gfx_v9_0_init_golden_registers(adev); 3422 3423 gfx_v9_0_constants_init(adev); 3424 3425 r = gfx_v9_0_csb_vram_pin(adev); 3426 if (r) 3427 return r; 3428 3429 r = adev->gfx.rlc.funcs->resume(adev); 3430 if (r) 3431 return r; 3432 3433 r = gfx_v9_0_cp_resume(adev); 3434 if (r) 3435 return r; 3436 3437 if (adev->asic_type != CHIP_ARCTURUS) { 3438 r = gfx_v9_0_ngg_en(adev); 3439 if (r) 3440 return r; 3441 } 3442 3443 return r; 3444 } 3445 3446 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3447 { 3448 int r, i; 3449 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3450 3451 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3452 if (r) 3453 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3454 3455 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3456 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3457 3458 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3459 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3460 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3461 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3462 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3463 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3464 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3465 amdgpu_ring_write(kiq_ring, 0); 3466 amdgpu_ring_write(kiq_ring, 0); 3467 amdgpu_ring_write(kiq_ring, 0); 3468 } 3469 r = amdgpu_ring_test_helper(kiq_ring); 3470 if (r) 3471 DRM_ERROR("KCQ disable failed\n"); 3472 3473 return r; 3474 } 3475 3476 static int gfx_v9_0_hw_fini(void *handle) 3477 { 3478 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3479 3480 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3481 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3482 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3483 3484 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3485 gfx_v9_0_kcq_disable(adev); 3486 3487 if (amdgpu_sriov_vf(adev)) { 3488 gfx_v9_0_cp_gfx_enable(adev, false); 3489 /* must disable polling for SRIOV when hw finished, otherwise 3490 * CPC engine may still keep fetching WB address which is already 3491 * invalid after sw finished and trigger DMAR reading error in 3492 * hypervisor side. 
3493 */ 3494 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3495 return 0; 3496 } 3497 3498 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3499 * otherwise KIQ is hanging when binding back 3500 */ 3501 if (!adev->in_gpu_reset && !adev->in_suspend) { 3502 mutex_lock(&adev->srbm_mutex); 3503 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3504 adev->gfx.kiq.ring.pipe, 3505 adev->gfx.kiq.ring.queue, 0); 3506 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3507 soc15_grbm_select(adev, 0, 0, 0, 0); 3508 mutex_unlock(&adev->srbm_mutex); 3509 } 3510 3511 gfx_v9_0_cp_enable(adev, false); 3512 adev->gfx.rlc.funcs->stop(adev); 3513 3514 gfx_v9_0_csb_vram_unpin(adev); 3515 3516 return 0; 3517 } 3518 3519 static int gfx_v9_0_suspend(void *handle) 3520 { 3521 return gfx_v9_0_hw_fini(handle); 3522 } 3523 3524 static int gfx_v9_0_resume(void *handle) 3525 { 3526 return gfx_v9_0_hw_init(handle); 3527 } 3528 3529 static bool gfx_v9_0_is_idle(void *handle) 3530 { 3531 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3532 3533 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3534 GRBM_STATUS, GUI_ACTIVE)) 3535 return false; 3536 else 3537 return true; 3538 } 3539 3540 static int gfx_v9_0_wait_for_idle(void *handle) 3541 { 3542 unsigned i; 3543 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3544 3545 for (i = 0; i < adev->usec_timeout; i++) { 3546 if (gfx_v9_0_is_idle(handle)) 3547 return 0; 3548 udelay(1); 3549 } 3550 return -ETIMEDOUT; 3551 } 3552 3553 static int gfx_v9_0_soft_reset(void *handle) 3554 { 3555 u32 grbm_soft_reset = 0; 3556 u32 tmp; 3557 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3558 3559 /* GRBM_STATUS */ 3560 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3561 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3562 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3563 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3564 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3565 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3566 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3567 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3568 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3569 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3570 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3571 } 3572 3573 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3574 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3575 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3576 } 3577 3578 /* GRBM_STATUS2 */ 3579 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3580 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3581 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3582 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3583 3584 3585 if (grbm_soft_reset) { 3586 /* stop the rlc */ 3587 adev->gfx.rlc.funcs->stop(adev); 3588 3589 if (adev->asic_type != CHIP_ARCTURUS) 3590 /* Disable GFX parsing/prefetching */ 3591 gfx_v9_0_cp_gfx_enable(adev, false); 3592 3593 /* Disable MEC parsing/prefetching */ 3594 gfx_v9_0_cp_compute_enable(adev, false); 3595 3596 if (grbm_soft_reset) { 3597 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3598 tmp |= grbm_soft_reset; 3599 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3600 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3601 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3602 3603 udelay(50); 3604 3605 tmp &= ~grbm_soft_reset; 3606 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3607 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3608 } 3609 3610 /* Wait a 
little for things to settle down */ 3611 udelay(50); 3612 } 3613 return 0; 3614 } 3615 3616 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3617 { 3618 uint64_t clock; 3619 3620 mutex_lock(&adev->gfx.gpu_clock_mutex); 3621 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3622 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3623 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3624 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3625 return clock; 3626 } 3627 3628 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3629 uint32_t vmid, 3630 uint32_t gds_base, uint32_t gds_size, 3631 uint32_t gws_base, uint32_t gws_size, 3632 uint32_t oa_base, uint32_t oa_size) 3633 { 3634 struct amdgpu_device *adev = ring->adev; 3635 3636 /* GDS Base */ 3637 gfx_v9_0_write_data_to_reg(ring, 0, false, 3638 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3639 gds_base); 3640 3641 /* GDS Size */ 3642 gfx_v9_0_write_data_to_reg(ring, 0, false, 3643 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3644 gds_size); 3645 3646 /* GWS */ 3647 gfx_v9_0_write_data_to_reg(ring, 0, false, 3648 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3649 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3650 3651 /* OA */ 3652 gfx_v9_0_write_data_to_reg(ring, 0, false, 3653 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3654 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3655 } 3656 3657 static const u32 vgpr_init_compute_shader[] = 3658 { 3659 0xb07c0000, 0xbe8000ff, 3660 0x000000f8, 0xbf110800, 3661 0x7e000280, 0x7e020280, 3662 0x7e040280, 0x7e060280, 3663 0x7e080280, 0x7e0a0280, 3664 0x7e0c0280, 0x7e0e0280, 3665 0x80808800, 0xbe803200, 3666 0xbf84fff5, 0xbf9c0000, 3667 0xd28c0001, 0x0001007f, 3668 0xd28d0001, 0x0002027e, 3669 0x10020288, 0xb8810904, 3670 0xb7814000, 0xd1196a01, 3671 0x00000301, 0xbe800087, 3672 0xbefc00c1, 0xd89c4000, 3673 0x00020201, 0xd89cc080, 3674 0x00040401, 0x320202ff, 3675 0x00000800, 0x80808100, 3676 0xbf84fff8, 0x7e020280, 3677 0xbf810000, 0x00000000, 3678 }; 3679 3680 static const u32 sgpr_init_compute_shader[] = 3681 { 3682 0xb07c0000, 0xbe8000ff, 3683 0x0000005f, 0xbee50080, 3684 0xbe812c65, 0xbe822c65, 3685 0xbe832c65, 0xbe842c65, 3686 0xbe852c65, 0xb77c0005, 3687 0x80808500, 0xbf84fff8, 3688 0xbe800080, 0xbf810000, 3689 }; 3690 3691 static const struct soc15_reg_entry vgpr_init_regs[] = { 3692 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3693 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3694 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3695 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3696 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3697 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3698 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3699 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3700 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3701 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3702 }; 3703 3704 static const struct soc15_reg_entry sgpr_init_regs[] = { 3705 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3706 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3707 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3708 { 
SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3709 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3710 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3711 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3712 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3713 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3714 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3715 }; 3716 3717 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3718 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3719 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3720 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3721 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3722 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3723 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3724 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3725 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3726 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3727 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3728 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3729 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3730 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3731 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3732 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3733 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3734 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3735 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3736 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3737 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3738 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3739 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3740 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 3741 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 3742 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 3743 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 3744 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 3745 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 3746 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 3747 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 3748 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 3749 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 3750 }; 3751 3752 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 3753 { 3754 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3755 int i, r; 3756 3757 r = amdgpu_ring_alloc(ring, 7); 3758 if (r) { 3759 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 3760 ring->name, r); 3761 return r; 3762 } 3763 3764 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 3765 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 3766 3767 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 3768 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 3769 PACKET3_DMA_DATA_DST_SEL(1) | 3770 PACKET3_DMA_DATA_SRC_SEL(2) | 3771 PACKET3_DMA_DATA_ENGINE(0))); 3772 amdgpu_ring_write(ring, 0); 3773 amdgpu_ring_write(ring, 0); 3774 amdgpu_ring_write(ring, 0); 3775 amdgpu_ring_write(ring, 0); 3776 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 3777 adev->gds.gds_size); 3778 3779 amdgpu_ring_commit(ring); 3780 3781 for (i = 0; i < adev->usec_timeout; i++) { 3782 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 3783 break; 
3784 udelay(1); 3785 } 3786 3787 if (i >= adev->usec_timeout) 3788 r = -ETIMEDOUT; 3789 3790 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 3791 3792 return r; 3793 } 3794 3795 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 3796 { 3797 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3798 struct amdgpu_ib ib; 3799 struct dma_fence *f = NULL; 3800 int r, i, j, k; 3801 unsigned total_size, vgpr_offset, sgpr_offset; 3802 u64 gpu_addr; 3803 3804 /* only support when RAS is enabled */ 3805 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3806 return 0; 3807 3808 /* bail if the compute ring is not ready */ 3809 if (!ring->sched.ready) 3810 return 0; 3811 3812 total_size = 3813 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3814 total_size += 3815 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3816 total_size = ALIGN(total_size, 256); 3817 vgpr_offset = total_size; 3818 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 3819 sgpr_offset = total_size; 3820 total_size += sizeof(sgpr_init_compute_shader); 3821 3822 /* allocate an indirect buffer to put the commands in */ 3823 memset(&ib, 0, sizeof(ib)); 3824 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 3825 if (r) { 3826 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 3827 return r; 3828 } 3829 3830 /* load the compute shaders */ 3831 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 3832 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 3833 3834 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 3835 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 3836 3837 /* init the ib length to 0 */ 3838 ib.length_dw = 0; 3839 3840 /* VGPR */ 3841 /* write the register state for the compute dispatch */ 3842 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 3843 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3844 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 3845 - PACKET3_SET_SH_REG_START; 3846 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 3847 } 3848 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3849 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 3850 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3851 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 3852 - PACKET3_SET_SH_REG_START; 3853 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3854 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3855 3856 /* write dispatch packet */ 3857 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3858 ib.ptr[ib.length_dw++] = 128; /* x */ 3859 ib.ptr[ib.length_dw++] = 1; /* y */ 3860 ib.ptr[ib.length_dw++] = 1; /* z */ 3861 ib.ptr[ib.length_dw++] = 3862 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3863 3864 /* write CS partial flush packet */ 3865 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3866 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3867 3868 /* SGPR */ 3869 /* write the register state for the compute dispatch */ 3870 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 3871 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3872 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 3873 - PACKET3_SET_SH_REG_START; 3874 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 3875 } 3876 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3877 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 3878 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3879 
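    /* the next three dwords program mmCOMPUTE_PGM_LO/_HI with the SGPR shader
     * base; the CP takes this address in 256-byte units, hence the
     * gpu_addr >> 8 above
     */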
    ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
                             - PACKET3_SET_SH_REG_START;
    ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
    ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

    /* write dispatch packet */
    ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
    ib.ptr[ib.length_dw++] = 128; /* x */
    ib.ptr[ib.length_dw++] = 1; /* y */
    ib.ptr[ib.length_dw++] = 1; /* z */
    ib.ptr[ib.length_dw++] =
        REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

    /* write CS partial flush packet */
    ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
    ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

    /* schedule the ib on the ring */
    r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
    if (r) {
        DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
        goto fail;
    }

    /* wait for the GPU to finish processing the IB */
    r = dma_fence_wait(f, false);
    if (r) {
        DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
        goto fail;
    }

    /* read back registers to clear the counters */
    mutex_lock(&adev->grbm_idx_mutex);
    for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
        for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
            for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
                gfx_v9_0_select_se_sh(adev, j, 0x0, k);
                RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
            }
        }
    }
    WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
    mutex_unlock(&adev->grbm_idx_mutex);

fail:
    amdgpu_ib_free(adev, &ib, NULL);
    dma_fence_put(f);

    return r;
}

static int gfx_v9_0_early_init(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    if (adev->asic_type == CHIP_ARCTURUS)
        adev->gfx.num_gfx_rings = 0;
    else
        adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
    adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
    gfx_v9_0_set_ring_funcs(adev);
    gfx_v9_0_set_irq_funcs(adev);
    gfx_v9_0_set_gds_init(adev);
    gfx_v9_0_set_rlc_funcs(adev);

    return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
                                        struct amdgpu_iv_entry *entry);

static int gfx_v9_0_ecc_late_init(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    struct ras_common_if **ras_if = &adev->gfx.ras_if;
    struct ras_ih_if ih_info = {
        .cb = gfx_v9_0_process_ras_data_cb,
    };
    struct ras_fs_if fs_info = {
        .sysfs_name = "gfx_err_count",
        .debugfs_name = "gfx_err_inject",
    };
    struct ras_common_if ras_block = {
        .block = AMDGPU_RAS_BLOCK__GFX,
        .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
        .sub_block_index = 0,
        .name = "gfx",
    };
    int r;

    if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
        amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
        return 0;
    }

    r = gfx_v9_0_do_edc_gds_workarounds(adev);
    if (r)
        return r;

    /* requires IBs so do in late init after IB pool is initialized */
    r = gfx_v9_0_do_edc_gpr_workarounds(adev);
    if (r)
        return r;

    /* handle resume path. */
    if (*ras_if) {
        /* resend ras TA enable cmd during resume.
         * prepare to handle failure.
3987 */ 3988 ih_info.head = **ras_if; 3989 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3990 if (r) { 3991 if (r == -EAGAIN) { 3992 /* request a gpu reset. will run again. */ 3993 amdgpu_ras_request_reset_on_boot(adev, 3994 AMDGPU_RAS_BLOCK__GFX); 3995 return 0; 3996 } 3997 /* fail to enable ras, cleanup all. */ 3998 goto irq; 3999 } 4000 /* enable successfully. continue. */ 4001 goto resume; 4002 } 4003 4004 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 4005 if (!*ras_if) 4006 return -ENOMEM; 4007 4008 **ras_if = ras_block; 4009 4010 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4011 if (r) { 4012 if (r == -EAGAIN) { 4013 amdgpu_ras_request_reset_on_boot(adev, 4014 AMDGPU_RAS_BLOCK__GFX); 4015 r = 0; 4016 } 4017 goto feature; 4018 } 4019 4020 ih_info.head = **ras_if; 4021 fs_info.head = **ras_if; 4022 4023 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 4024 if (r) 4025 goto interrupt; 4026 4027 amdgpu_ras_debugfs_create(adev, &fs_info); 4028 4029 r = amdgpu_ras_sysfs_create(adev, &fs_info); 4030 if (r) 4031 goto sysfs; 4032 resume: 4033 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 4034 if (r) 4035 goto irq; 4036 4037 return 0; 4038 irq: 4039 amdgpu_ras_sysfs_remove(adev, *ras_if); 4040 sysfs: 4041 amdgpu_ras_debugfs_remove(adev, *ras_if); 4042 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 4043 interrupt: 4044 amdgpu_ras_feature_enable(adev, *ras_if, 0); 4045 feature: 4046 kfree(*ras_if); 4047 *ras_if = NULL; 4048 return r; 4049 } 4050 4051 static int gfx_v9_0_late_init(void *handle) 4052 { 4053 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4054 int r; 4055 4056 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4057 if (r) 4058 return r; 4059 4060 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4061 if (r) 4062 return r; 4063 4064 r = gfx_v9_0_ecc_late_init(handle); 4065 if (r) 4066 return r; 4067 4068 return 0; 4069 } 4070 4071 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4072 { 4073 uint32_t rlc_setting; 4074 4075 /* if RLC is not enabled, do nothing */ 4076 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4077 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4078 return false; 4079 4080 return true; 4081 } 4082 4083 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4084 { 4085 uint32_t data; 4086 unsigned i; 4087 4088 data = RLC_SAFE_MODE__CMD_MASK; 4089 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4090 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4091 4092 /* wait for RLC_SAFE_MODE */ 4093 for (i = 0; i < adev->usec_timeout; i++) { 4094 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4095 break; 4096 udelay(1); 4097 } 4098 } 4099 4100 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4101 { 4102 uint32_t data; 4103 4104 data = RLC_SAFE_MODE__CMD_MASK; 4105 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4106 } 4107 4108 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4109 bool enable) 4110 { 4111 amdgpu_gfx_rlc_enter_safe_mode(adev); 4112 4113 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4114 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4115 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4116 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4117 } else { 4118 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4119 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4120 } 4121 4122 amdgpu_gfx_rlc_exit_safe_mode(adev); 4123 } 4124 4125 static void 
gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4126 bool enable) 4127 { 4128 /* TODO: double check if we need to perform under safe mode */ 4129 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4130 4131 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4132 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4133 else 4134 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4135 4136 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4137 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4138 else 4139 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4140 4141 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4142 } 4143 4144 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4145 bool enable) 4146 { 4147 uint32_t data, def; 4148 4149 amdgpu_gfx_rlc_enter_safe_mode(adev); 4150 4151 /* It is disabled by HW by default */ 4152 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4153 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4154 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4155 4156 if (adev->asic_type != CHIP_VEGA12) 4157 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4158 4159 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4160 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4161 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4162 4163 /* only for Vega10 & Raven1 */ 4164 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4165 4166 if (def != data) 4167 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4168 4169 /* MGLS is a global flag to control all MGLS in GFX */ 4170 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4171 /* 2 - RLC memory Light sleep */ 4172 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4173 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4174 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4175 if (def != data) 4176 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4177 } 4178 /* 3 - CP memory Light sleep */ 4179 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4180 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4181 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4182 if (def != data) 4183 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4184 } 4185 } 4186 } else { 4187 /* 1 - MGCG_OVERRIDE */ 4188 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4189 4190 if (adev->asic_type != CHIP_VEGA12) 4191 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4192 4193 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4194 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4195 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4196 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4197 4198 if (def != data) 4199 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4200 4201 /* 2 - disable MGLS in RLC */ 4202 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4203 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4204 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4205 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4206 } 4207 4208 /* 3 - disable MGLS in CP */ 4209 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4210 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4211 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4212 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4213 } 4214 } 4215 4216 amdgpu_gfx_rlc_exit_safe_mode(adev); 4217 } 4218 4219 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4220 bool enable) 4221 { 4222 uint32_t data, def; 4223 4224 amdgpu_gfx_rlc_enter_safe_mode(adev); 
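    /* the 3D CGCG/CGLS reprogramming below is done entirely while the RLC is
     * held in safe mode; safe mode is released at the end of this function
     */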
4225 4226 /* Enable 3D CGCG/CGLS */ 4227 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4228 /* write cmd to clear cgcg/cgls ov */ 4229 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4230 /* unset CGCG override */ 4231 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4232 /* update CGCG and CGLS override bits */ 4233 if (def != data) 4234 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4235 4236 /* enable 3Dcgcg FSM(0x0000363f) */ 4237 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4238 4239 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4240 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4241 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4242 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4243 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4244 if (def != data) 4245 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4246 4247 /* set IDLE_POLL_COUNT(0x00900100) */ 4248 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4249 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4250 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4251 if (def != data) 4252 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4253 } else { 4254 /* Disable CGCG/CGLS */ 4255 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4256 /* disable cgcg, cgls should be disabled */ 4257 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4258 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4259 /* disable cgcg and cgls in FSM */ 4260 if (def != data) 4261 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4262 } 4263 4264 amdgpu_gfx_rlc_exit_safe_mode(adev); 4265 } 4266 4267 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4268 bool enable) 4269 { 4270 uint32_t def, data; 4271 4272 amdgpu_gfx_rlc_enter_safe_mode(adev); 4273 4274 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4275 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4276 /* unset CGCG override */ 4277 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4278 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4279 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4280 else 4281 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4282 /* update CGCG and CGLS override bits */ 4283 if (def != data) 4284 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4285 4286 /* enable cgcg FSM(0x0000363F) */ 4287 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4288 4289 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4290 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4291 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4292 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4293 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4294 if (def != data) 4295 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4296 4297 /* set IDLE_POLL_COUNT(0x00900100) */ 4298 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4299 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4300 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4301 if (def != data) 4302 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4303 } else { 4304 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4305 /* reset CGCG/CGLS bits */ 4306 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4307 /* disable cgcg and cgls in FSM */ 4308 if (def != data) 4309 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4310 } 4311 4312 amdgpu_gfx_rlc_exit_safe_mode(adev); 4313 } 4314 
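/* Toggle all GFX clock-gating features. Ordering matters: MGCG/MGLS are
 * enabled before CGCG/CGLS, and disabled in the reverse order (see the
 * comments in the function body below).
 */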
4315 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4316 bool enable) 4317 { 4318 if (enable) { 4319 /* CGCG/CGLS should be enabled after MGCG/MGLS 4320 * === MGCG + MGLS === 4321 */ 4322 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4323 /* === CGCG /CGLS for GFX 3D Only === */ 4324 gfx_v9_0_update_3d_clock_gating(adev, enable); 4325 /* === CGCG + CGLS === */ 4326 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4327 } else { 4328 /* CGCG/CGLS should be disabled before MGCG/MGLS 4329 * === CGCG + CGLS === 4330 */ 4331 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4332 /* === CGCG /CGLS for GFX 3D Only === */ 4333 gfx_v9_0_update_3d_clock_gating(adev, enable); 4334 /* === MGCG + MGLS === */ 4335 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4336 } 4337 return 0; 4338 } 4339 4340 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4341 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4342 .set_safe_mode = gfx_v9_0_set_safe_mode, 4343 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4344 .init = gfx_v9_0_rlc_init, 4345 .get_csb_size = gfx_v9_0_get_csb_size, 4346 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4347 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4348 .resume = gfx_v9_0_rlc_resume, 4349 .stop = gfx_v9_0_rlc_stop, 4350 .reset = gfx_v9_0_rlc_reset, 4351 .start = gfx_v9_0_rlc_start 4352 }; 4353 4354 static int gfx_v9_0_set_powergating_state(void *handle, 4355 enum amd_powergating_state state) 4356 { 4357 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4358 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4359 4360 switch (adev->asic_type) { 4361 case CHIP_RAVEN: 4362 if (!enable) { 4363 amdgpu_gfx_off_ctrl(adev, false); 4364 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4365 } 4366 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4367 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4368 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4369 } else { 4370 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4371 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4372 } 4373 4374 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4375 gfx_v9_0_enable_cp_power_gating(adev, true); 4376 else 4377 gfx_v9_0_enable_cp_power_gating(adev, false); 4378 4379 /* update gfx cgpg state */ 4380 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4381 4382 /* update mgcg state */ 4383 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4384 4385 if (enable) 4386 amdgpu_gfx_off_ctrl(adev, true); 4387 break; 4388 case CHIP_VEGA12: 4389 if (!enable) { 4390 amdgpu_gfx_off_ctrl(adev, false); 4391 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4392 } else { 4393 amdgpu_gfx_off_ctrl(adev, true); 4394 } 4395 break; 4396 default: 4397 break; 4398 } 4399 4400 return 0; 4401 } 4402 4403 static int gfx_v9_0_set_clockgating_state(void *handle, 4404 enum amd_clockgating_state state) 4405 { 4406 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4407 4408 if (amdgpu_sriov_vf(adev)) 4409 return 0; 4410 4411 switch (adev->asic_type) { 4412 case CHIP_VEGA10: 4413 case CHIP_VEGA12: 4414 case CHIP_VEGA20: 4415 case CHIP_RAVEN: 4416 gfx_v9_0_update_gfx_clock_gating(adev, 4417 state == AMD_CG_STATE_GATE ? 
true : false); 4418 break; 4419 default: 4420 break; 4421 } 4422 return 0; 4423 } 4424 4425 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4426 { 4427 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4428 int data; 4429 4430 if (amdgpu_sriov_vf(adev)) 4431 *flags = 0; 4432 4433 /* AMD_CG_SUPPORT_GFX_MGCG */ 4434 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4435 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4436 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4437 4438 /* AMD_CG_SUPPORT_GFX_CGCG */ 4439 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4440 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4441 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4442 4443 /* AMD_CG_SUPPORT_GFX_CGLS */ 4444 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4445 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4446 4447 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4448 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4449 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4450 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4451 4452 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4453 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4454 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4455 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4456 4457 if (adev->asic_type != CHIP_ARCTURUS) { 4458 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4459 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4460 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4461 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4462 4463 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4464 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4465 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4466 } 4467 } 4468 4469 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4470 { 4471 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4472 } 4473 4474 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4475 { 4476 struct amdgpu_device *adev = ring->adev; 4477 u64 wptr; 4478 4479 /* XXX check if swapping is necessary on BE */ 4480 if (ring->use_doorbell) { 4481 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4482 } else { 4483 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4484 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4485 } 4486 4487 return wptr; 4488 } 4489 4490 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4491 { 4492 struct amdgpu_device *adev = ring->adev; 4493 4494 if (ring->use_doorbell) { 4495 /* XXX check if swapping is necessary on BE */ 4496 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4497 WDOORBELL64(ring->doorbell_index, ring->wptr); 4498 } else { 4499 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4500 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4501 } 4502 } 4503 4504 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4505 { 4506 struct amdgpu_device *adev = ring->adev; 4507 u32 ref_and_mask, reg_mem_engine; 4508 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4509 4510 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4511 switch (ring->me) { 4512 case 1: 4513 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4514 break; 4515 case 2: 4516 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4517 break; 4518 default: 4519 return; 4520 } 4521 reg_mem_engine = 0; 4522 } else { 4523 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4524 reg_mem_engine = 1; /* pfp */ 4525 } 4526 4527 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4528 
adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4529 adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4530 ref_and_mask, ref_and_mask, 0x20); 4531 } 4532 4533 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4534 struct amdgpu_job *job, 4535 struct amdgpu_ib *ib, 4536 uint32_t flags) 4537 { 4538 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4539 u32 header, control = 0; 4540 4541 if (ib->flags & AMDGPU_IB_FLAG_CE) 4542 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4543 else 4544 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4545 4546 control |= ib->length_dw | (vmid << 24); 4547 4548 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4549 control |= INDIRECT_BUFFER_PRE_ENB(1); 4550 4551 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4552 gfx_v9_0_ring_emit_de_meta(ring); 4553 } 4554 4555 amdgpu_ring_write(ring, header); 4556 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4557 amdgpu_ring_write(ring, 4558 #ifdef __BIG_ENDIAN 4559 (2 << 0) | 4560 #endif 4561 lower_32_bits(ib->gpu_addr)); 4562 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4563 amdgpu_ring_write(ring, control); 4564 } 4565 4566 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4567 struct amdgpu_job *job, 4568 struct amdgpu_ib *ib, 4569 uint32_t flags) 4570 { 4571 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4572 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4573 4574 /* Currently, there is a high possibility to get wave ID mismatch 4575 * between ME and GDS, leading to a hw deadlock, because ME generates 4576 * different wave IDs than the GDS expects. This situation happens 4577 * randomly when at least 5 compute pipes use GDS ordered append. 4578 * The wave IDs generated by ME are also wrong after suspend/resume. 4579 * Those are probably bugs somewhere else in the kernel driver. 4580 * 4581 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4582 * GDS to 0 for this ring (me/pipe). 4583 */ 4584 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4585 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4586 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4587 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4588 } 4589 4590 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4591 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4592 amdgpu_ring_write(ring, 4593 #ifdef __BIG_ENDIAN 4594 (2 << 0) | 4595 #endif 4596 lower_32_bits(ib->gpu_addr)); 4597 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4598 amdgpu_ring_write(ring, control); 4599 } 4600 4601 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4602 u64 seq, unsigned flags) 4603 { 4604 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4605 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4606 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4607 4608 /* RELEASE_MEM - flush caches, send int */ 4609 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4610 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4611 EOP_TC_NC_ACTION_EN) : 4612 (EOP_TCL1_ACTION_EN | 4613 EOP_TC_ACTION_EN | 4614 EOP_TC_WB_ACTION_EN | 4615 EOP_TC_MD_ACTION_EN)) | 4616 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4617 EVENT_INDEX(5))); 4618 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4619 4620 /* 4621 * the address should be Qword aligned if 64bit write, Dword 4622 * aligned if only send 32bit data low (discard data high) 4623 */ 4624 if (write64bit) 4625 BUG_ON(addr & 0x7); 4626 else 4627 BUG_ON(addr & 0x3); 4628 amdgpu_ring_write(ring, lower_32_bits(addr)); 4629 amdgpu_ring_write(ring, upper_32_bits(addr)); 4630 amdgpu_ring_write(ring, lower_32_bits(seq)); 4631 amdgpu_ring_write(ring, upper_32_bits(seq)); 4632 amdgpu_ring_write(ring, 0); 4633 } 4634 4635 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4636 { 4637 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4638 uint32_t seq = ring->fence_drv.sync_seq; 4639 uint64_t addr = ring->fence_drv.gpu_addr; 4640 4641 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4642 lower_32_bits(addr), upper_32_bits(addr), 4643 seq, 0xffffffff, 4); 4644 } 4645 4646 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4647 unsigned vmid, uint64_t pd_addr) 4648 { 4649 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4650 4651 /* compute doesn't have PFP */ 4652 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4653 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4654 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4655 amdgpu_ring_write(ring, 0x0); 4656 } 4657 } 4658 4659 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4660 { 4661 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4662 } 4663 4664 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4665 { 4666 u64 wptr; 4667 4668 /* XXX check if swapping is necessary on BE */ 4669 if (ring->use_doorbell) 4670 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4671 else 4672 BUG(); 4673 return wptr; 4674 } 4675 4676 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4677 bool acquire) 4678 { 4679 struct amdgpu_device *adev = ring->adev; 4680 int pipe_num, tmp, reg; 4681 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4682 4683 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4684 4685 /* first me only has 2 entries, GFX and HP3D */ 4686 if (ring->me > 0) 4687 pipe_num -= 2; 4688 4689 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4690 tmp = RREG32(reg); 4691 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4692 WREG32(reg, tmp); 4693 } 4694 4695 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4696 struct amdgpu_ring *ring, 4697 bool acquire) 4698 { 4699 int i, pipe; 4700 bool reserve; 4701 struct amdgpu_ring *iring; 4702 4703 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4704 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4705 if (acquire) 4706 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4707 else 4708 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4709 4710 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4711 /* Clear all reservations - everyone reacquires all resources */ 4712 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4713 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4714 true); 4715 4716 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4717 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4718 true); 4719 } else { 4720 /* Lower all pipes without a current reservation */ 4721 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4722 iring = &adev->gfx.gfx_ring[i]; 4723 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4724 iring->me, 4725 iring->pipe, 4726 0); 4727 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4728 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4729 } 4730 4731 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4732 iring = &adev->gfx.compute_ring[i]; 4733 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4734 iring->me, 4735 iring->pipe, 4736 0); 4737 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4738 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4739 } 4740 } 4741 4742 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4743 } 4744 4745 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4746 struct amdgpu_ring *ring, 4747 bool acquire) 4748 { 4749 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4750 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4751 4752 mutex_lock(&adev->srbm_mutex); 4753 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4754 4755 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4756 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4757 4758 soc15_grbm_select(adev, 0, 0, 0, 0); 4759 mutex_unlock(&adev->srbm_mutex); 4760 } 4761 4762 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4763 enum drm_sched_priority priority) 4764 { 4765 struct amdgpu_device *adev = ring->adev; 4766 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4767 4768 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4769 return; 4770 4771 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4772 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4773 } 4774 4775 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4776 { 4777 struct amdgpu_device *adev = ring->adev; 4778 4779 /* XXX check if swapping is necessary on BE */ 4780 if (ring->use_doorbell) { 4781 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4782 WDOORBELL64(ring->doorbell_index, ring->wptr); 4783 } else{ 4784 BUG(); /* only DOORBELL method supported on gfx9 now */ 4785 } 4786 } 4787 4788 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4789 u64 seq, unsigned int flags) 4790 { 4791 struct amdgpu_device *adev = ring->adev; 4792 4793 /* we only allocate 32bit for each seq wb address */ 4794 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4795 4796 /* write fence seq to the "addr" */ 4797 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4798 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4799 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4800 amdgpu_ring_write(ring, lower_32_bits(addr)); 4801 amdgpu_ring_write(ring, upper_32_bits(addr)); 4802 amdgpu_ring_write(ring, lower_32_bits(seq)); 4803 4804 if (flags & AMDGPU_FENCE_FLAG_INT) { 4805 /* set register to trigger INT */ 4806 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4807 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4808 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4809 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4810 amdgpu_ring_write(ring, 0); 4811 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4812 } 4813 } 4814 4815 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4816 { 4817 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4818 amdgpu_ring_write(ring, 0); 4819 } 4820 4821 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 4822 { 4823 struct v9_ce_ib_state ce_payload = {0}; 4824 uint64_t csa_addr; 4825 int cnt; 4826 4827 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4828 csa_addr = amdgpu_csa_vaddr(ring->adev); 4829 4830 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4831 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4832 WRITE_DATA_DST_SEL(8) | 4833 WR_CONFIRM) | 4834 WRITE_DATA_CACHE_POLICY(0)); 4835 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4836 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4837 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 4838 } 4839 4840 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 4841 { 4842 struct v9_de_ib_state de_payload = {0}; 4843 uint64_t csa_addr, gds_addr; 4844 int cnt; 4845 4846 csa_addr = amdgpu_csa_vaddr(ring->adev); 4847 gds_addr = csa_addr + 4096; 4848 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 4849 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4850 4851 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4852 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4853 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4854 WRITE_DATA_DST_SEL(8) | 4855 WR_CONFIRM) | 4856 WRITE_DATA_CACHE_POLICY(0)); 4857 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4858 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4859 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 4860 } 4861 4862 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4863 { 4864 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4865 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 4866 } 4867 4868 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4869 { 4870 uint32_t dw2 = 0; 4871 4872 if (amdgpu_sriov_vf(ring->adev)) 4873 gfx_v9_0_ring_emit_ce_meta(ring); 4874 4875 gfx_v9_0_ring_emit_tmz(ring, true); 4876 4877 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4878 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4879 /* set load_global_config & load_global_uconfig */ 4880 dw2 |= 0x8001; 4881 /* set load_cs_sh_regs */ 4882 dw2 |= 0x01000000; 4883 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4884 dw2 |= 0x10002; 4885 4886 /* set load_ce_ram if preamble presented */ 4887 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4888 dw2 |= 0x10000000; 4889 } else { 4890 /* still load_ce_ram if this is the first time preamble presented 4891 * although there is no context switch happens. 4892 */ 4893 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4894 dw2 |= 0x10000000; 4895 } 4896 4897 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4898 amdgpu_ring_write(ring, dw2); 4899 amdgpu_ring_write(ring, 0); 4900 } 4901 4902 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4903 { 4904 unsigned ret; 4905 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4906 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4907 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4908 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4909 ret = ring->wptr & ring->buf_mask; 4910 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4911 return ret; 4912 } 4913 4914 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4915 { 4916 unsigned cur; 4917 BUG_ON(offset > ring->buf_mask); 4918 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4919 4920 cur = (ring->wptr & ring->buf_mask) - 1; 4921 if (likely(cur > offset)) 4922 ring->ring[offset] = cur - offset; 4923 else 4924 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 4925 } 4926 4927 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 4928 { 4929 struct amdgpu_device *adev = ring->adev; 4930 4931 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4932 amdgpu_ring_write(ring, 0 | /* src: register*/ 4933 (5 << 8) | /* dst: memory */ 4934 (1 << 20)); /* write confirm */ 4935 amdgpu_ring_write(ring, reg); 4936 amdgpu_ring_write(ring, 0); 4937 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4938 adev->virt.reg_val_offs * 4)); 4939 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4940 adev->virt.reg_val_offs * 4)); 4941 } 4942 4943 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4944 uint32_t val) 4945 { 4946 uint32_t cmd = 0; 4947 4948 switch (ring->funcs->type) { 4949 case AMDGPU_RING_TYPE_GFX: 4950 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4951 break; 4952 case AMDGPU_RING_TYPE_KIQ: 4953 cmd = (1 << 16); /* no inc addr */ 4954 break; 4955 default: 4956 cmd = WR_CONFIRM; 4957 break; 4958 } 4959 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4960 amdgpu_ring_write(ring, cmd); 4961 amdgpu_ring_write(ring, reg); 4962 amdgpu_ring_write(ring, 0); 4963 amdgpu_ring_write(ring, val); 4964 } 4965 4966 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4967 uint32_t val, uint32_t mask) 4968 { 4969 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4970 } 4971 4972 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 4973 uint32_t reg0, uint32_t reg1, 4974 uint32_t ref, uint32_t mask) 4975 { 4976 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4977 struct amdgpu_device *adev = ring->adev; 4978 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4979 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 4980 4981 if (fw_version_ok) 4982 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 4983 ref, mask, 0x20); 4984 else 4985 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 4986 ref, mask); 4987 } 4988 4989 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 4990 { 4991 struct amdgpu_device *adev = ring->adev; 4992 uint32_t value = 0; 4993 4994 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 4995 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 4996 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 4997 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 4998 WREG32(mmSQ_CMD, value); 4999 } 5000 5001 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5002 enum amdgpu_interrupt_state state) 5003 { 5004 switch (state) { 5005 case AMDGPU_IRQ_STATE_DISABLE: 5006 case AMDGPU_IRQ_STATE_ENABLE: 5007 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5008 TIME_STAMP_INT_ENABLE, 5009 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5010 break; 5011 default: 5012 break; 5013 } 5014 } 5015 5016 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5017 int me, int pipe, 5018 enum amdgpu_interrupt_state state) 5019 { 5020 u32 mec_int_cntl, mec_int_cntl_reg; 5021 5022 /* 5023 * amdgpu controls only the first MEC. That's why this function only 5024 * handles the setting of interrupts for this specific MEC. All other 5025 * pipes' interrupts are set by amdkfd. 
static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
			break;
		case 1:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
			break;
		case 2:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
			break;
		case 3:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_REG_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}
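/*
 * Privileged register and privileged instruction faults are both gated by
 * fields in CP_INT_CNTL_RING0.  When one fires, the corresponding handlers
 * further down (gfx_v9_0_priv_reg_irq()/gfx_v9_0_priv_inst_irq()) log the
 * violation and report it to the GPU scheduler through gfx_v9_0_fault(), so
 * the offending job is handled by the usual timeout/recovery path.
 */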
static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_INSTR_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 1)

#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 0)

static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 1);
		DISABLE_ECC_ON_ME_PIPE(1, 2);
		DISABLE_ECC_ON_ME_PIPE(1, 3);
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 0);
		ENABLE_ECC_ON_ME_PIPE(1, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 2);
		ENABLE_ECC_ON_ME_PIPE(1, 3);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
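/*
 * The EOP and fault handlers below identify the interrupt source from the IV
 * entry's ring_id field: pipe in bits [1:0], ME in bits [3:2] and queue in
 * bits [6:4].  For example, a ring_id of 0x14 decodes to me 1, pipe 0,
 * queue 1, i.e. the second queue of MEC1 pipe 0.
 */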
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	/* TODO ue will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_UE;
}

static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
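/*
 * The .emit_frame_size values in the ring function tables below are
 * worst-case dword counts the ring layer reserves before building a frame;
 * the SOC15_FLUSH_GPU_TLB_NUM_WREG/_REG_WAIT terms scale with the number of
 * register writes and waits a VM flush may emit.  .emit_ib_size is the
 * per-IB packet size in dwords.
 */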
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* 242 dwords maximum with 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
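/*
 * The KIQ ring never executes user IBs, hence no .emit_ib/.test_ib below.
 * It is used for compute queue management and, under SR-IOV, for register
 * access through the ring, which is why it is the only table providing
 * .emit_rreg (the value read is written back to the writeback slot at
 * adev->virt.reg_val_offs, see gfx_v9_0_ring_emit_rreg() above).
 */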
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
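/*
 * gfx_v9_0_ip_block is the handle the SoC setup code uses to register this
 * GFX IP with the device's IP block list.  Illustrative sketch only (the
 * actual call sites live in soc15.c):
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * after which the common IP framework drives the amd_ip_funcs callbacks
 * defined above (sw_init/hw_init, suspend/resume, clock- and power-gating).
 */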