1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/kernel.h> 26 #include <linux/firmware.h> 27 #include <linux/module.h> 28 #include <linux/pci.h> 29 30 #include "amdgpu.h" 31 #include "amdgpu_gfx.h" 32 #include "soc15.h" 33 #include "soc15d.h" 34 #include "amdgpu_atomfirmware.h" 35 #include "amdgpu_pm.h" 36 37 #include "gc/gc_9_0_offset.h" 38 #include "gc/gc_9_0_sh_mask.h" 39 #include "vega10_enum.h" 40 #include "hdp/hdp_4_0_offset.h" 41 42 #include "soc15.h" 43 #include "soc15_common.h" 44 #include "clearstate_gfx9.h" 45 #include "v9_structs.h" 46 47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" 48 49 #include "amdgpu_ras.h" 50 51 #define GFX9_NUM_GFX_RINGS 1 52 #define GFX9_MEC_HPD_SIZE 4096 53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 55 56 #define mmPWR_MISC_CNTL_STATUS 0x0183 57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L 61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L 62 63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); 64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); 65 MODULE_FIRMWARE("amdgpu/vega10_me.bin"); 66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); 67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 69 70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin"); 71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin"); 72 MODULE_FIRMWARE("amdgpu/vega12_me.bin"); 73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); 74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); 75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); 76 77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); 78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); 79 MODULE_FIRMWARE("amdgpu/vega20_me.bin"); 80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); 81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); 82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); 83 84 MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 86 MODULE_FIRMWARE("amdgpu/raven_me.bin"); 87 MODULE_FIRMWARE("amdgpu/raven_mec.bin"); 88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); 89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 90 91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 93 
MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 98 99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 106 107 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 108 { 109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 110 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 111 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 112 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 115 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 116 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 117 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 120 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 121 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 122 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 124 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 125 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 126 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 127 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 129 }; 130 131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 132 { 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 148 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 149 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 150 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 151 }; 152 153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 154 { 155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 160 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 161 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 162 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 163 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 164 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 165 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 166 }; 167 168 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 169 { 170 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 171 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 172 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 173 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 174 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 175 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 176 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 177 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 178 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 179 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 180 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 181 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 182 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 183 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 184 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 185 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 186 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 187 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 188 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 189 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 190 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 191 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 192 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 193 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 194 }; 195 196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 197 { 198 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 199 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 200 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 201 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 202 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 203 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 204 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 205 }; 206 207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 208 { 209 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 210 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 211 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 212 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 213 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 214 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 215 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 216 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 217 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 218 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 219 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 220 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 221 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 222 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 223 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 224 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 225 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 226 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 227 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 228 }; 229 230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 231 { 232 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 233 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 234 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 235 }; 236 237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 238 { 239 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 240 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 241 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 242 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 243 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 244 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 245 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 246 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 247 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 248 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 249 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 250 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 251 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 252 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 253 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 254 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 255 }; 256 257 static const struct 
soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 258 { 259 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 260 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 261 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 262 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 263 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 264 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 265 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 266 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 267 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 268 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 269 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 270 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 271 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 272 }; 273 274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 275 { 276 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 277 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 278 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 279 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 280 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 281 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 282 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 283 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 284 }; 285 286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 287 { 288 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 289 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 290 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 291 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 292 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 293 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 294 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 295 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 296 }; 297 298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 302 303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 308 struct amdgpu_cu_info *cu_info); 309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); 311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 312 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 313 314 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 315 { 316 switch (adev->asic_type) { 317 case CHIP_VEGA10: 318 if (!amdgpu_virt_support_skip_setting(adev)) { 319 soc15_program_register_sequence(adev, 320 golden_settings_gc_9_0, 321 ARRAY_SIZE(golden_settings_gc_9_0)); 322 soc15_program_register_sequence(adev, 323 golden_settings_gc_9_0_vg10, 324 
ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 325 } 326 break; 327 case CHIP_VEGA12: 328 soc15_program_register_sequence(adev, 329 golden_settings_gc_9_2_1, 330 ARRAY_SIZE(golden_settings_gc_9_2_1)); 331 soc15_program_register_sequence(adev, 332 golden_settings_gc_9_2_1_vg12, 333 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); 334 break; 335 case CHIP_VEGA20: 336 soc15_program_register_sequence(adev, 337 golden_settings_gc_9_0, 338 ARRAY_SIZE(golden_settings_gc_9_0)); 339 soc15_program_register_sequence(adev, 340 golden_settings_gc_9_0_vg20, 341 ARRAY_SIZE(golden_settings_gc_9_0_vg20)); 342 break; 343 case CHIP_RAVEN: 344 soc15_program_register_sequence(adev, golden_settings_gc_9_1, 345 ARRAY_SIZE(golden_settings_gc_9_1)); 346 if (adev->rev_id >= 8) 347 soc15_program_register_sequence(adev, 348 golden_settings_gc_9_1_rv2, 349 ARRAY_SIZE(golden_settings_gc_9_1_rv2)); 350 else 351 soc15_program_register_sequence(adev, 352 golden_settings_gc_9_1_rv1, 353 ARRAY_SIZE(golden_settings_gc_9_1_rv1)); 354 break; 355 default: 356 break; 357 } 358 359 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, 360 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); 361 } 362 363 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) 364 { 365 adev->gfx.scratch.num_reg = 8; 366 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 367 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 368 } 369 370 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 371 bool wc, uint32_t reg, uint32_t val) 372 { 373 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 374 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 375 WRITE_DATA_DST_SEL(0) | 376 (wc ? WR_CONFIRM : 0)); 377 amdgpu_ring_write(ring, reg); 378 amdgpu_ring_write(ring, 0); 379 amdgpu_ring_write(ring, val); 380 } 381 382 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 383 int mem_space, int opt, uint32_t addr0, 384 uint32_t addr1, uint32_t ref, uint32_t mask, 385 uint32_t inv) 386 { 387 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 388 amdgpu_ring_write(ring, 389 /* memory (1) or register (0) */ 390 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 391 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 392 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 393 WAIT_REG_MEM_ENGINE(eng_sel))); 394 395 if (mem_space) 396 BUG_ON(addr0 & 0x3); /* Dword align */ 397 amdgpu_ring_write(ring, addr0); 398 amdgpu_ring_write(ring, addr1); 399 amdgpu_ring_write(ring, ref); 400 amdgpu_ring_write(ring, mask); 401 amdgpu_ring_write(ring, inv); /* poll interval */ 402 } 403 404 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 405 { 406 struct amdgpu_device *adev = ring->adev; 407 uint32_t scratch; 408 uint32_t tmp = 0; 409 unsigned i; 410 int r; 411 412 r = amdgpu_gfx_scratch_get(adev, &scratch); 413 if (r) 414 return r; 415 416 WREG32(scratch, 0xCAFEDEAD); 417 r = amdgpu_ring_alloc(ring, 3); 418 if (r) 419 goto error_free_scratch; 420 421 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 422 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 423 amdgpu_ring_write(ring, 0xDEADBEEF); 424 amdgpu_ring_commit(ring); 425 426 for (i = 0; i < adev->usec_timeout; i++) { 427 tmp = RREG32(scratch); 428 if (tmp == 0xDEADBEEF) 429 break; 430 udelay(1); 431 } 432 433 if (i >= adev->usec_timeout) 434 r = -ETIMEDOUT; 435 436 error_free_scratch: 437 amdgpu_gfx_scratch_free(adev, scratch); 438 return r; 439 } 440 441 static int gfx_v9_0_ring_test_ib(struct 
amdgpu_ring *ring, long timeout) 442 { 443 struct amdgpu_device *adev = ring->adev; 444 struct amdgpu_ib ib; 445 struct dma_fence *f = NULL; 446 447 unsigned index; 448 uint64_t gpu_addr; 449 uint32_t tmp; 450 long r; 451 452 r = amdgpu_device_wb_get(adev, &index); 453 if (r) 454 return r; 455 456 gpu_addr = adev->wb.gpu_addr + (index * 4); 457 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 458 memset(&ib, 0, sizeof(ib)); 459 r = amdgpu_ib_get(adev, NULL, 16, &ib); 460 if (r) 461 goto err1; 462 463 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 464 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 465 ib.ptr[2] = lower_32_bits(gpu_addr); 466 ib.ptr[3] = upper_32_bits(gpu_addr); 467 ib.ptr[4] = 0xDEADBEEF; 468 ib.length_dw = 5; 469 470 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 471 if (r) 472 goto err2; 473 474 r = dma_fence_wait_timeout(f, false, timeout); 475 if (r == 0) { 476 r = -ETIMEDOUT; 477 goto err2; 478 } else if (r < 0) { 479 goto err2; 480 } 481 482 tmp = adev->wb.wb[index]; 483 if (tmp == 0xDEADBEEF) 484 r = 0; 485 else 486 r = -EINVAL; 487 488 err2: 489 amdgpu_ib_free(adev, &ib, NULL); 490 dma_fence_put(f); 491 err1: 492 amdgpu_device_wb_free(adev, index); 493 return r; 494 } 495 496 497 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 498 { 499 release_firmware(adev->gfx.pfp_fw); 500 adev->gfx.pfp_fw = NULL; 501 release_firmware(adev->gfx.me_fw); 502 adev->gfx.me_fw = NULL; 503 release_firmware(adev->gfx.ce_fw); 504 adev->gfx.ce_fw = NULL; 505 release_firmware(adev->gfx.rlc_fw); 506 adev->gfx.rlc_fw = NULL; 507 release_firmware(adev->gfx.mec_fw); 508 adev->gfx.mec_fw = NULL; 509 release_firmware(adev->gfx.mec2_fw); 510 adev->gfx.mec2_fw = NULL; 511 512 kfree(adev->gfx.rlc.register_list_format); 513 } 514 515 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 516 { 517 const struct rlc_firmware_header_v2_1 *rlc_hdr; 518 519 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 520 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 521 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 522 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 523 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 524 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 525 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 526 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 527 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 528 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 529 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 530 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 531 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 532 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 533 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 534 } 535 536 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 537 { 538 adev->gfx.me_fw_write_wait = false; 539 adev->gfx.mec_fw_write_wait = false; 540 541 switch (adev->asic_type) 
{ 542 case CHIP_VEGA10: 543 if ((adev->gfx.me_fw_version >= 0x0000009c) && 544 (adev->gfx.me_feature_version >= 42) && 545 (adev->gfx.pfp_fw_version >= 0x000000b1) && 546 (adev->gfx.pfp_feature_version >= 42)) 547 adev->gfx.me_fw_write_wait = true; 548 549 if ((adev->gfx.mec_fw_version >= 0x00000193) && 550 (adev->gfx.mec_feature_version >= 42)) 551 adev->gfx.mec_fw_write_wait = true; 552 break; 553 case CHIP_VEGA12: 554 if ((adev->gfx.me_fw_version >= 0x0000009c) && 555 (adev->gfx.me_feature_version >= 44) && 556 (adev->gfx.pfp_fw_version >= 0x000000b2) && 557 (adev->gfx.pfp_feature_version >= 44)) 558 adev->gfx.me_fw_write_wait = true; 559 560 if ((adev->gfx.mec_fw_version >= 0x00000196) && 561 (adev->gfx.mec_feature_version >= 44)) 562 adev->gfx.mec_fw_write_wait = true; 563 break; 564 case CHIP_VEGA20: 565 if ((adev->gfx.me_fw_version >= 0x0000009c) && 566 (adev->gfx.me_feature_version >= 44) && 567 (adev->gfx.pfp_fw_version >= 0x000000b2) && 568 (adev->gfx.pfp_feature_version >= 44)) 569 adev->gfx.me_fw_write_wait = true; 570 571 if ((adev->gfx.mec_fw_version >= 0x00000197) && 572 (adev->gfx.mec_feature_version >= 44)) 573 adev->gfx.mec_fw_write_wait = true; 574 break; 575 case CHIP_RAVEN: 576 if ((adev->gfx.me_fw_version >= 0x0000009c) && 577 (adev->gfx.me_feature_version >= 42) && 578 (adev->gfx.pfp_fw_version >= 0x000000b1) && 579 (adev->gfx.pfp_feature_version >= 42)) 580 adev->gfx.me_fw_write_wait = true; 581 582 if ((adev->gfx.mec_fw_version >= 0x00000192) && 583 (adev->gfx.mec_feature_version >= 42)) 584 adev->gfx.mec_fw_write_wait = true; 585 break; 586 default: 587 break; 588 } 589 } 590 591 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 592 { 593 switch (adev->asic_type) { 594 case CHIP_VEGA10: 595 case CHIP_VEGA12: 596 case CHIP_VEGA20: 597 break; 598 case CHIP_RAVEN: 599 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) 600 break; 601 if ((adev->gfx.rlc_fw_version != 106 && 602 adev->gfx.rlc_fw_version < 531) || 603 (adev->gfx.rlc_fw_version == 53815) || 604 (adev->gfx.rlc_feature_version < 1) || 605 !adev->gfx.rlc.is_rlc_v2_1) 606 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 607 break; 608 default: 609 break; 610 } 611 } 612 613 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 614 { 615 const char *chip_name; 616 char fw_name[30]; 617 int err; 618 struct amdgpu_firmware_info *info = NULL; 619 const struct common_firmware_header *header = NULL; 620 const struct gfx_firmware_header_v1_0 *cp_hdr; 621 const struct rlc_firmware_header_v2_0 *rlc_hdr; 622 unsigned int *tmp = NULL; 623 unsigned int i = 0; 624 uint16_t version_major; 625 uint16_t version_minor; 626 uint32_t smu_version; 627 628 DRM_DEBUG("\n"); 629 630 switch (adev->asic_type) { 631 case CHIP_VEGA10: 632 chip_name = "vega10"; 633 break; 634 case CHIP_VEGA12: 635 chip_name = "vega12"; 636 break; 637 case CHIP_VEGA20: 638 chip_name = "vega20"; 639 break; 640 case CHIP_RAVEN: 641 if (adev->rev_id >= 8) 642 chip_name = "raven2"; 643 else if (adev->pdev->device == 0x15d8) 644 chip_name = "picasso"; 645 else 646 chip_name = "raven"; 647 break; 648 default: 649 BUG(); 650 } 651 652 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 653 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 654 if (err) 655 goto out; 656 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 657 if (err) 658 goto out; 659 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 660 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 661 
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 662 663 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 664 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 665 if (err) 666 goto out; 667 err = amdgpu_ucode_validate(adev->gfx.me_fw); 668 if (err) 669 goto out; 670 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 671 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 672 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 673 674 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 675 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 676 if (err) 677 goto out; 678 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 679 if (err) 680 goto out; 681 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 682 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 683 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 684 685 /* 686 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 687 * instead of picasso_rlc.bin. 688 * Judgment method: 689 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 690 * or revision >= 0xD8 && revision <= 0xDF 691 * otherwise is PCO FP5 692 */ 693 if (!strcmp(chip_name, "picasso") && 694 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 695 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 696 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 697 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 698 (smu_version >= 0x41e2b)) 699 /** 700 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 701 */ 702 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 703 else 704 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 705 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 706 if (err) 707 goto out; 708 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 709 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 710 711 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 712 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 713 if (version_major == 2 && version_minor == 1) 714 adev->gfx.rlc.is_rlc_v2_1 = true; 715 716 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 717 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 718 adev->gfx.rlc.save_and_restore_offset = 719 le32_to_cpu(rlc_hdr->save_and_restore_offset); 720 adev->gfx.rlc.clear_state_descriptor_offset = 721 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 722 adev->gfx.rlc.avail_scratch_ram_locations = 723 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 724 adev->gfx.rlc.reg_restore_list_size = 725 le32_to_cpu(rlc_hdr->reg_restore_list_size); 726 adev->gfx.rlc.reg_list_format_start = 727 le32_to_cpu(rlc_hdr->reg_list_format_start); 728 adev->gfx.rlc.reg_list_format_separate_start = 729 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 730 adev->gfx.rlc.starting_offsets_start = 731 le32_to_cpu(rlc_hdr->starting_offsets_start); 732 adev->gfx.rlc.reg_list_format_size_bytes = 733 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 734 adev->gfx.rlc.reg_list_size_bytes = 735 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 736 adev->gfx.rlc.register_list_format = 737 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 738 
adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 739 if (!adev->gfx.rlc.register_list_format) { 740 err = -ENOMEM; 741 goto out; 742 } 743 744 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 745 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 746 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 747 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 748 749 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 750 751 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 752 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 753 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 754 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 755 756 if (adev->gfx.rlc.is_rlc_v2_1) 757 gfx_v9_0_init_rlc_ext_microcode(adev); 758 759 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 760 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 761 if (err) 762 goto out; 763 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 764 if (err) 765 goto out; 766 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 767 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 768 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 769 770 771 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 772 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 773 if (!err) { 774 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 775 if (err) 776 goto out; 777 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 778 adev->gfx.mec2_fw->data; 779 adev->gfx.mec2_fw_version = 780 le32_to_cpu(cp_hdr->header.ucode_version); 781 adev->gfx.mec2_feature_version = 782 le32_to_cpu(cp_hdr->ucode_feature_version); 783 } else { 784 err = 0; 785 adev->gfx.mec2_fw = NULL; 786 } 787 788 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 789 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 790 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 791 info->fw = adev->gfx.pfp_fw; 792 header = (const struct common_firmware_header *)info->fw->data; 793 adev->firmware.fw_size += 794 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 795 796 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 797 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 798 info->fw = adev->gfx.me_fw; 799 header = (const struct common_firmware_header *)info->fw->data; 800 adev->firmware.fw_size += 801 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 802 803 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 804 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 805 info->fw = adev->gfx.ce_fw; 806 header = (const struct common_firmware_header *)info->fw->data; 807 adev->firmware.fw_size += 808 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 809 810 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 811 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 812 info->fw = adev->gfx.rlc_fw; 813 header = (const struct common_firmware_header *)info->fw->data; 814 adev->firmware.fw_size += 815 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 816 817 if (adev->gfx.rlc.is_rlc_v2_1 && 818 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 819 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 820 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 821 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 822 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 823 info->fw = adev->gfx.rlc_fw; 824 adev->firmware.fw_size += 825 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 826 827 info = 
&adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 828 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 829 info->fw = adev->gfx.rlc_fw; 830 adev->firmware.fw_size += 831 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 832 833 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 834 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 835 info->fw = adev->gfx.rlc_fw; 836 adev->firmware.fw_size += 837 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 838 } 839 840 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 841 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 842 info->fw = adev->gfx.mec_fw; 843 header = (const struct common_firmware_header *)info->fw->data; 844 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 845 adev->firmware.fw_size += 846 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 847 848 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 849 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 850 info->fw = adev->gfx.mec_fw; 851 adev->firmware.fw_size += 852 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 853 854 if (adev->gfx.mec2_fw) { 855 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 856 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 857 info->fw = adev->gfx.mec2_fw; 858 header = (const struct common_firmware_header *)info->fw->data; 859 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 860 adev->firmware.fw_size += 861 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 862 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 863 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 864 info->fw = adev->gfx.mec2_fw; 865 adev->firmware.fw_size += 866 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 867 } 868 869 } 870 871 out: 872 gfx_v9_0_check_if_need_gfxoff(adev); 873 gfx_v9_0_check_fw_write_wait(adev); 874 if (err) { 875 dev_err(adev->dev, 876 "gfx9: Failed to load firmware \"%s\"\n", 877 fw_name); 878 release_firmware(adev->gfx.pfp_fw); 879 adev->gfx.pfp_fw = NULL; 880 release_firmware(adev->gfx.me_fw); 881 adev->gfx.me_fw = NULL; 882 release_firmware(adev->gfx.ce_fw); 883 adev->gfx.ce_fw = NULL; 884 release_firmware(adev->gfx.rlc_fw); 885 adev->gfx.rlc_fw = NULL; 886 release_firmware(adev->gfx.mec_fw); 887 adev->gfx.mec_fw = NULL; 888 release_firmware(adev->gfx.mec2_fw); 889 adev->gfx.mec2_fw = NULL; 890 } 891 return err; 892 } 893 894 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 895 { 896 u32 count = 0; 897 const struct cs_section_def *sect = NULL; 898 const struct cs_extent_def *ext = NULL; 899 900 /* begin clear state */ 901 count += 2; 902 /* context control state */ 903 count += 3; 904 905 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 906 for (ext = sect->section; ext->extent != NULL; ++ext) { 907 if (sect->id == SECT_CONTEXT) 908 count += 2 + ext->reg_count; 909 else 910 return 0; 911 } 912 } 913 914 /* end clear state */ 915 count += 2; 916 /* clear state */ 917 count += 2; 918 919 return count; 920 } 921 922 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 923 volatile u32 *buffer) 924 { 925 u32 count = 0, i; 926 const struct cs_section_def *sect = NULL; 927 const struct cs_extent_def *ext = NULL; 928 929 if (adev->gfx.rlc.cs_data == NULL) 930 return; 931 if (buffer == NULL) 932 return; 933 934 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 935 buffer[count++] = 
cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 936 937 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 938 buffer[count++] = cpu_to_le32(0x80000000); 939 buffer[count++] = cpu_to_le32(0x80000000); 940 941 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 942 for (ext = sect->section; ext->extent != NULL; ++ext) { 943 if (sect->id == SECT_CONTEXT) { 944 buffer[count++] = 945 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 946 buffer[count++] = cpu_to_le32(ext->reg_index - 947 PACKET3_SET_CONTEXT_REG_START); 948 for (i = 0; i < ext->reg_count; i++) 949 buffer[count++] = cpu_to_le32(ext->extent[i]); 950 } else { 951 return; 952 } 953 } 954 } 955 956 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 957 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 958 959 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 960 buffer[count++] = cpu_to_le32(0); 961 } 962 963 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 964 { 965 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 966 uint32_t pg_always_on_cu_num = 2; 967 uint32_t always_on_cu_num; 968 uint32_t i, j, k; 969 uint32_t mask, cu_bitmap, counter; 970 971 if (adev->flags & AMD_IS_APU) 972 always_on_cu_num = 4; 973 else if (adev->asic_type == CHIP_VEGA12) 974 always_on_cu_num = 8; 975 else 976 always_on_cu_num = 12; 977 978 mutex_lock(&adev->grbm_idx_mutex); 979 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 980 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 981 mask = 1; 982 cu_bitmap = 0; 983 counter = 0; 984 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 985 986 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 987 if (cu_info->bitmap[i][j] & mask) { 988 if (counter == pg_always_on_cu_num) 989 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 990 if (counter < always_on_cu_num) 991 cu_bitmap |= mask; 992 else 993 break; 994 counter++; 995 } 996 mask <<= 1; 997 } 998 999 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1000 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1001 } 1002 } 1003 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1004 mutex_unlock(&adev->grbm_idx_mutex); 1005 } 1006 1007 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1008 { 1009 uint32_t data; 1010 1011 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1012 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1013 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1014 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1015 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1016 1017 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1018 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1019 1020 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1021 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1022 1023 mutex_lock(&adev->grbm_idx_mutex); 1024 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1025 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1026 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1027 1028 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1029 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1030 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1031 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1032 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1033 1034 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1035 data = RREG32_SOC15(GC, 0, 
mmRLC_GPM_GENERAL_7); 1036 data &= 0x0000FFFF; 1037 data |= 0x00C00000; 1038 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1039 1040 /* 1041 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1042 * programmed in gfx_v9_0_init_always_on_cu_mask() 1043 */ 1044 1045 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1046 * but used for RLC_LB_CNTL configuration */ 1047 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1048 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1049 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1050 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1051 mutex_unlock(&adev->grbm_idx_mutex); 1052 1053 gfx_v9_0_init_always_on_cu_mask(adev); 1054 } 1055 1056 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1057 { 1058 uint32_t data; 1059 1060 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1061 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1062 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1063 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1064 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1065 1066 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1067 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1068 1069 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1070 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1071 1072 mutex_lock(&adev->grbm_idx_mutex); 1073 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1074 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1075 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1076 1077 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1078 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1079 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1080 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1081 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1082 1083 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1084 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1085 data &= 0x0000FFFF; 1086 data |= 0x00C00000; 1087 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1088 1089 /* 1090 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1091 * programmed in gfx_v9_0_init_always_on_cu_mask() 1092 */ 1093 1094 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1095 * but used for RLC_LB_CNTL configuration */ 1096 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1097 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1098 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1099 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1100 mutex_unlock(&adev->grbm_idx_mutex); 1101 1102 gfx_v9_0_init_always_on_cu_mask(adev); 1103 } 1104 1105 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1106 { 1107 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1108 } 1109 1110 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1111 { 1112 return 5; 1113 } 1114 1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1116 { 1117 const struct cs_section_def *cs_data; 1118 int r; 1119 1120 adev->gfx.rlc.cs_data = gfx9_cs_data; 1121 1122 cs_data = adev->gfx.rlc.cs_data; 1123 1124 if (cs_data) { 1125 /* init clear state block */ 1126 r = amdgpu_gfx_rlc_init_csb(adev); 1127 if (r) 1128 return r; 1129 } 1130 1131 if (adev->asic_type == CHIP_RAVEN) { 1132 /* TODO: double check the cp_table_size for RV */ 1133 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1134 r = amdgpu_gfx_rlc_init_cpt(adev); 1135 if (r) 1136 return r; 1137 } 1138 1139 switch (adev->asic_type) { 1140 case CHIP_RAVEN: 1141 gfx_v9_0_init_lbpw(adev); 1142 break; 1143 case CHIP_VEGA20: 1144 gfx_v9_4_init_lbpw(adev); 1145 break; 1146 default: 1147 break; 1148 } 1149 1150 return 0; 1151 } 1152 1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) 1154 { 1155 int r; 1156 1157 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1158 if (unlikely(r != 0)) 1159 return r; 1160 1161 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, 1162 AMDGPU_GEM_DOMAIN_VRAM); 1163 if (!r) 1164 adev->gfx.rlc.clear_state_gpu_addr = 1165 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); 1166 1167 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1168 1169 return r; 1170 } 1171 1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) 1173 { 1174 int r; 1175 1176 if (!adev->gfx.rlc.clear_state_obj) 1177 return; 1178 1179 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1180 if (likely(r == 0)) { 1181 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1182 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1183 } 1184 } 1185 1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1187 { 1188 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1189 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1190 } 1191 1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1193 { 1194 int r; 1195 u32 *hpd; 1196 const __le32 *fw_data; 1197 unsigned fw_size; 1198 u32 *fw; 1199 size_t mec_hpd_size; 1200 1201 const struct gfx_firmware_header_v1_0 *mec_hdr; 1202 1203 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1204 1205 /* take ownership of the relevant compute queues */ 1206 amdgpu_gfx_compute_queue_acquire(adev); 1207 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1208 1209 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1210 AMDGPU_GEM_DOMAIN_VRAM, 1211 &adev->gfx.mec.hpd_eop_obj, 1212 &adev->gfx.mec.hpd_eop_gpu_addr, 1213 (void **)&hpd); 1214 if (r) { 1215 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1216 gfx_v9_0_mec_fini(adev); 1217 return r; 1218 } 1219 1220 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1221 1222 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1223 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1224 1225 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1226 1227 fw_data = (const __le32 *) 1228 (adev->gfx.mec_fw->data + 1229 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1230 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1231 1232 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1233 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1234 &adev->gfx.mec.mec_fw_obj, 1235 &adev->gfx.mec.mec_fw_gpu_addr, 1236 (void **)&fw); 1237 if (r) { 
1238 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1239 gfx_v9_0_mec_fini(adev); 1240 return r; 1241 } 1242 1243 memcpy(fw, fw_data, fw_size); 1244 1245 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1246 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1247 1248 return 0; 1249 } 1250 1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1252 { 1253 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1254 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1255 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1256 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1257 (SQ_IND_INDEX__FORCE_READ_MASK)); 1258 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1259 } 1260 1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1262 uint32_t wave, uint32_t thread, 1263 uint32_t regno, uint32_t num, uint32_t *out) 1264 { 1265 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1266 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1267 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1268 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1269 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1270 (SQ_IND_INDEX__FORCE_READ_MASK) | 1271 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1272 while (num--) 1273 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1274 } 1275 1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1277 { 1278 /* type 1 wave data */ 1279 dst[(*no_fields)++] = 1; 1280 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1281 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1282 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1283 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1284 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1285 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1286 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1287 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1288 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1289 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1290 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1291 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1292 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1293 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1294 } 1295 1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1297 uint32_t wave, uint32_t start, 1298 uint32_t size, uint32_t *dst) 1299 { 1300 wave_read_regs( 1301 adev, simd, wave, 0, 1302 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1303 } 1304 1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1306 uint32_t wave, uint32_t thread, 1307 uint32_t start, uint32_t size, 1308 uint32_t *dst) 1309 { 1310 wave_read_regs( 1311 adev, simd, wave, thread, 1312 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1313 } 1314 1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1316 u32 me, u32 pipe, u32 q) 1317 { 1318 soc15_grbm_select(adev, me, pipe, q, 0); 1319 } 1320 1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1322 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1323 .select_se_sh = &gfx_v9_0_select_se_sh, 1324 .read_wave_data = &gfx_v9_0_read_wave_data, 1325 .read_wave_sgprs = 
&gfx_v9_0_read_wave_sgprs, 1326 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1327 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q 1328 }; 1329 1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1331 { 1332 u32 gb_addr_config; 1333 int err; 1334 1335 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1336 1337 switch (adev->asic_type) { 1338 case CHIP_VEGA10: 1339 adev->gfx.config.max_hw_contexts = 8; 1340 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1341 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1342 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1343 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1344 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1345 break; 1346 case CHIP_VEGA12: 1347 adev->gfx.config.max_hw_contexts = 8; 1348 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1349 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1350 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1351 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1352 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1353 DRM_INFO("fix gfx.config for vega12\n"); 1354 break; 1355 case CHIP_VEGA20: 1356 adev->gfx.config.max_hw_contexts = 8; 1357 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1358 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1359 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1360 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1361 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1362 gb_addr_config &= ~0xf3e777ff; 1363 gb_addr_config |= 0x22014042; 1364 /* check vbios table if gpu info is not available */ 1365 err = amdgpu_atomfirmware_get_gfx_info(adev); 1366 if (err) 1367 return err; 1368 break; 1369 case CHIP_RAVEN: 1370 adev->gfx.config.max_hw_contexts = 8; 1371 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1372 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1373 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1374 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1375 if (adev->rev_id >= 8) 1376 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1377 else 1378 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1379 break; 1380 default: 1381 BUG(); 1382 break; 1383 } 1384 1385 adev->gfx.config.gb_addr_config = gb_addr_config; 1386 1387 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1388 REG_GET_FIELD( 1389 adev->gfx.config.gb_addr_config, 1390 GB_ADDR_CONFIG, 1391 NUM_PIPES); 1392 1393 adev->gfx.config.max_tile_pipes = 1394 adev->gfx.config.gb_addr_config_fields.num_pipes; 1395 1396 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1397 REG_GET_FIELD( 1398 adev->gfx.config.gb_addr_config, 1399 GB_ADDR_CONFIG, 1400 NUM_BANKS); 1401 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1402 REG_GET_FIELD( 1403 adev->gfx.config.gb_addr_config, 1404 GB_ADDR_CONFIG, 1405 MAX_COMPRESSED_FRAGS); 1406 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1407 REG_GET_FIELD( 1408 adev->gfx.config.gb_addr_config, 1409 GB_ADDR_CONFIG, 1410 NUM_RB_PER_SE); 1411 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1412 REG_GET_FIELD( 1413 adev->gfx.config.gb_addr_config, 1414 GB_ADDR_CONFIG, 1415 NUM_SHADER_ENGINES); 1416 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1417 REG_GET_FIELD( 1418 adev->gfx.config.gb_addr_config, 1419 GB_ADDR_CONFIG, 1420 PIPE_INTERLEAVE_SIZE)); 1421 1422 return 0; 1423 } 1424 1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1426 struct amdgpu_ngg_buf *ngg_buf, 1427 int size_se, 1428 int default_size_se) 1429 { 1430 int r; 1431 1432 if (size_se < 0) { 
1433 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1434 return -EINVAL; 1435 } 1436 size_se = size_se ? size_se : default_size_se; 1437 1438 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1439 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1440 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1441 &ngg_buf->bo, 1442 &ngg_buf->gpu_addr, 1443 NULL); 1444 if (r) { 1445 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1446 return r; 1447 } 1448 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1449 1450 return r; 1451 } 1452 1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1454 { 1455 int i; 1456 1457 for (i = 0; i < NGG_BUF_MAX; i++) 1458 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1459 &adev->gfx.ngg.buf[i].gpu_addr, 1460 NULL); 1461 1462 memset(&adev->gfx.ngg.buf[0], 0, 1463 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1464 1465 adev->gfx.ngg.init = false; 1466 1467 return 0; 1468 } 1469 1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 1471 { 1472 int r; 1473 1474 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 1475 return 0; 1476 1477 /* GDS reserve memory: 64 bytes alignment */ 1478 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 1479 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 1480 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 1481 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 1482 1483 /* Primitive Buffer */ 1484 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 1485 amdgpu_prim_buf_per_se, 1486 64 * 1024); 1487 if (r) { 1488 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 1489 goto err; 1490 } 1491 1492 /* Position Buffer */ 1493 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 1494 amdgpu_pos_buf_per_se, 1495 256 * 1024); 1496 if (r) { 1497 dev_err(adev->dev, "Failed to create Position Buffer\n"); 1498 goto err; 1499 } 1500 1501 /* Control Sideband */ 1502 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 1503 amdgpu_cntl_sb_buf_per_se, 1504 256); 1505 if (r) { 1506 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 1507 goto err; 1508 } 1509 1510 /* Parameter Cache, not created by default */ 1511 if (amdgpu_param_buf_per_se <= 0) 1512 goto out; 1513 1514 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 1515 amdgpu_param_buf_per_se, 1516 512 * 1024); 1517 if (r) { 1518 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 1519 goto err; 1520 } 1521 1522 out: 1523 adev->gfx.ngg.init = true; 1524 return 0; 1525 err: 1526 gfx_v9_0_ngg_fini(adev); 1527 return r; 1528 } 1529 1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 1531 { 1532 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 1533 int r; 1534 u32 data, base; 1535 1536 if (!amdgpu_ngg) 1537 return 0; 1538 1539 /* Program buffer size */ 1540 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 1541 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 1542 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 1543 adev->gfx.ngg.buf[NGG_POS].size >> 8); 1544 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 1545 1546 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 1547 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 1548 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 1549 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 1550 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 1551 1552 /* Program buffer base address */ 1553 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1554 data = 
REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 1555 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 1556 1557 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1558 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 1559 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 1560 1561 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1562 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 1563 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 1564 1565 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1566 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 1567 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 1568 1569 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1570 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 1571 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 1572 1573 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1574 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 1575 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 1576 1577 /* Clear GDS reserved memory */ 1578 r = amdgpu_ring_alloc(ring, 17); 1579 if (r) { 1580 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 1581 ring->name, r); 1582 return r; 1583 } 1584 1585 gfx_v9_0_write_data_to_reg(ring, 0, false, 1586 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 1587 (adev->gds.gds_size + 1588 adev->gfx.ngg.gds_reserve_size)); 1589 1590 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1591 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 1592 PACKET3_DMA_DATA_DST_SEL(1) | 1593 PACKET3_DMA_DATA_SRC_SEL(2))); 1594 amdgpu_ring_write(ring, 0); 1595 amdgpu_ring_write(ring, 0); 1596 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 1597 amdgpu_ring_write(ring, 0); 1598 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 1599 adev->gfx.ngg.gds_reserve_size); 1600 1601 gfx_v9_0_write_data_to_reg(ring, 0, false, 1602 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 1603 1604 amdgpu_ring_commit(ring); 1605 1606 return 0; 1607 } 1608 1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1610 int mec, int pipe, int queue) 1611 { 1612 int r; 1613 unsigned irq_type; 1614 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1615 1616 ring = &adev->gfx.compute_ring[ring_id]; 1617 1618 /* mec0 is me1 */ 1619 ring->me = mec + 1; 1620 ring->pipe = pipe; 1621 ring->queue = queue; 1622 1623 ring->ring_obj = NULL; 1624 ring->use_doorbell = true; 1625 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1626 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1627 + (ring_id * GFX9_MEC_HPD_SIZE); 1628 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1629 1630 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1631 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1632 + ring->pipe; 1633 1634 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1635 r = amdgpu_ring_init(adev, ring, 1024, 1636 &adev->gfx.eop_irq, irq_type); 1637 if (r) 1638 return r; 1639 1640 1641 return 0; 1642 } 1643 1644 static int gfx_v9_0_sw_init(void *handle) 1645 { 1646 int i, j, k, r, ring_id; 1647 struct amdgpu_ring *ring; 1648 struct amdgpu_kiq *kiq; 1649 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1650 1651 switch (adev->asic_type) { 1652 case CHIP_VEGA10: 1653 case CHIP_VEGA12: 1654 case CHIP_VEGA20: 1655 case CHIP_RAVEN: 1656 adev->gfx.mec.num_mec = 2; 1657 break; 1658 default: 1659 adev->gfx.mec.num_mec = 1; 1660 break; 1661 } 1662 1663 
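	/* gfx v9 has a fixed compute topology: 4 pipes per MEC, 8 queues per pipe */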
adev->gfx.mec.num_pipe_per_mec = 4; 1664 adev->gfx.mec.num_queue_per_pipe = 8; 1665 1666 /* EOP Event */ 1667 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 1668 if (r) 1669 return r; 1670 1671 /* Privileged reg */ 1672 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 1673 &adev->gfx.priv_reg_irq); 1674 if (r) 1675 return r; 1676 1677 /* Privileged inst */ 1678 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 1679 &adev->gfx.priv_inst_irq); 1680 if (r) 1681 return r; 1682 1683 /* ECC error */ 1684 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 1685 &adev->gfx.cp_ecc_error_irq); 1686 if (r) 1687 return r; 1688 1689 /* FUE error */ 1690 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 1691 &adev->gfx.cp_ecc_error_irq); 1692 if (r) 1693 return r; 1694 1695 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1696 1697 gfx_v9_0_scratch_init(adev); 1698 1699 r = gfx_v9_0_init_microcode(adev); 1700 if (r) { 1701 DRM_ERROR("Failed to load gfx firmware!\n"); 1702 return r; 1703 } 1704 1705 r = adev->gfx.rlc.funcs->init(adev); 1706 if (r) { 1707 DRM_ERROR("Failed to init rlc BOs!\n"); 1708 return r; 1709 } 1710 1711 r = gfx_v9_0_mec_init(adev); 1712 if (r) { 1713 DRM_ERROR("Failed to init MEC BOs!\n"); 1714 return r; 1715 } 1716 1717 /* set up the gfx ring */ 1718 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1719 ring = &adev->gfx.gfx_ring[i]; 1720 ring->ring_obj = NULL; 1721 if (!i) 1722 sprintf(ring->name, "gfx"); 1723 else 1724 sprintf(ring->name, "gfx_%d", i); 1725 ring->use_doorbell = true; 1726 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1727 r = amdgpu_ring_init(adev, ring, 1024, 1728 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 1729 if (r) 1730 return r; 1731 } 1732 1733 /* set up the compute queues - allocate horizontally across pipes */ 1734 ring_id = 0; 1735 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1736 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1737 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1738 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1739 continue; 1740 1741 r = gfx_v9_0_compute_ring_init(adev, 1742 ring_id, 1743 i, k, j); 1744 if (r) 1745 return r; 1746 1747 ring_id++; 1748 } 1749 } 1750 } 1751 1752 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1753 if (r) { 1754 DRM_ERROR("Failed to init KIQ BOs!\n"); 1755 return r; 1756 } 1757 1758 kiq = &adev->gfx.kiq; 1759 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1760 if (r) 1761 return r; 1762 1763 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1764 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 1765 if (r) 1766 return r; 1767 1768 adev->gfx.ce_ram_size = 0x8000; 1769 1770 r = gfx_v9_0_gpu_early_init(adev); 1771 if (r) 1772 return r; 1773 1774 r = gfx_v9_0_ngg_init(adev); 1775 if (r) 1776 return r; 1777 1778 return 0; 1779 } 1780 1781 1782 static int gfx_v9_0_sw_fini(void *handle) 1783 { 1784 int i; 1785 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1786 1787 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 1788 adev->gfx.ras_if) { 1789 struct ras_common_if *ras_if = adev->gfx.ras_if; 1790 struct ras_ih_if ih_info = { 1791 .head = *ras_if, 1792 }; 1793 1794 amdgpu_ras_debugfs_remove(adev, ras_if); 1795 amdgpu_ras_sysfs_remove(adev, ras_if); 1796 
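		/* unregister the RAS interrupt handler and disable the GFX RAS
		 * feature before freeing ras_if
		 */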
amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 1797 amdgpu_ras_feature_enable(adev, ras_if, 0); 1798 kfree(ras_if); 1799 } 1800 1801 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1802 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1803 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1804 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1805 1806 amdgpu_gfx_mqd_sw_fini(adev); 1807 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1808 amdgpu_gfx_kiq_fini(adev); 1809 1810 gfx_v9_0_mec_fini(adev); 1811 gfx_v9_0_ngg_fini(adev); 1812 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1813 if (adev->asic_type == CHIP_RAVEN) { 1814 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 1815 &adev->gfx.rlc.cp_table_gpu_addr, 1816 (void **)&adev->gfx.rlc.cp_table_ptr); 1817 } 1818 gfx_v9_0_free_microcode(adev); 1819 1820 return 0; 1821 } 1822 1823 1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1825 { 1826 /* TODO */ 1827 } 1828 1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1830 { 1831 u32 data; 1832 1833 if (instance == 0xffffffff) 1834 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1835 else 1836 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1837 1838 if (se_num == 0xffffffff) 1839 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1840 else 1841 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1842 1843 if (sh_num == 0xffffffff) 1844 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1845 else 1846 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1847 1848 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 1849 } 1850 1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1852 { 1853 u32 data, mask; 1854 1855 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1856 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1857 1858 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1859 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1860 1861 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1862 adev->gfx.config.max_sh_per_se); 1863 1864 return (~data) & mask; 1865 } 1866 1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1868 { 1869 int i, j; 1870 u32 data; 1871 u32 active_rbs = 0; 1872 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1873 adev->gfx.config.max_sh_per_se; 1874 1875 mutex_lock(&adev->grbm_idx_mutex); 1876 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1877 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1878 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1879 data = gfx_v9_0_get_rb_active_bitmap(adev); 1880 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1881 rb_bitmap_width_per_sh); 1882 } 1883 } 1884 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1885 mutex_unlock(&adev->grbm_idx_mutex); 1886 1887 adev->gfx.config.backend_enable_mask = active_rbs; 1888 adev->gfx.config.num_rbs = hweight32(active_rbs); 1889 } 1890 1891 #define DEFAULT_SH_MEM_BASES (0x6000) 1892 #define FIRST_COMPUTE_VMID (8) 1893 #define LAST_COMPUTE_VMID (16) 1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 1895 { 1896 int i; 1897 uint32_t sh_mem_config; 1898 uint32_t sh_mem_bases; 1899 1900 /* 1901 * Configure apertures: 1902 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1903 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 
(4GB) 1904 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1905 */ 1906 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 1907 1908 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 1909 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 1910 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 1911 1912 mutex_lock(&adev->srbm_mutex); 1913 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1914 soc15_grbm_select(adev, 0, 0, 0, i); 1915 /* CP and shaders */ 1916 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 1917 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1918 } 1919 soc15_grbm_select(adev, 0, 0, 0, 0); 1920 mutex_unlock(&adev->srbm_mutex); 1921 } 1922 1923 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 1924 { 1925 u32 tmp; 1926 int i; 1927 1928 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1929 1930 gfx_v9_0_tiling_mode_table_init(adev); 1931 1932 gfx_v9_0_setup_rb(adev); 1933 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1934 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 1935 1936 /* XXX SH_MEM regs */ 1937 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1938 mutex_lock(&adev->srbm_mutex); 1939 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { 1940 soc15_grbm_select(adev, 0, 0, 0, i); 1941 /* CP and shaders */ 1942 if (i == 0) { 1943 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1944 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1945 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1946 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 1947 } else { 1948 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1949 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1950 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1951 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1952 (adev->gmc.private_aperture_start >> 48)); 1953 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1954 (adev->gmc.shared_aperture_start >> 48)); 1955 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 1956 } 1957 } 1958 soc15_grbm_select(adev, 0, 0, 0, 0); 1959 1960 mutex_unlock(&adev->srbm_mutex); 1961 1962 gfx_v9_0_init_compute_vmid(adev); 1963 } 1964 1965 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1966 { 1967 u32 i, j, k; 1968 u32 mask; 1969 1970 mutex_lock(&adev->grbm_idx_mutex); 1971 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1972 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1973 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1974 for (k = 0; k < adev->usec_timeout; k++) { 1975 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1976 break; 1977 udelay(1); 1978 } 1979 if (k == adev->usec_timeout) { 1980 gfx_v9_0_select_se_sh(adev, 0xffffffff, 1981 0xffffffff, 0xffffffff); 1982 mutex_unlock(&adev->grbm_idx_mutex); 1983 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 1984 i, j); 1985 return; 1986 } 1987 } 1988 } 1989 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1990 mutex_unlock(&adev->grbm_idx_mutex); 1991 1992 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 1993 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 1994 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 1995 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 1996 for (k = 0; k < adev->usec_timeout; k++) { 1997 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 1998 break; 1999 udelay(1); 2000 } 2001 } 2002 2003 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2004 bool enable) 2005 { 2006 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2007 2008 tmp 
= REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2009 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2010 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2011 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2012 2013 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2014 } 2015 2016 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2017 { 2018 /* csib */ 2019 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2020 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2021 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2022 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2023 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2024 adev->gfx.rlc.clear_state_size); 2025 } 2026 2027 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2028 int indirect_offset, 2029 int list_size, 2030 int *unique_indirect_regs, 2031 int unique_indirect_reg_count, 2032 int *indirect_start_offsets, 2033 int *indirect_start_offsets_count, 2034 int max_start_offsets_count) 2035 { 2036 int idx; 2037 2038 for (; indirect_offset < list_size; indirect_offset++) { 2039 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2040 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2041 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2042 2043 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2044 indirect_offset += 2; 2045 2046 /* look for the matching indice */ 2047 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2048 if (unique_indirect_regs[idx] == 2049 register_list_format[indirect_offset] || 2050 !unique_indirect_regs[idx]) 2051 break; 2052 } 2053 2054 BUG_ON(idx >= unique_indirect_reg_count); 2055 2056 if (!unique_indirect_regs[idx]) 2057 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2058 2059 indirect_offset++; 2060 } 2061 } 2062 } 2063 2064 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2065 { 2066 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2067 int unique_indirect_reg_count = 0; 2068 2069 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2070 int indirect_start_offsets_count = 0; 2071 2072 int list_size = 0; 2073 int i = 0, j = 0; 2074 u32 tmp = 0; 2075 2076 u32 *register_list_format = 2077 kmemdup(adev->gfx.rlc.register_list_format, 2078 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2079 if (!register_list_format) 2080 return -ENOMEM; 2081 2082 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2083 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2084 gfx_v9_1_parse_ind_reg_list(register_list_format, 2085 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2086 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2087 unique_indirect_regs, 2088 unique_indirect_reg_count, 2089 indirect_start_offsets, 2090 &indirect_start_offsets_count, 2091 ARRAY_SIZE(indirect_start_offsets)); 2092 2093 /* enable auto inc in case it is disabled */ 2094 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2095 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2096 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2097 2098 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2099 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2100 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2101 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes 
>> 2; i++) 2102 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2103 adev->gfx.rlc.register_restore[i]); 2104 2105 /* load indirect register */ 2106 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2107 adev->gfx.rlc.reg_list_format_start); 2108 2109 /* direct register portion */ 2110 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2111 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2112 register_list_format[i]); 2113 2114 /* indirect register portion */ 2115 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2116 if (register_list_format[i] == 0xFFFFFFFF) { 2117 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2118 continue; 2119 } 2120 2121 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2122 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2123 2124 for (j = 0; j < unique_indirect_reg_count; j++) { 2125 if (register_list_format[i] == unique_indirect_regs[j]) { 2126 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2127 break; 2128 } 2129 } 2130 2131 BUG_ON(j >= unique_indirect_reg_count); 2132 2133 i++; 2134 } 2135 2136 /* set save/restore list size */ 2137 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2138 list_size = list_size >> 1; 2139 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2140 adev->gfx.rlc.reg_restore_list_size); 2141 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2142 2143 /* write the starting offsets to RLC scratch ram */ 2144 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2145 adev->gfx.rlc.starting_offsets_start); 2146 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2147 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2148 indirect_start_offsets[i]); 2149 2150 /* load unique indirect regs*/ 2151 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2152 if (unique_indirect_regs[i] != 0) { 2153 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2154 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2155 unique_indirect_regs[i] & 0x3FFFF); 2156 2157 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2158 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2159 unique_indirect_regs[i] >> 20); 2160 } 2161 } 2162 2163 kfree(register_list_format); 2164 return 0; 2165 } 2166 2167 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2168 { 2169 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2170 } 2171 2172 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2173 bool enable) 2174 { 2175 uint32_t data = 0; 2176 uint32_t default_data = 0; 2177 2178 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2179 if (enable == true) { 2180 /* enable GFXIP control over CGPG */ 2181 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2182 if(default_data != data) 2183 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2184 2185 /* update status */ 2186 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2187 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2188 if(default_data != data) 2189 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2190 } else { 2191 /* restore GFXIP control over GCPG */ 2192 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2193 if(default_data != data) 2194 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2195 } 2196 } 2197 2198 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2199 { 2200 uint32_t data = 
0; 2201 2202 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2203 AMD_PG_SUPPORT_GFX_SMG | 2204 AMD_PG_SUPPORT_GFX_DMG)) { 2205 /* init IDLE_POLL_COUNT = 60 */ 2206 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2207 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2208 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2209 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2210 2211 /* init RLC PG Delay */ 2212 data = 0; 2213 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2214 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2215 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2216 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2217 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2218 2219 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2220 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2221 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2222 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2223 2224 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2225 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2226 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2227 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2228 2229 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2230 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2231 2232 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2233 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2234 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2235 2236 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2237 } 2238 } 2239 2240 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2241 bool enable) 2242 { 2243 uint32_t data = 0; 2244 uint32_t default_data = 0; 2245 2246 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2247 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2248 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2249 enable ? 1 : 0); 2250 if (default_data != data) 2251 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2252 } 2253 2254 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2255 bool enable) 2256 { 2257 uint32_t data = 0; 2258 uint32_t default_data = 0; 2259 2260 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2261 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2262 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2263 enable ? 1 : 0); 2264 if(default_data != data) 2265 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2266 } 2267 2268 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2269 bool enable) 2270 { 2271 uint32_t data = 0; 2272 uint32_t default_data = 0; 2273 2274 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2275 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2276 CP_PG_DISABLE, 2277 enable ? 0 : 1); 2278 if(default_data != data) 2279 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2280 } 2281 2282 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2283 bool enable) 2284 { 2285 uint32_t data, default_data; 2286 2287 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2288 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2289 GFX_POWER_GATING_ENABLE, 2290 enable ? 
1 : 0); 2291 if(default_data != data) 2292 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2293 } 2294 2295 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2296 bool enable) 2297 { 2298 uint32_t data, default_data; 2299 2300 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2301 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2302 GFX_PIPELINE_PG_ENABLE, 2303 enable ? 1 : 0); 2304 if(default_data != data) 2305 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2306 2307 if (!enable) 2308 /* read any GFX register to wake up GFX */ 2309 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2310 } 2311 2312 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2313 bool enable) 2314 { 2315 uint32_t data, default_data; 2316 2317 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2318 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2319 STATIC_PER_CU_PG_ENABLE, 2320 enable ? 1 : 0); 2321 if(default_data != data) 2322 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2323 } 2324 2325 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2326 bool enable) 2327 { 2328 uint32_t data, default_data; 2329 2330 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2331 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2332 DYN_PER_CU_PG_ENABLE, 2333 enable ? 1 : 0); 2334 if(default_data != data) 2335 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2336 } 2337 2338 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2339 { 2340 gfx_v9_0_init_csb(adev); 2341 2342 /* 2343 * Rlc save restore list is workable since v2_1. 2344 * And it's needed by gfxoff feature. 2345 */ 2346 if (adev->gfx.rlc.is_rlc_v2_1) { 2347 gfx_v9_1_init_rlc_save_restore_list(adev); 2348 gfx_v9_0_enable_save_restore_machine(adev); 2349 } 2350 2351 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2352 AMD_PG_SUPPORT_GFX_SMG | 2353 AMD_PG_SUPPORT_GFX_DMG | 2354 AMD_PG_SUPPORT_CP | 2355 AMD_PG_SUPPORT_GDS | 2356 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2357 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2358 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2359 gfx_v9_0_init_gfx_power_gating(adev); 2360 } 2361 } 2362 2363 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2364 { 2365 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2366 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2367 gfx_v9_0_wait_for_rlc_serdes(adev); 2368 } 2369 2370 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2371 { 2372 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2373 udelay(50); 2374 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2375 udelay(50); 2376 } 2377 2378 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2379 { 2380 #ifdef AMDGPU_RLC_DEBUG_RETRY 2381 u32 rlc_ucode_ver; 2382 #endif 2383 2384 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2385 udelay(50); 2386 2387 /* carrizo do enable cp interrupt after cp inited */ 2388 if (!(adev->flags & AMD_IS_APU)) { 2389 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2390 udelay(50); 2391 } 2392 2393 #ifdef AMDGPU_RLC_DEBUG_RETRY 2394 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2395 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2396 if(rlc_ucode_ver == 0x108) { 2397 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2398 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2399 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2400 * default is 0x9C4 to create a 100us interval */ 2401 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2402 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2403 * to disable the page fault retry interrupts, default is 2404 * 0x100 (256) */ 2405 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2406 } 2407 #endif 2408 } 2409 2410 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2411 { 2412 const struct rlc_firmware_header_v2_0 *hdr; 2413 const __le32 *fw_data; 2414 unsigned i, fw_size; 2415 2416 if (!adev->gfx.rlc_fw) 2417 return -EINVAL; 2418 2419 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2420 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2421 2422 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2423 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2424 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2425 2426 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2427 RLCG_UCODE_LOADING_START_ADDRESS); 2428 for (i = 0; i < fw_size; i++) 2429 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2430 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2431 2432 return 0; 2433 } 2434 2435 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2436 { 2437 int r; 2438 2439 if (amdgpu_sriov_vf(adev)) { 2440 gfx_v9_0_init_csb(adev); 2441 return 0; 2442 } 2443 2444 adev->gfx.rlc.funcs->stop(adev); 2445 2446 /* disable CG */ 2447 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2448 2449 gfx_v9_0_init_pg(adev); 2450 2451 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2452 /* legacy rlc firmware loading */ 2453 r = gfx_v9_0_rlc_load_microcode(adev); 2454 if (r) 2455 return r; 2456 } 2457 2458 switch (adev->asic_type) { 2459 case CHIP_RAVEN: 2460 if (amdgpu_lbpw == 0) 2461 gfx_v9_0_enable_lbpw(adev, false); 2462 else 2463 gfx_v9_0_enable_lbpw(adev, true); 2464 break; 2465 case CHIP_VEGA20: 2466 if (amdgpu_lbpw > 0) 2467 gfx_v9_0_enable_lbpw(adev, true); 2468 else 2469 gfx_v9_0_enable_lbpw(adev, false); 2470 break; 2471 default: 2472 break; 2473 } 2474 2475 adev->gfx.rlc.funcs->start(adev); 2476 2477 return 0; 2478 } 2479 2480 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2481 { 2482 int i; 2483 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2484 2485 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2486 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2487 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 2488 if (!enable) { 2489 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2490 adev->gfx.gfx_ring[i].sched.ready = false; 2491 } 2492 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2493 udelay(50); 2494 } 2495 2496 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2497 { 2498 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2499 const struct gfx_firmware_header_v1_0 *ce_hdr; 2500 const struct gfx_firmware_header_v1_0 *me_hdr; 2501 const __le32 *fw_data; 2502 unsigned i, fw_size; 2503 2504 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2505 return -EINVAL; 2506 2507 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2508 adev->gfx.pfp_fw->data; 2509 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2510 adev->gfx.ce_fw->data; 2511 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2512 adev->gfx.me_fw->data; 2513 2514 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2515 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2516 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2517 2518 gfx_v9_0_cp_gfx_enable(adev, false); 2519 2520 /* PFP */ 2521 fw_data = (const __le32 *) 2522 (adev->gfx.pfp_fw->data + 2523 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2524 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2525 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2526 for (i = 0; i < fw_size; i++) 2527 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2528 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2529 2530 /* CE */ 2531 fw_data = (const __le32 *) 2532 (adev->gfx.ce_fw->data + 2533 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2534 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2535 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2536 for (i = 0; i < fw_size; i++) 2537 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2538 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2539 2540 /* ME */ 2541 fw_data = (const __le32 *) 2542 (adev->gfx.me_fw->data + 2543 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2544 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2545 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2546 for (i = 0; i < fw_size; i++) 2547 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2548 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2549 2550 return 0; 2551 } 2552 2553 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2554 { 2555 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2556 const struct cs_section_def *sect = NULL; 2557 const struct cs_extent_def *ext = NULL; 2558 int r, i, tmp; 2559 2560 /* init the CP */ 2561 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2562 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2563 2564 gfx_v9_0_cp_gfx_enable(adev, true); 2565 2566 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2567 if (r) { 2568 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2569 return r; 2570 } 2571 2572 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2573 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2574 2575 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2576 amdgpu_ring_write(ring, 0x80000000); 2577 amdgpu_ring_write(ring, 0x80000000); 2578 2579 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2580 for (ext = sect->section; ext->extent != NULL; ++ext) { 2581 if (sect->id == SECT_CONTEXT) { 2582 amdgpu_ring_write(ring, 2583 PACKET3(PACKET3_SET_CONTEXT_REG, 2584 
ext->reg_count)); 2585 amdgpu_ring_write(ring, 2586 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2587 for (i = 0; i < ext->reg_count; i++) 2588 amdgpu_ring_write(ring, ext->extent[i]); 2589 } 2590 } 2591 } 2592 2593 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2594 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2595 2596 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2597 amdgpu_ring_write(ring, 0); 2598 2599 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2600 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2601 amdgpu_ring_write(ring, 0x8000); 2602 amdgpu_ring_write(ring, 0x8000); 2603 2604 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 2605 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2606 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2607 amdgpu_ring_write(ring, tmp); 2608 amdgpu_ring_write(ring, 0); 2609 2610 amdgpu_ring_commit(ring); 2611 2612 return 0; 2613 } 2614 2615 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2616 { 2617 struct amdgpu_ring *ring; 2618 u32 tmp; 2619 u32 rb_bufsz; 2620 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2621 2622 /* Set the write pointer delay */ 2623 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2624 2625 /* set the RB to use vmid 0 */ 2626 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2627 2628 /* Set ring buffer size */ 2629 ring = &adev->gfx.gfx_ring[0]; 2630 rb_bufsz = order_base_2(ring->ring_size / 8); 2631 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2632 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2633 #ifdef __BIG_ENDIAN 2634 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2635 #endif 2636 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2637 2638 /* Initialize the ring buffer's write pointers */ 2639 ring->wptr = 0; 2640 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2641 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2642 2643 /* set the wb address wether it's enabled or not */ 2644 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2645 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2646 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2647 2648 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2649 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 2650 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 2651 2652 mdelay(1); 2653 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2654 2655 rb_addr = ring->gpu_addr >> 8; 2656 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2657 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2658 2659 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2660 if (ring->use_doorbell) { 2661 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2662 DOORBELL_OFFSET, ring->doorbell_index); 2663 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2664 DOORBELL_EN, 1); 2665 } else { 2666 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 2667 } 2668 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2669 2670 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2671 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2672 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2673 2674 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2675 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2676 2677 2678 /* start the ring */ 2679 gfx_v9_0_cp_gfx_start(adev); 2680 ring->sched.ready = true; 
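	/* the caller (gfx_v9_0_cp_resume) runs a ring test afterwards to
	 * confirm the gfx ring is actually functional
	 */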
2681 2682 return 0; 2683 } 2684 2685 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2686 { 2687 int i; 2688 2689 if (enable) { 2690 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 2691 } else { 2692 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 2693 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2694 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2695 adev->gfx.compute_ring[i].sched.ready = false; 2696 adev->gfx.kiq.ring.sched.ready = false; 2697 } 2698 udelay(50); 2699 } 2700 2701 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2702 { 2703 const struct gfx_firmware_header_v1_0 *mec_hdr; 2704 const __le32 *fw_data; 2705 unsigned i; 2706 u32 tmp; 2707 2708 if (!adev->gfx.mec_fw) 2709 return -EINVAL; 2710 2711 gfx_v9_0_cp_compute_enable(adev, false); 2712 2713 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2714 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2715 2716 fw_data = (const __le32 *) 2717 (adev->gfx.mec_fw->data + 2718 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2719 tmp = 0; 2720 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2721 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2722 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2723 2724 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2725 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2726 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2727 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2728 2729 /* MEC1 */ 2730 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2731 mec_hdr->jt_offset); 2732 for (i = 0; i < mec_hdr->jt_size; i++) 2733 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2734 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2735 2736 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2737 adev->gfx.mec_fw_version); 2738 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 2739 2740 return 0; 2741 } 2742 2743 /* KIQ functions */ 2744 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2745 { 2746 uint32_t tmp; 2747 struct amdgpu_device *adev = ring->adev; 2748 2749 /* tell RLC which is KIQ queue */ 2750 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2751 tmp &= 0xffffff00; 2752 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2753 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2754 tmp |= 0x80; 2755 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2756 } 2757 2758 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2759 { 2760 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2761 uint64_t queue_mask = 0; 2762 int r, i; 2763 2764 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2765 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2766 continue; 2767 2768 /* This situation may be hit in the future if a new HW 2769 * generation exposes more than 64 queues. 
If so, the 2770 * definition of queue_mask needs updating */ 2771 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 2772 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 2773 break; 2774 } 2775 2776 queue_mask |= (1ull << i); 2777 } 2778 2779 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 2780 if (r) { 2781 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2782 return r; 2783 } 2784 2785 /* set resources */ 2786 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 2787 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 2788 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 2789 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 2790 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 2791 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 2792 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 2793 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 2794 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 2795 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2796 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2797 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 2798 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2799 2800 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 2801 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 2802 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 2803 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 2804 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 2805 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 2806 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2807 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 2808 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2809 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 2810 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2811 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2812 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2813 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2814 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2815 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2816 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2817 } 2818 2819 r = amdgpu_ring_test_helper(kiq_ring); 2820 if (r) 2821 DRM_ERROR("KCQ enable failed\n"); 2822 2823 return r; 2824 } 2825 2826 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2827 { 2828 struct amdgpu_device *adev = ring->adev; 2829 struct v9_mqd *mqd = ring->mqd_ptr; 2830 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2831 uint32_t tmp; 2832 2833 mqd->header = 0xC0310800; 2834 mqd->compute_pipelinestat_enable = 0x00000001; 2835 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2836 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2837 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2838 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2839 mqd->compute_misc_reserved = 0x00000003; 2840 2841 mqd->dynamic_cu_mask_addr_lo = 2842 lower_32_bits(ring->mqd_gpu_addr 2843 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2844 mqd->dynamic_cu_mask_addr_hi = 2845 upper_32_bits(ring->mqd_gpu_addr 2846 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2847 2848 eop_base_addr = ring->eop_gpu_addr >> 8; 2849 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2850 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2851 2852 /* set the EOP size, 
register value is 2^(EOP_SIZE+1) dwords */ 2853 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2854 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2855 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2856 2857 mqd->cp_hqd_eop_control = tmp; 2858 2859 /* enable doorbell? */ 2860 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2861 2862 if (ring->use_doorbell) { 2863 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2864 DOORBELL_OFFSET, ring->doorbell_index); 2865 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2866 DOORBELL_EN, 1); 2867 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2868 DOORBELL_SOURCE, 0); 2869 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2870 DOORBELL_HIT, 0); 2871 } else { 2872 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2873 DOORBELL_EN, 0); 2874 } 2875 2876 mqd->cp_hqd_pq_doorbell_control = tmp; 2877 2878 /* disable the queue if it's active */ 2879 ring->wptr = 0; 2880 mqd->cp_hqd_dequeue_request = 0; 2881 mqd->cp_hqd_pq_rptr = 0; 2882 mqd->cp_hqd_pq_wptr_lo = 0; 2883 mqd->cp_hqd_pq_wptr_hi = 0; 2884 2885 /* set the pointer to the MQD */ 2886 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 2887 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2888 2889 /* set MQD vmid to 0 */ 2890 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 2891 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2892 mqd->cp_mqd_control = tmp; 2893 2894 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2895 hqd_gpu_addr = ring->gpu_addr >> 8; 2896 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2897 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2898 2899 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2900 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 2901 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2902 (order_base_2(ring->ring_size / 4) - 1)); 2903 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2904 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 2905 #ifdef __BIG_ENDIAN 2906 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 2907 #endif 2908 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2909 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 2910 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2911 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2912 mqd->cp_hqd_pq_control = tmp; 2913 2914 /* set the wb address whether it's enabled or not */ 2915 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2916 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2917 mqd->cp_hqd_pq_rptr_report_addr_hi = 2918 upper_32_bits(wb_gpu_addr) & 0xffff; 2919 2920 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2921 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2922 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2923 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2924 2925 tmp = 0; 2926 /* enable the doorbell if requested */ 2927 if (ring->use_doorbell) { 2928 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2929 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2930 DOORBELL_OFFSET, ring->doorbell_index); 2931 2932 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2933 DOORBELL_EN, 1); 2934 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2935 DOORBELL_SOURCE, 0); 2936 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2937 DOORBELL_HIT, 0); 2938 } 2939 2940 mqd->cp_hqd_pq_doorbell_control = tmp; 2941 2942 /* reset read 
and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2943 ring->wptr = 0; 2944 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 2945 2946 /* set the vmid for the queue */ 2947 mqd->cp_hqd_vmid = 0; 2948 2949 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2950 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2951 mqd->cp_hqd_persistent_state = tmp; 2952 2953 /* set MIN_IB_AVAIL_SIZE */ 2954 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2955 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2956 mqd->cp_hqd_ib_control = tmp; 2957 2958 /* activate the queue */ 2959 mqd->cp_hqd_active = 1; 2960 2961 return 0; 2962 } 2963 2964 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2965 { 2966 struct amdgpu_device *adev = ring->adev; 2967 struct v9_mqd *mqd = ring->mqd_ptr; 2968 int j; 2969 2970 /* disable wptr polling */ 2971 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2972 2973 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2974 mqd->cp_hqd_eop_base_addr_lo); 2975 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2976 mqd->cp_hqd_eop_base_addr_hi); 2977 2978 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2979 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 2980 mqd->cp_hqd_eop_control); 2981 2982 /* enable doorbell? */ 2983 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2984 mqd->cp_hqd_pq_doorbell_control); 2985 2986 /* disable the queue if it's active */ 2987 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 2988 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 2989 for (j = 0; j < adev->usec_timeout; j++) { 2990 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 2991 break; 2992 udelay(1); 2993 } 2994 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 2995 mqd->cp_hqd_dequeue_request); 2996 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 2997 mqd->cp_hqd_pq_rptr); 2998 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 2999 mqd->cp_hqd_pq_wptr_lo); 3000 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3001 mqd->cp_hqd_pq_wptr_hi); 3002 } 3003 3004 /* set the pointer to the MQD */ 3005 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3006 mqd->cp_mqd_base_addr_lo); 3007 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3008 mqd->cp_mqd_base_addr_hi); 3009 3010 /* set MQD vmid to 0 */ 3011 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3012 mqd->cp_mqd_control); 3013 3014 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3015 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3016 mqd->cp_hqd_pq_base_lo); 3017 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3018 mqd->cp_hqd_pq_base_hi); 3019 3020 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3021 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3022 mqd->cp_hqd_pq_control); 3023 3024 /* set the wb address whether it's enabled or not */ 3025 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3026 mqd->cp_hqd_pq_rptr_report_addr_lo); 3027 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3028 mqd->cp_hqd_pq_rptr_report_addr_hi); 3029 3030 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3031 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3032 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3033 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3034 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3035 3036 /* enable the doorbell if requested */ 3037 if (ring->use_doorbell) { 3038 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3039 (adev->doorbell_index.kiq * 2) << 2); 3040 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3041 
(adev->doorbell_index.userqueue_end * 2) << 2); 3042 } 3043 3044 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3045 mqd->cp_hqd_pq_doorbell_control); 3046 3047 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3048 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3049 mqd->cp_hqd_pq_wptr_lo); 3050 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3051 mqd->cp_hqd_pq_wptr_hi); 3052 3053 /* set the vmid for the queue */ 3054 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3055 3056 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3057 mqd->cp_hqd_persistent_state); 3058 3059 /* activate the queue */ 3060 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3061 mqd->cp_hqd_active); 3062 3063 if (ring->use_doorbell) 3064 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3065 3066 return 0; 3067 } 3068 3069 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3070 { 3071 struct amdgpu_device *adev = ring->adev; 3072 int j; 3073 3074 /* disable the queue if it's active */ 3075 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3076 3077 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3078 3079 for (j = 0; j < adev->usec_timeout; j++) { 3080 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3081 break; 3082 udelay(1); 3083 } 3084 3085 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3086 DRM_DEBUG("KIQ dequeue request failed.\n"); 3087 3088 /* Manual disable if dequeue request times out */ 3089 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3090 } 3091 3092 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3093 0); 3094 } 3095 3096 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3097 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3098 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3099 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3100 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3101 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3102 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3103 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3104 3105 return 0; 3106 } 3107 3108 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3109 { 3110 struct amdgpu_device *adev = ring->adev; 3111 struct v9_mqd *mqd = ring->mqd_ptr; 3112 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3113 3114 gfx_v9_0_kiq_setting(ring); 3115 3116 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3117 /* reset MQD to a clean status */ 3118 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3119 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3120 3121 /* reset ring buffer */ 3122 ring->wptr = 0; 3123 amdgpu_ring_clear_ring(ring); 3124 3125 mutex_lock(&adev->srbm_mutex); 3126 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3127 gfx_v9_0_kiq_init_register(ring); 3128 soc15_grbm_select(adev, 0, 0, 0, 0); 3129 mutex_unlock(&adev->srbm_mutex); 3130 } else { 3131 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3132 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3133 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3134 mutex_lock(&adev->srbm_mutex); 3135 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3136 gfx_v9_0_mqd_init(ring); 3137 gfx_v9_0_kiq_init_register(ring); 3138 soc15_grbm_select(adev, 0, 0, 0, 0); 3139 mutex_unlock(&adev->srbm_mutex); 3140 3141 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3142 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3143 } 3144 3145 return 0; 3146 } 3147 3148 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring 
*ring) 3149 { 3150 struct amdgpu_device *adev = ring->adev; 3151 struct v9_mqd *mqd = ring->mqd_ptr; 3152 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3153 3154 if (!adev->in_gpu_reset && !adev->in_suspend) { 3155 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3156 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3157 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3158 mutex_lock(&adev->srbm_mutex); 3159 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3160 gfx_v9_0_mqd_init(ring); 3161 soc15_grbm_select(adev, 0, 0, 0, 0); 3162 mutex_unlock(&adev->srbm_mutex); 3163 3164 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3165 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3166 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3167 /* reset MQD to a clean status */ 3168 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3169 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3170 3171 /* reset ring buffer */ 3172 ring->wptr = 0; 3173 amdgpu_ring_clear_ring(ring); 3174 } else { 3175 amdgpu_ring_clear_ring(ring); 3176 } 3177 3178 return 0; 3179 } 3180 3181 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3182 { 3183 struct amdgpu_ring *ring; 3184 int r; 3185 3186 ring = &adev->gfx.kiq.ring; 3187 3188 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3189 if (unlikely(r != 0)) 3190 return r; 3191 3192 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3193 if (unlikely(r != 0)) 3194 return r; 3195 3196 gfx_v9_0_kiq_init_queue(ring); 3197 amdgpu_bo_kunmap(ring->mqd_obj); 3198 ring->mqd_ptr = NULL; 3199 amdgpu_bo_unreserve(ring->mqd_obj); 3200 ring->sched.ready = true; 3201 return 0; 3202 } 3203 3204 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3205 { 3206 struct amdgpu_ring *ring = NULL; 3207 int r = 0, i; 3208 3209 gfx_v9_0_cp_compute_enable(adev, true); 3210 3211 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3212 ring = &adev->gfx.compute_ring[i]; 3213 3214 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3215 if (unlikely(r != 0)) 3216 goto done; 3217 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3218 if (!r) { 3219 r = gfx_v9_0_kcq_init_queue(ring); 3220 amdgpu_bo_kunmap(ring->mqd_obj); 3221 ring->mqd_ptr = NULL; 3222 } 3223 amdgpu_bo_unreserve(ring->mqd_obj); 3224 if (r) 3225 goto done; 3226 } 3227 3228 r = gfx_v9_0_kiq_kcq_enable(adev); 3229 done: 3230 return r; 3231 } 3232 3233 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3234 { 3235 int r, i; 3236 struct amdgpu_ring *ring; 3237 3238 if (!(adev->flags & AMD_IS_APU)) 3239 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3240 3241 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3242 /* legacy firmware loading */ 3243 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3244 if (r) 3245 return r; 3246 3247 r = gfx_v9_0_cp_compute_load_microcode(adev); 3248 if (r) 3249 return r; 3250 } 3251 3252 r = gfx_v9_0_kiq_resume(adev); 3253 if (r) 3254 return r; 3255 3256 r = gfx_v9_0_cp_gfx_resume(adev); 3257 if (r) 3258 return r; 3259 3260 r = gfx_v9_0_kcq_resume(adev); 3261 if (r) 3262 return r; 3263 3264 ring = &adev->gfx.gfx_ring[0]; 3265 r = amdgpu_ring_test_helper(ring); 3266 if (r) 3267 return r; 3268 3269 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3270 ring = &adev->gfx.compute_ring[i]; 3271 amdgpu_ring_test_helper(ring); 3272 } 3273 3274 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3275 3276 return 0; 3277 } 3278 3279 static void gfx_v9_0_cp_enable(struct amdgpu_device 
*adev, bool enable) 3280 { 3281 gfx_v9_0_cp_gfx_enable(adev, enable); 3282 gfx_v9_0_cp_compute_enable(adev, enable); 3283 } 3284 3285 static int gfx_v9_0_hw_init(void *handle) 3286 { 3287 int r; 3288 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3289 3290 gfx_v9_0_init_golden_registers(adev); 3291 3292 gfx_v9_0_constants_init(adev); 3293 3294 r = gfx_v9_0_csb_vram_pin(adev); 3295 if (r) 3296 return r; 3297 3298 r = adev->gfx.rlc.funcs->resume(adev); 3299 if (r) 3300 return r; 3301 3302 r = gfx_v9_0_cp_resume(adev); 3303 if (r) 3304 return r; 3305 3306 r = gfx_v9_0_ngg_en(adev); 3307 if (r) 3308 return r; 3309 3310 return r; 3311 } 3312 3313 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3314 { 3315 int r, i; 3316 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3317 3318 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3319 if (r) 3320 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3321 3322 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3323 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3324 3325 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3326 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3327 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3328 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3329 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3330 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3331 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3332 amdgpu_ring_write(kiq_ring, 0); 3333 amdgpu_ring_write(kiq_ring, 0); 3334 amdgpu_ring_write(kiq_ring, 0); 3335 } 3336 r = amdgpu_ring_test_helper(kiq_ring); 3337 if (r) 3338 DRM_ERROR("KCQ disable failed\n"); 3339 3340 return r; 3341 } 3342 3343 static int gfx_v9_0_hw_fini(void *handle) 3344 { 3345 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3346 3347 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3348 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3349 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3350 3351 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3352 gfx_v9_0_kcq_disable(adev); 3353 3354 if (amdgpu_sriov_vf(adev)) { 3355 gfx_v9_0_cp_gfx_enable(adev, false); 3356 /* must disable polling for SRIOV when hw finished, otherwise 3357 * CPC engine may still keep fetching WB address which is already 3358 * invalid after sw finished and trigger DMAR reading error in 3359 * hypervisor side. 
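 *
 * Note that the SR-IOV path returns early just below: only the CP gfx
 * ring and the wptr polling are disabled here, while the later KIQ
 * de-init, RLC stop and CSB unpin are skipped for the virtual function.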
3360 */ 3361 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3362 return 0; 3363 } 3364 3365 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3366 * otherwise KIQ is hanging when binding back 3367 */ 3368 if (!adev->in_gpu_reset && !adev->in_suspend) { 3369 mutex_lock(&adev->srbm_mutex); 3370 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3371 adev->gfx.kiq.ring.pipe, 3372 adev->gfx.kiq.ring.queue, 0); 3373 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3374 soc15_grbm_select(adev, 0, 0, 0, 0); 3375 mutex_unlock(&adev->srbm_mutex); 3376 } 3377 3378 gfx_v9_0_cp_enable(adev, false); 3379 adev->gfx.rlc.funcs->stop(adev); 3380 3381 gfx_v9_0_csb_vram_unpin(adev); 3382 3383 return 0; 3384 } 3385 3386 static int gfx_v9_0_suspend(void *handle) 3387 { 3388 return gfx_v9_0_hw_fini(handle); 3389 } 3390 3391 static int gfx_v9_0_resume(void *handle) 3392 { 3393 return gfx_v9_0_hw_init(handle); 3394 } 3395 3396 static bool gfx_v9_0_is_idle(void *handle) 3397 { 3398 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3399 3400 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3401 GRBM_STATUS, GUI_ACTIVE)) 3402 return false; 3403 else 3404 return true; 3405 } 3406 3407 static int gfx_v9_0_wait_for_idle(void *handle) 3408 { 3409 unsigned i; 3410 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3411 3412 for (i = 0; i < adev->usec_timeout; i++) { 3413 if (gfx_v9_0_is_idle(handle)) 3414 return 0; 3415 udelay(1); 3416 } 3417 return -ETIMEDOUT; 3418 } 3419 3420 static int gfx_v9_0_soft_reset(void *handle) 3421 { 3422 u32 grbm_soft_reset = 0; 3423 u32 tmp; 3424 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3425 3426 /* GRBM_STATUS */ 3427 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3428 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3429 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3430 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3431 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3432 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3433 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3434 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3435 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3436 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3437 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3438 } 3439 3440 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3441 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3442 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3443 } 3444 3445 /* GRBM_STATUS2 */ 3446 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3447 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3448 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3449 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3450 3451 3452 if (grbm_soft_reset) { 3453 /* stop the rlc */ 3454 adev->gfx.rlc.funcs->stop(adev); 3455 3456 /* Disable GFX parsing/prefetching */ 3457 gfx_v9_0_cp_gfx_enable(adev, false); 3458 3459 /* Disable MEC parsing/prefetching */ 3460 gfx_v9_0_cp_compute_enable(adev, false); 3461 3462 if (grbm_soft_reset) { 3463 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3464 tmp |= grbm_soft_reset; 3465 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3466 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3467 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3468 3469 udelay(50); 3470 3471 tmp &= ~grbm_soft_reset; 3472 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3473 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3474 } 3475 3476 /* Wait a little for things to settle down */ 3477 
udelay(50); 3478 } 3479 return 0; 3480 } 3481 3482 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3483 { 3484 uint64_t clock; 3485 3486 mutex_lock(&adev->gfx.gpu_clock_mutex); 3487 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3488 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3489 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3490 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3491 return clock; 3492 } 3493 3494 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3495 uint32_t vmid, 3496 uint32_t gds_base, uint32_t gds_size, 3497 uint32_t gws_base, uint32_t gws_size, 3498 uint32_t oa_base, uint32_t oa_size) 3499 { 3500 struct amdgpu_device *adev = ring->adev; 3501 3502 /* GDS Base */ 3503 gfx_v9_0_write_data_to_reg(ring, 0, false, 3504 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3505 gds_base); 3506 3507 /* GDS Size */ 3508 gfx_v9_0_write_data_to_reg(ring, 0, false, 3509 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3510 gds_size); 3511 3512 /* GWS */ 3513 gfx_v9_0_write_data_to_reg(ring, 0, false, 3514 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3515 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3516 3517 /* OA */ 3518 gfx_v9_0_write_data_to_reg(ring, 0, false, 3519 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3520 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3521 } 3522 3523 static const u32 vgpr_init_compute_shader[] = 3524 { 3525 0xb07c0000, 0xbe8000ff, 3526 0x000000f8, 0xbf110800, 3527 0x7e000280, 0x7e020280, 3528 0x7e040280, 0x7e060280, 3529 0x7e080280, 0x7e0a0280, 3530 0x7e0c0280, 0x7e0e0280, 3531 0x80808800, 0xbe803200, 3532 0xbf84fff5, 0xbf9c0000, 3533 0xd28c0001, 0x0001007f, 3534 0xd28d0001, 0x0002027e, 3535 0x10020288, 0xb8810904, 3536 0xb7814000, 0xd1196a01, 3537 0x00000301, 0xbe800087, 3538 0xbefc00c1, 0xd89c4000, 3539 0x00020201, 0xd89cc080, 3540 0x00040401, 0x320202ff, 3541 0x00000800, 0x80808100, 3542 0xbf84fff8, 0x7e020280, 3543 0xbf810000, 0x00000000, 3544 }; 3545 3546 static const u32 sgpr_init_compute_shader[] = 3547 { 3548 0xb07c0000, 0xbe8000ff, 3549 0x0000005f, 0xbee50080, 3550 0xbe812c65, 0xbe822c65, 3551 0xbe832c65, 0xbe842c65, 3552 0xbe852c65, 0xb77c0005, 3553 0x80808500, 0xbf84fff8, 3554 0xbe800080, 0xbf810000, 3555 }; 3556 3557 static const struct soc15_reg_entry vgpr_init_regs[] = { 3558 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3559 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3560 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3561 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3562 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3563 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3564 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3565 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3566 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3567 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3568 }; 3569 3570 static const struct soc15_reg_entry sgpr_init_regs[] = { 3571 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3572 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3573 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3574 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 
0xffffffff }, 3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3581 }; 3582 3583 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3584 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3585 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3586 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3587 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3588 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3589 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3590 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3591 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3592 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3593 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3594 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3595 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3596 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3597 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3598 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3599 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3600 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3601 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3602 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3603 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3604 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3605 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3606 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 3607 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 3608 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 3609 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 3610 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 3611 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 3612 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 3613 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 3614 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 3615 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 3616 }; 3617 3618 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 3619 { 3620 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3621 int i, r; 3622 3623 r = amdgpu_ring_alloc(ring, 7); 3624 if (r) { 3625 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 3626 ring->name, r); 3627 return r; 3628 } 3629 3630 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 3631 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 3632 3633 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 3634 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 3635 PACKET3_DMA_DATA_DST_SEL(1) | 3636 PACKET3_DMA_DATA_SRC_SEL(2) | 3637 PACKET3_DMA_DATA_ENGINE(0))); 3638 amdgpu_ring_write(ring, 0); 3639 amdgpu_ring_write(ring, 0); 3640 amdgpu_ring_write(ring, 0); 3641 amdgpu_ring_write(ring, 0); 3642 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 3643 adev->gds.gds_size); 3644 3645 amdgpu_ring_commit(ring); 3646 3647 for (i = 0; i < adev->usec_timeout; i++) { 3648 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 3649 break; 3650 udelay(1); 3651 } 3652 3653 if (i >= 
adev->usec_timeout) 3654 r = -ETIMEDOUT; 3655 3656 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 3657 3658 return r; 3659 } 3660 3661 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 3662 { 3663 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3664 struct amdgpu_ib ib; 3665 struct dma_fence *f = NULL; 3666 int r, i, j, k; 3667 unsigned total_size, vgpr_offset, sgpr_offset; 3668 u64 gpu_addr; 3669 3670 /* only support when RAS is enabled */ 3671 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3672 return 0; 3673 3674 /* bail if the compute ring is not ready */ 3675 if (!ring->sched.ready) 3676 return 0; 3677 3678 total_size = 3679 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3680 total_size += 3681 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3682 total_size = ALIGN(total_size, 256); 3683 vgpr_offset = total_size; 3684 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 3685 sgpr_offset = total_size; 3686 total_size += sizeof(sgpr_init_compute_shader); 3687 3688 /* allocate an indirect buffer to put the commands in */ 3689 memset(&ib, 0, sizeof(ib)); 3690 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 3691 if (r) { 3692 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 3693 return r; 3694 } 3695 3696 /* load the compute shaders */ 3697 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 3698 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 3699 3700 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 3701 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 3702 3703 /* init the ib length to 0 */ 3704 ib.length_dw = 0; 3705 3706 /* VGPR */ 3707 /* write the register state for the compute dispatch */ 3708 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 3709 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3710 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 3711 - PACKET3_SET_SH_REG_START; 3712 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 3713 } 3714 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3715 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 3716 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3717 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 3718 - PACKET3_SET_SH_REG_START; 3719 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3720 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3721 3722 /* write dispatch packet */ 3723 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3724 ib.ptr[ib.length_dw++] = 128; /* x */ 3725 ib.ptr[ib.length_dw++] = 1; /* y */ 3726 ib.ptr[ib.length_dw++] = 1; /* z */ 3727 ib.ptr[ib.length_dw++] = 3728 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3729 3730 /* write CS partial flush packet */ 3731 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3732 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3733 3734 /* SGPR */ 3735 /* write the register state for the compute dispatch */ 3736 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 3737 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3738 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 3739 - PACKET3_SET_SH_REG_START; 3740 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 3741 } 3742 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3743 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 3744 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3745 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO) 3746 - PACKET3_SET_SH_REG_START; 3747 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3748 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3749 3750 /* write dispatch packet */ 3751 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3752 ib.ptr[ib.length_dw++] = 128; /* x */ 3753 ib.ptr[ib.length_dw++] = 1; /* y */ 3754 ib.ptr[ib.length_dw++] = 1; /* z */ 3755 ib.ptr[ib.length_dw++] = 3756 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3757 3758 /* write CS partial flush packet */ 3759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3760 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3761 3762 /* shedule the ib on the ring */ 3763 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 3764 if (r) { 3765 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 3766 goto fail; 3767 } 3768 3769 /* wait for the GPU to finish processing the IB */ 3770 r = dma_fence_wait(f, false); 3771 if (r) { 3772 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 3773 goto fail; 3774 } 3775 3776 /* read back registers to clear the counters */ 3777 mutex_lock(&adev->grbm_idx_mutex); 3778 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 3779 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 3780 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 3781 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 3782 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 3783 } 3784 } 3785 } 3786 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 3787 mutex_unlock(&adev->grbm_idx_mutex); 3788 3789 fail: 3790 amdgpu_ib_free(adev, &ib, NULL); 3791 dma_fence_put(f); 3792 3793 return r; 3794 } 3795 3796 static int gfx_v9_0_early_init(void *handle) 3797 { 3798 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3799 3800 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 3801 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3802 gfx_v9_0_set_ring_funcs(adev); 3803 gfx_v9_0_set_irq_funcs(adev); 3804 gfx_v9_0_set_gds_init(adev); 3805 gfx_v9_0_set_rlc_funcs(adev); 3806 3807 return 0; 3808 } 3809 3810 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 3811 struct amdgpu_iv_entry *entry); 3812 3813 static int gfx_v9_0_ecc_late_init(void *handle) 3814 { 3815 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3816 struct ras_common_if **ras_if = &adev->gfx.ras_if; 3817 struct ras_ih_if ih_info = { 3818 .cb = gfx_v9_0_process_ras_data_cb, 3819 }; 3820 struct ras_fs_if fs_info = { 3821 .sysfs_name = "gfx_err_count", 3822 .debugfs_name = "gfx_err_inject", 3823 }; 3824 struct ras_common_if ras_block = { 3825 .block = AMDGPU_RAS_BLOCK__GFX, 3826 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 3827 .sub_block_index = 0, 3828 .name = "gfx", 3829 }; 3830 int r; 3831 3832 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 3833 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 3834 return 0; 3835 } 3836 3837 r = gfx_v9_0_do_edc_gds_workarounds(adev); 3838 if (r) 3839 return r; 3840 3841 /* requires IBs so do in late init after IB pool is initialized */ 3842 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 3843 if (r) 3844 return r; 3845 3846 /* handle resume path. */ 3847 if (*ras_if) { 3848 /* resend ras TA enable cmd during resume. 3849 * prepare to handle failure. 3850 */ 3851 ih_info.head = **ras_if; 3852 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3853 if (r) { 3854 if (r == -EAGAIN) { 3855 /* request a gpu reset. will run again. 
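 * -EAGAIN from the RAS TA means the feature cannot be re-enabled
 * yet, so request the reset and return success; this enable path
 * runs again once the reset has completed.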
*/ 3856 amdgpu_ras_request_reset_on_boot(adev, 3857 AMDGPU_RAS_BLOCK__GFX); 3858 return 0; 3859 } 3860 /* fail to enable ras, cleanup all. */ 3861 goto irq; 3862 } 3863 /* enable successfully. continue. */ 3864 goto resume; 3865 } 3866 3867 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 3868 if (!*ras_if) 3869 return -ENOMEM; 3870 3871 **ras_if = ras_block; 3872 3873 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3874 if (r) { 3875 if (r == -EAGAIN) { 3876 amdgpu_ras_request_reset_on_boot(adev, 3877 AMDGPU_RAS_BLOCK__GFX); 3878 r = 0; 3879 } 3880 goto feature; 3881 } 3882 3883 ih_info.head = **ras_if; 3884 fs_info.head = **ras_if; 3885 3886 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 3887 if (r) 3888 goto interrupt; 3889 3890 amdgpu_ras_debugfs_create(adev, &fs_info); 3891 3892 r = amdgpu_ras_sysfs_create(adev, &fs_info); 3893 if (r) 3894 goto sysfs; 3895 resume: 3896 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 3897 if (r) 3898 goto irq; 3899 3900 return 0; 3901 irq: 3902 amdgpu_ras_sysfs_remove(adev, *ras_if); 3903 sysfs: 3904 amdgpu_ras_debugfs_remove(adev, *ras_if); 3905 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 3906 interrupt: 3907 amdgpu_ras_feature_enable(adev, *ras_if, 0); 3908 feature: 3909 kfree(*ras_if); 3910 *ras_if = NULL; 3911 return r; 3912 } 3913 3914 static int gfx_v9_0_late_init(void *handle) 3915 { 3916 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3917 int r; 3918 3919 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3920 if (r) 3921 return r; 3922 3923 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3924 if (r) 3925 return r; 3926 3927 r = gfx_v9_0_ecc_late_init(handle); 3928 if (r) 3929 return r; 3930 3931 return 0; 3932 } 3933 3934 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 3935 { 3936 uint32_t rlc_setting; 3937 3938 /* if RLC is not enabled, do nothing */ 3939 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3940 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3941 return false; 3942 3943 return true; 3944 } 3945 3946 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 3947 { 3948 uint32_t data; 3949 unsigned i; 3950 3951 data = RLC_SAFE_MODE__CMD_MASK; 3952 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3953 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3954 3955 /* wait for RLC_SAFE_MODE */ 3956 for (i = 0; i < adev->usec_timeout; i++) { 3957 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3958 break; 3959 udelay(1); 3960 } 3961 } 3962 3963 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 3964 { 3965 uint32_t data; 3966 3967 data = RLC_SAFE_MODE__CMD_MASK; 3968 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3969 } 3970 3971 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3972 bool enable) 3973 { 3974 amdgpu_gfx_rlc_enter_safe_mode(adev); 3975 3976 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3977 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3978 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 3979 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 3980 } else { 3981 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 3982 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 3983 } 3984 3985 amdgpu_gfx_rlc_exit_safe_mode(adev); 3986 } 3987 3988 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 3989 bool enable) 3990 { 3991 /* TODO: double check if we need to perform under safe mode */ 3992 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 3993 3994 if 
((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 3995 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 3996 else 3997 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 3998 3999 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4000 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4001 else 4002 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4003 4004 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4005 } 4006 4007 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4008 bool enable) 4009 { 4010 uint32_t data, def; 4011 4012 amdgpu_gfx_rlc_enter_safe_mode(adev); 4013 4014 /* It is disabled by HW by default */ 4015 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4016 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4017 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4018 4019 if (adev->asic_type != CHIP_VEGA12) 4020 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4021 4022 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4023 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4024 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4025 4026 /* only for Vega10 & Raven1 */ 4027 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4028 4029 if (def != data) 4030 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4031 4032 /* MGLS is a global flag to control all MGLS in GFX */ 4033 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4034 /* 2 - RLC memory Light sleep */ 4035 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4036 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4037 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4038 if (def != data) 4039 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4040 } 4041 /* 3 - CP memory Light sleep */ 4042 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4043 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4044 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4045 if (def != data) 4046 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4047 } 4048 } 4049 } else { 4050 /* 1 - MGCG_OVERRIDE */ 4051 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4052 4053 if (adev->asic_type != CHIP_VEGA12) 4054 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4055 4056 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4057 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4058 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4059 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4060 4061 if (def != data) 4062 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4063 4064 /* 2 - disable MGLS in RLC */ 4065 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4066 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4067 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4068 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4069 } 4070 4071 /* 3 - disable MGLS in CP */ 4072 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4073 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4074 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4075 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4076 } 4077 } 4078 4079 amdgpu_gfx_rlc_exit_safe_mode(adev); 4080 } 4081 4082 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4083 bool enable) 4084 { 4085 uint32_t data, def; 4086 4087 amdgpu_gfx_rlc_enter_safe_mode(adev); 4088 4089 /* Enable 3D CGCG/CGLS */ 4090 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4091 /* write cmd to clear cgcg/cgls ov */ 4092 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4093 
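/* keep "def" as the value just read so the register writes below are
 * skipped whenever no override bit actually changes */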
/* unset CGCG override */ 4094 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4095 /* update CGCG and CGLS override bits */ 4096 if (def != data) 4097 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4098 4099 /* enable 3Dcgcg FSM(0x0000363f) */ 4100 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4101 4102 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4103 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4104 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4105 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4106 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4107 if (def != data) 4108 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4109 4110 /* set IDLE_POLL_COUNT(0x00900100) */ 4111 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4112 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4113 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4114 if (def != data) 4115 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4116 } else { 4117 /* Disable CGCG/CGLS */ 4118 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4119 /* disable cgcg, cgls should be disabled */ 4120 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4121 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4122 /* disable cgcg and cgls in FSM */ 4123 if (def != data) 4124 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4125 } 4126 4127 amdgpu_gfx_rlc_exit_safe_mode(adev); 4128 } 4129 4130 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4131 bool enable) 4132 { 4133 uint32_t def, data; 4134 4135 amdgpu_gfx_rlc_enter_safe_mode(adev); 4136 4137 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4138 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4139 /* unset CGCG override */ 4140 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4141 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4142 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4143 else 4144 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4145 /* update CGCG and CGLS override bits */ 4146 if (def != data) 4147 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4148 4149 /* enable cgcg FSM(0x0000363F) */ 4150 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4151 4152 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4153 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4154 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4155 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4156 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4157 if (def != data) 4158 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4159 4160 /* set IDLE_POLL_COUNT(0x00900100) */ 4161 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4162 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4163 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4164 if (def != data) 4165 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4166 } else { 4167 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4168 /* reset CGCG/CGLS bits */ 4169 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4170 /* disable cgcg and cgls in FSM */ 4171 if (def != data) 4172 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4173 } 4174 4175 amdgpu_gfx_rlc_exit_safe_mode(adev); 4176 } 4177 4178 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4179 bool enable) 4180 { 4181 if (enable) { 4182 /* CGCG/CGLS should be enabled after MGCG/MGLS 4183 * === MGCG + MGLS === 4184 */ 4185 
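/* enable order is MGCG -> 3D CGCG -> CGCG; the else branch below tears
 * the same clock gating state down in the reverse order */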
gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4186 /* === CGCG /CGLS for GFX 3D Only === */ 4187 gfx_v9_0_update_3d_clock_gating(adev, enable); 4188 /* === CGCG + CGLS === */ 4189 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4190 } else { 4191 /* CGCG/CGLS should be disabled before MGCG/MGLS 4192 * === CGCG + CGLS === 4193 */ 4194 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4195 /* === CGCG /CGLS for GFX 3D Only === */ 4196 gfx_v9_0_update_3d_clock_gating(adev, enable); 4197 /* === MGCG + MGLS === */ 4198 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4199 } 4200 return 0; 4201 } 4202 4203 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4204 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4205 .set_safe_mode = gfx_v9_0_set_safe_mode, 4206 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4207 .init = gfx_v9_0_rlc_init, 4208 .get_csb_size = gfx_v9_0_get_csb_size, 4209 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4210 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4211 .resume = gfx_v9_0_rlc_resume, 4212 .stop = gfx_v9_0_rlc_stop, 4213 .reset = gfx_v9_0_rlc_reset, 4214 .start = gfx_v9_0_rlc_start 4215 }; 4216 4217 static int gfx_v9_0_set_powergating_state(void *handle, 4218 enum amd_powergating_state state) 4219 { 4220 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4221 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4222 4223 switch (adev->asic_type) { 4224 case CHIP_RAVEN: 4225 if (!enable) { 4226 amdgpu_gfx_off_ctrl(adev, false); 4227 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4228 } 4229 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4230 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4231 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4232 } else { 4233 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4234 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4235 } 4236 4237 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4238 gfx_v9_0_enable_cp_power_gating(adev, true); 4239 else 4240 gfx_v9_0_enable_cp_power_gating(adev, false); 4241 4242 /* update gfx cgpg state */ 4243 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4244 4245 /* update mgcg state */ 4246 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4247 4248 if (enable) 4249 amdgpu_gfx_off_ctrl(adev, true); 4250 break; 4251 case CHIP_VEGA12: 4252 if (!enable) { 4253 amdgpu_gfx_off_ctrl(adev, false); 4254 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4255 } else { 4256 amdgpu_gfx_off_ctrl(adev, true); 4257 } 4258 break; 4259 default: 4260 break; 4261 } 4262 4263 return 0; 4264 } 4265 4266 static int gfx_v9_0_set_clockgating_state(void *handle, 4267 enum amd_clockgating_state state) 4268 { 4269 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4270 4271 if (amdgpu_sriov_vf(adev)) 4272 return 0; 4273 4274 switch (adev->asic_type) { 4275 case CHIP_VEGA10: 4276 case CHIP_VEGA12: 4277 case CHIP_VEGA20: 4278 case CHIP_RAVEN: 4279 gfx_v9_0_update_gfx_clock_gating(adev, 4280 state == AMD_CG_STATE_GATE ? 
true : false); 4281 break; 4282 default: 4283 break; 4284 } 4285 return 0; 4286 } 4287 4288 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4289 { 4290 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4291 int data; 4292 4293 if (amdgpu_sriov_vf(adev)) 4294 *flags = 0; 4295 4296 /* AMD_CG_SUPPORT_GFX_MGCG */ 4297 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4298 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4299 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4300 4301 /* AMD_CG_SUPPORT_GFX_CGCG */ 4302 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4303 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4304 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4305 4306 /* AMD_CG_SUPPORT_GFX_CGLS */ 4307 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4308 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4309 4310 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4311 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4312 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4313 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4314 4315 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4316 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4317 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4318 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4319 4320 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4321 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4322 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4323 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4324 4325 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4326 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4327 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4328 } 4329 4330 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4331 { 4332 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4333 } 4334 4335 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4336 { 4337 struct amdgpu_device *adev = ring->adev; 4338 u64 wptr; 4339 4340 /* XXX check if swapping is necessary on BE */ 4341 if (ring->use_doorbell) { 4342 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4343 } else { 4344 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4345 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4346 } 4347 4348 return wptr; 4349 } 4350 4351 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4352 { 4353 struct amdgpu_device *adev = ring->adev; 4354 4355 if (ring->use_doorbell) { 4356 /* XXX check if swapping is necessary on BE */ 4357 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4358 WDOORBELL64(ring->doorbell_index, ring->wptr); 4359 } else { 4360 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4361 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4362 } 4363 } 4364 4365 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4366 { 4367 struct amdgpu_device *adev = ring->adev; 4368 u32 ref_and_mask, reg_mem_engine; 4369 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4370 4371 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4372 switch (ring->me) { 4373 case 1: 4374 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4375 break; 4376 case 2: 4377 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4378 break; 4379 default: 4380 return; 4381 } 4382 reg_mem_engine = 0; 4383 } else { 4384 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4385 reg_mem_engine = 1; /* pfp */ 4386 } 4387 4388 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4389 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4390 
adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4391 ref_and_mask, ref_and_mask, 0x20); 4392 } 4393 4394 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4395 struct amdgpu_job *job, 4396 struct amdgpu_ib *ib, 4397 uint32_t flags) 4398 { 4399 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4400 u32 header, control = 0; 4401 4402 if (ib->flags & AMDGPU_IB_FLAG_CE) 4403 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4404 else 4405 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4406 4407 control |= ib->length_dw | (vmid << 24); 4408 4409 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4410 control |= INDIRECT_BUFFER_PRE_ENB(1); 4411 4412 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4413 gfx_v9_0_ring_emit_de_meta(ring); 4414 } 4415 4416 amdgpu_ring_write(ring, header); 4417 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4418 amdgpu_ring_write(ring, 4419 #ifdef __BIG_ENDIAN 4420 (2 << 0) | 4421 #endif 4422 lower_32_bits(ib->gpu_addr)); 4423 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4424 amdgpu_ring_write(ring, control); 4425 } 4426 4427 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4428 struct amdgpu_job *job, 4429 struct amdgpu_ib *ib, 4430 uint32_t flags) 4431 { 4432 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4433 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4434 4435 /* Currently, there is a high possibility to get wave ID mismatch 4436 * between ME and GDS, leading to a hw deadlock, because ME generates 4437 * different wave IDs than the GDS expects. This situation happens 4438 * randomly when at least 5 compute pipes use GDS ordered append. 4439 * The wave IDs generated by ME are also wrong after suspend/resume. 4440 * Those are probably bugs somewhere else in the kernel driver. 4441 * 4442 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4443 * GDS to 0 for this ring (me/pipe). 4444 */ 4445 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4446 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4447 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4448 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4449 } 4450 4451 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4452 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4453 amdgpu_ring_write(ring, 4454 #ifdef __BIG_ENDIAN 4455 (2 << 0) | 4456 #endif 4457 lower_32_bits(ib->gpu_addr)); 4458 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4459 amdgpu_ring_write(ring, control); 4460 } 4461 4462 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4463 u64 seq, unsigned flags) 4464 { 4465 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4466 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4467 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4468 4469 /* RELEASE_MEM - flush caches, send int */ 4470 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4471 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4472 EOP_TC_NC_ACTION_EN) : 4473 (EOP_TCL1_ACTION_EN | 4474 EOP_TC_ACTION_EN | 4475 EOP_TC_WB_ACTION_EN | 4476 EOP_TC_MD_ACTION_EN)) | 4477 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4478 EVENT_INDEX(5))); 4479 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4480 4481 /* 4482 * the address should be Qword aligned if 64bit write, Dword 4483 * aligned if only send 32bit data low (discard data high) 4484 */ 4485 if (write64bit) 4486 BUG_ON(addr & 0x7); 4487 else 4488 BUG_ON(addr & 0x3); 4489 amdgpu_ring_write(ring, lower_32_bits(addr)); 4490 amdgpu_ring_write(ring, upper_32_bits(addr)); 4491 amdgpu_ring_write(ring, lower_32_bits(seq)); 4492 amdgpu_ring_write(ring, upper_32_bits(seq)); 4493 amdgpu_ring_write(ring, 0); 4494 } 4495 4496 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4497 { 4498 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4499 uint32_t seq = ring->fence_drv.sync_seq; 4500 uint64_t addr = ring->fence_drv.gpu_addr; 4501 4502 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4503 lower_32_bits(addr), upper_32_bits(addr), 4504 seq, 0xffffffff, 4); 4505 } 4506 4507 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4508 unsigned vmid, uint64_t pd_addr) 4509 { 4510 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4511 4512 /* compute doesn't have PFP */ 4513 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4514 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4515 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4516 amdgpu_ring_write(ring, 0x0); 4517 } 4518 } 4519 4520 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4521 { 4522 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4523 } 4524 4525 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4526 { 4527 u64 wptr; 4528 4529 /* XXX check if swapping is necessary on BE */ 4530 if (ring->use_doorbell) 4531 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4532 else 4533 BUG(); 4534 return wptr; 4535 } 4536 4537 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4538 bool acquire) 4539 { 4540 struct amdgpu_device *adev = ring->adev; 4541 int pipe_num, tmp, reg; 4542 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4543 4544 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4545 4546 /* first me only has 2 entries, GFX and HP3D */ 4547 if (ring->me > 0) 4548 pipe_num -= 2; 4549 4550 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4551 tmp = RREG32(reg); 4552 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4553 WREG32(reg, tmp); 4554 } 4555 4556 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4557 struct amdgpu_ring *ring, 4558 bool acquire) 4559 { 4560 int i, pipe; 4561 bool reserve; 4562 struct amdgpu_ring *iring; 4563 4564 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4565 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4566 if (acquire) 4567 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4568 else 4569 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4570 4571 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4572 /* Clear all reservations - everyone reacquires all resources */ 4573 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4574 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4575 true); 4576 4577 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4578 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4579 true); 4580 } else { 4581 /* Lower all pipes without a current reservation */ 4582 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4583 iring = &adev->gfx.gfx_ring[i]; 4584 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4585 iring->me, 4586 iring->pipe, 4587 0); 4588 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4589 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4590 } 4591 4592 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4593 iring = &adev->gfx.compute_ring[i]; 4594 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4595 iring->me, 4596 iring->pipe, 4597 0); 4598 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4599 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4600 } 4601 } 4602 4603 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4604 } 4605 4606 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4607 struct amdgpu_ring *ring, 4608 bool acquire) 4609 { 4610 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4611 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4612 4613 mutex_lock(&adev->srbm_mutex); 4614 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4615 4616 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4617 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4618 4619 soc15_grbm_select(adev, 0, 0, 0, 0); 4620 mutex_unlock(&adev->srbm_mutex); 4621 } 4622 4623 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4624 enum drm_sched_priority priority) 4625 { 4626 struct amdgpu_device *adev = ring->adev; 4627 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4628 4629 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4630 return; 4631 4632 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4633 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4634 } 4635 4636 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4637 { 4638 struct amdgpu_device *adev = ring->adev; 4639 4640 /* XXX check if swapping is necessary on BE */ 4641 if (ring->use_doorbell) { 4642 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4643 WDOORBELL64(ring->doorbell_index, ring->wptr); 4644 } else{ 4645 BUG(); /* only DOORBELL method supported on gfx9 now */ 4646 } 4647 } 4648 4649 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4650 u64 seq, unsigned int flags) 4651 { 4652 struct amdgpu_device *adev = ring->adev; 4653 4654 /* we only allocate 32bit for each seq wb address */ 4655 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4656 4657 /* write fence seq to the "addr" */ 4658 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4659 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4660 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4661 amdgpu_ring_write(ring, lower_32_bits(addr)); 4662 amdgpu_ring_write(ring, upper_32_bits(addr)); 4663 amdgpu_ring_write(ring, lower_32_bits(seq)); 4664 4665 if (flags & AMDGPU_FENCE_FLAG_INT) { 4666 /* set register to trigger INT */ 4667 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4668 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4669 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4670 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4671 amdgpu_ring_write(ring, 0); 4672 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4673 } 4674 } 4675 4676 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4677 { 4678 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4679 amdgpu_ring_write(ring, 0); 4680 } 4681 4682 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 4683 { 4684 struct v9_ce_ib_state ce_payload = {0}; 4685 uint64_t csa_addr; 4686 int cnt; 4687 4688 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4689 csa_addr = amdgpu_csa_vaddr(ring->adev); 4690 4691 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4692 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4693 WRITE_DATA_DST_SEL(8) | 4694 WR_CONFIRM) | 4695 WRITE_DATA_CACHE_POLICY(0)); 4696 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4697 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4698 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 4699 } 4700 4701 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 4702 { 4703 struct v9_de_ib_state de_payload = {0}; 4704 uint64_t csa_addr, gds_addr; 4705 int cnt; 4706 4707 csa_addr = amdgpu_csa_vaddr(ring->adev); 4708 gds_addr = csa_addr + 4096; 4709 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 4710 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4711 4712 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4713 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4714 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4715 WRITE_DATA_DST_SEL(8) | 4716 WR_CONFIRM) | 4717 WRITE_DATA_CACHE_POLICY(0)); 4718 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4719 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4720 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 4721 } 4722 4723 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4724 { 4725 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4726 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 4727 } 4728 4729 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4730 { 4731 uint32_t dw2 = 0; 4732 4733 if (amdgpu_sriov_vf(ring->adev)) 4734 gfx_v9_0_ring_emit_ce_meta(ring); 4735 4736 gfx_v9_0_ring_emit_tmz(ring, true); 4737 4738 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4739 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4740 /* set load_global_config & load_global_uconfig */ 4741 dw2 |= 0x8001; 4742 /* set load_cs_sh_regs */ 4743 dw2 |= 0x01000000; 4744 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4745 dw2 |= 0x10002; 4746 4747 /* set load_ce_ram if preamble presented */ 4748 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4749 dw2 |= 0x10000000; 4750 } else { 4751 /* still load_ce_ram if this is the first time preamble presented 4752 * although there is no context switch happens. 4753 */ 4754 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4755 dw2 |= 0x10000000; 4756 } 4757 4758 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4759 amdgpu_ring_write(ring, dw2); 4760 amdgpu_ring_write(ring, 0); 4761 } 4762 4763 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4764 { 4765 unsigned ret; 4766 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4767 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4768 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4769 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4770 ret = ring->wptr & ring->buf_mask; 4771 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4772 return ret; 4773 } 4774 4775 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4776 { 4777 unsigned cur; 4778 BUG_ON(offset > ring->buf_mask); 4779 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4780 4781 cur = (ring->wptr & ring->buf_mask) - 1; 4782 if (likely(cur > offset)) 4783 ring->ring[offset] = cur - offset; 4784 else 4785 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 4786 } 4787 4788 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 4789 { 4790 struct amdgpu_device *adev = ring->adev; 4791 4792 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4793 amdgpu_ring_write(ring, 0 | /* src: register*/ 4794 (5 << 8) | /* dst: memory */ 4795 (1 << 20)); /* write confirm */ 4796 amdgpu_ring_write(ring, reg); 4797 amdgpu_ring_write(ring, 0); 4798 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4799 adev->virt.reg_val_offs * 4)); 4800 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4801 adev->virt.reg_val_offs * 4)); 4802 } 4803 4804 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4805 uint32_t val) 4806 { 4807 uint32_t cmd = 0; 4808 4809 switch (ring->funcs->type) { 4810 case AMDGPU_RING_TYPE_GFX: 4811 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4812 break; 4813 case AMDGPU_RING_TYPE_KIQ: 4814 cmd = (1 << 16); /* no inc addr */ 4815 break; 4816 default: 4817 cmd = WR_CONFIRM; 4818 break; 4819 } 4820 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4821 amdgpu_ring_write(ring, cmd); 4822 amdgpu_ring_write(ring, reg); 4823 amdgpu_ring_write(ring, 0); 4824 amdgpu_ring_write(ring, val); 4825 } 4826 4827 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4828 uint32_t val, uint32_t mask) 4829 { 4830 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4831 } 4832 4833 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 4834 uint32_t reg0, uint32_t reg1, 4835 uint32_t ref, uint32_t mask) 4836 { 4837 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4838 struct amdgpu_device *adev = ring->adev; 4839 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4840 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 4841 4842 if (fw_version_ok) 4843 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 4844 ref, mask, 0x20); 4845 else 4846 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 4847 ref, mask); 4848 } 4849 4850 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 4851 { 4852 struct amdgpu_device *adev = ring->adev; 4853 uint32_t value = 0; 4854 4855 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 4856 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 4857 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 4858 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 4859 WREG32(mmSQ_CMD, value); 4860 } 4861 4862 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4863 enum amdgpu_interrupt_state state) 4864 { 4865 switch (state) { 4866 case AMDGPU_IRQ_STATE_DISABLE: 4867 case AMDGPU_IRQ_STATE_ENABLE: 4868 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4869 TIME_STAMP_INT_ENABLE, 4870 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4871 break; 4872 default: 4873 break; 4874 } 4875 } 4876 4877 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4878 int me, int pipe, 4879 enum amdgpu_interrupt_state state) 4880 { 4881 u32 mec_int_cntl, mec_int_cntl_reg; 4882 4883 /* 4884 * amdgpu controls only the first MEC. That's why this function only 4885 * handles the setting of interrupts for this specific MEC. All other 4886 * pipes' interrupts are set by amdkfd. 
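 * Requests for any other ME are therefore rejected below with a debug
 * message instead of touching interrupt controls this driver does not own.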
4887 */ 4888 4889 if (me == 1) { 4890 switch (pipe) { 4891 case 0: 4892 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4893 break; 4894 case 1: 4895 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4896 break; 4897 case 2: 4898 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4899 break; 4900 case 3: 4901 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4902 break; 4903 default: 4904 DRM_DEBUG("invalid pipe %d\n", pipe); 4905 return; 4906 } 4907 } else { 4908 DRM_DEBUG("invalid me %d\n", me); 4909 return; 4910 } 4911 4912 switch (state) { 4913 case AMDGPU_IRQ_STATE_DISABLE: 4914 mec_int_cntl = RREG32(mec_int_cntl_reg); 4915 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4916 TIME_STAMP_INT_ENABLE, 0); 4917 WREG32(mec_int_cntl_reg, mec_int_cntl); 4918 break; 4919 case AMDGPU_IRQ_STATE_ENABLE: 4920 mec_int_cntl = RREG32(mec_int_cntl_reg); 4921 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4922 TIME_STAMP_INT_ENABLE, 1); 4923 WREG32(mec_int_cntl_reg, mec_int_cntl); 4924 break; 4925 default: 4926 break; 4927 } 4928 } 4929 4930 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4931 struct amdgpu_irq_src *source, 4932 unsigned type, 4933 enum amdgpu_interrupt_state state) 4934 { 4935 switch (state) { 4936 case AMDGPU_IRQ_STATE_DISABLE: 4937 case AMDGPU_IRQ_STATE_ENABLE: 4938 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4939 PRIV_REG_INT_ENABLE, 4940 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4941 break; 4942 default: 4943 break; 4944 } 4945 4946 return 0; 4947 } 4948 4949 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4950 struct amdgpu_irq_src *source, 4951 unsigned type, 4952 enum amdgpu_interrupt_state state) 4953 { 4954 switch (state) { 4955 case AMDGPU_IRQ_STATE_DISABLE: 4956 case AMDGPU_IRQ_STATE_ENABLE: 4957 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4958 PRIV_INSTR_INT_ENABLE, 4959 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 4960 default: 4961 break; 4962 } 4963 4964 return 0; 4965 } 4966 4967 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 4968 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4969 CP_ECC_ERROR_INT_ENABLE, 1) 4970 4971 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 4972 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4973 CP_ECC_ERROR_INT_ENABLE, 0) 4974 4975 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 4976 struct amdgpu_irq_src *source, 4977 unsigned type, 4978 enum amdgpu_interrupt_state state) 4979 { 4980 switch (state) { 4981 case AMDGPU_IRQ_STATE_DISABLE: 4982 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4983 CP_ECC_ERROR_INT_ENABLE, 0); 4984 DISABLE_ECC_ON_ME_PIPE(1, 0); 4985 DISABLE_ECC_ON_ME_PIPE(1, 1); 4986 DISABLE_ECC_ON_ME_PIPE(1, 2); 4987 DISABLE_ECC_ON_ME_PIPE(1, 3); 4988 break; 4989 4990 case AMDGPU_IRQ_STATE_ENABLE: 4991 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4992 CP_ECC_ERROR_INT_ENABLE, 1); 4993 ENABLE_ECC_ON_ME_PIPE(1, 0); 4994 ENABLE_ECC_ON_ME_PIPE(1, 1); 4995 ENABLE_ECC_ON_ME_PIPE(1, 2); 4996 ENABLE_ECC_ON_ME_PIPE(1, 3); 4997 break; 4998 default: 4999 break; 5000 } 5001 5002 return 0; 5003 } 5004 5005 5006 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5007 struct amdgpu_irq_src *src, 5008 unsigned type, 5009 enum amdgpu_interrupt_state state) 5010 { 5011 switch (type) { 5012 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5013 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5014 break; 5015 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5016 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5017 break; 5018 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5019 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5020 break; 5021 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5022 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5023 break; 5024 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5025 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5026 break; 5027 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5028 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5029 break; 5030 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5031 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5032 break; 5033 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5034 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5035 break; 5036 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5037 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5038 break; 5039 default: 5040 break; 5041 } 5042 return 0; 5043 } 5044 5045 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5046 struct amdgpu_irq_src *source, 5047 struct amdgpu_iv_entry *entry) 5048 { 5049 int i; 5050 u8 me_id, pipe_id, queue_id; 5051 struct amdgpu_ring *ring; 5052 5053 DRM_DEBUG("IH: CP EOP\n"); 5054 me_id = (entry->ring_id & 0x0c) >> 2; 5055 pipe_id = (entry->ring_id & 0x03) >> 0; 5056 queue_id = (entry->ring_id & 0x70) >> 4; 5057 5058 switch (me_id) { 5059 case 0: 5060 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5061 break; 5062 case 1: 5063 case 2: 5064 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5065 ring = &adev->gfx.compute_ring[i]; 5066 /* Per-queue interrupt is supported for MEC starting from VI. 5067 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
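 * Hence the ring is matched on me/pipe/queue here so that only the
 * queue which actually produced this EOP entry is fence-processed.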

static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	/* TODO ue will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_UE;
}

static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
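
/*
 * A note on the ring function tables below: .emit_frame_size and
 * .emit_ib_size are worst-case dword counts.  The submission path uses
 * them to reserve ring space up front, roughly
 * emit_frame_size + num_ibs * emit_ib_size dwords per job, so every
 * optional packet a frame may emit (VM flush, GDS switch, fences, ...)
 * has to be accounted for here even if it is rarely written.
 */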

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jump to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
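
/*
 * KIQ (Kernel Interface Queue) is a privileged compute queue owned by
 * the kernel driver.  It is not used for user submissions (note the
 * absence of .emit_ib below); instead the driver uses it to map/unmap
 * the other compute queues and, via .emit_rreg/.emit_wreg, to access
 * registers through the CP, e.g. when direct MMIO access is not
 * available under SR-IOV.
 */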

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}
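
/*
 * CU (compute unit) bitmap helpers.  Two sources of "inactive" CUs are
 * combined below: CC_GC_SHADER_ARRAY_CONFIG reflects CUs fused off in
 * hardware, while GC_USER_SHADER_ARRAY_CONFIG holds an additional mask
 * requested by software (e.g. via the amdgpu disable_cu module option).
 * gfx_v9_0_get_cu_active_bitmap() ORs the two inactive masks, inverts
 * the result and clips it to max_cu_per_sh, so a set bit means the CU
 * is usable.
 */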

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
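
/*
 * gfx_v9_0_ip_block is how the rest of the driver hooks this GFX IP
 * into a SoC.  As a rough sketch of how it is consumed (the exact call
 * site lives in the SoC setup code, e.g. soc15.c, not here), the
 * per-ASIC init path adds it to the device's IP block list:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * after which the common IP framework drives the amd_ip_funcs hooks
 * (sw_init, hw_init, ...) registered above in gfx_v9_0_ip_funcs.
 */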