/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}

	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
	 * but used here for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
	 * but used here for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};

static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	int err;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VEGA12:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case CHIP_VEGA20:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	case CHIP_RAVEN:
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		if (adev->rev_id >= 8)
			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
		else
			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}

static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
				   struct amdgpu_ngg_buf *ngg_buf,
				   int size_se,
				   int default_size_se)
{
	int r;

	if (size_se < 0) {
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}

static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}

static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}

static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data, base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size */
	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 1555 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 1556 1557 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1558 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 1559 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 1560 1561 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1562 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 1563 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 1564 1565 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1566 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 1567 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 1568 1569 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1570 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 1571 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 1572 1573 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1574 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 1575 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 1576 1577 /* Clear GDS reserved memory */ 1578 r = amdgpu_ring_alloc(ring, 17); 1579 if (r) { 1580 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 1581 ring->name, r); 1582 return r; 1583 } 1584 1585 gfx_v9_0_write_data_to_reg(ring, 0, false, 1586 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 1587 (adev->gds.gds_size + 1588 adev->gfx.ngg.gds_reserve_size)); 1589 1590 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1591 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 1592 PACKET3_DMA_DATA_DST_SEL(1) | 1593 PACKET3_DMA_DATA_SRC_SEL(2))); 1594 amdgpu_ring_write(ring, 0); 1595 amdgpu_ring_write(ring, 0); 1596 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 1597 amdgpu_ring_write(ring, 0); 1598 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 1599 adev->gfx.ngg.gds_reserve_size); 1600 1601 gfx_v9_0_write_data_to_reg(ring, 0, false, 1602 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 1603 1604 amdgpu_ring_commit(ring); 1605 1606 return 0; 1607 } 1608 1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1610 int mec, int pipe, int queue) 1611 { 1612 int r; 1613 unsigned irq_type; 1614 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1615 1616 ring = &adev->gfx.compute_ring[ring_id]; 1617 1618 /* mec0 is me1 */ 1619 ring->me = mec + 1; 1620 ring->pipe = pipe; 1621 ring->queue = queue; 1622 1623 ring->ring_obj = NULL; 1624 ring->use_doorbell = true; 1625 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1626 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1627 + (ring_id * GFX9_MEC_HPD_SIZE); 1628 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1629 1630 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1631 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1632 + ring->pipe; 1633 1634 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1635 r = amdgpu_ring_init(adev, ring, 1024, 1636 &adev->gfx.eop_irq, irq_type); 1637 if (r) 1638 return r; 1639 1640 1641 return 0; 1642 } 1643 1644 static int gfx_v9_0_sw_init(void *handle) 1645 { 1646 int i, j, k, r, ring_id; 1647 struct amdgpu_ring *ring; 1648 struct amdgpu_kiq *kiq; 1649 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1650 1651 switch (adev->asic_type) { 1652 case CHIP_VEGA10: 1653 case CHIP_VEGA12: 1654 case CHIP_VEGA20: 1655 case CHIP_RAVEN: 1656 adev->gfx.mec.num_mec = 2; 1657 break; 1658 default: 1659 adev->gfx.mec.num_mec = 1; 1660 break; 1661 } 1662 1663 
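/* all gfx9 parts use 4 pipes per MEC and 8 queues per pipe */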
adev->gfx.mec.num_pipe_per_mec = 4; 1664 adev->gfx.mec.num_queue_per_pipe = 8; 1665 1666 /* EOP Event */ 1667 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 1668 if (r) 1669 return r; 1670 1671 /* Privileged reg */ 1672 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 1673 &adev->gfx.priv_reg_irq); 1674 if (r) 1675 return r; 1676 1677 /* Privileged inst */ 1678 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 1679 &adev->gfx.priv_inst_irq); 1680 if (r) 1681 return r; 1682 1683 /* ECC error */ 1684 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 1685 &adev->gfx.cp_ecc_error_irq); 1686 if (r) 1687 return r; 1688 1689 /* FUE error */ 1690 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 1691 &adev->gfx.cp_ecc_error_irq); 1692 if (r) 1693 return r; 1694 1695 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1696 1697 gfx_v9_0_scratch_init(adev); 1698 1699 r = gfx_v9_0_init_microcode(adev); 1700 if (r) { 1701 DRM_ERROR("Failed to load gfx firmware!\n"); 1702 return r; 1703 } 1704 1705 r = adev->gfx.rlc.funcs->init(adev); 1706 if (r) { 1707 DRM_ERROR("Failed to init rlc BOs!\n"); 1708 return r; 1709 } 1710 1711 r = gfx_v9_0_mec_init(adev); 1712 if (r) { 1713 DRM_ERROR("Failed to init MEC BOs!\n"); 1714 return r; 1715 } 1716 1717 /* set up the gfx ring */ 1718 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1719 ring = &adev->gfx.gfx_ring[i]; 1720 ring->ring_obj = NULL; 1721 if (!i) 1722 sprintf(ring->name, "gfx"); 1723 else 1724 sprintf(ring->name, "gfx_%d", i); 1725 ring->use_doorbell = true; 1726 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1727 r = amdgpu_ring_init(adev, ring, 1024, 1728 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 1729 if (r) 1730 return r; 1731 } 1732 1733 /* set up the compute queues - allocate horizontally across pipes */ 1734 ring_id = 0; 1735 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1736 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1737 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1738 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1739 continue; 1740 1741 r = gfx_v9_0_compute_ring_init(adev, 1742 ring_id, 1743 i, k, j); 1744 if (r) 1745 return r; 1746 1747 ring_id++; 1748 } 1749 } 1750 } 1751 1752 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1753 if (r) { 1754 DRM_ERROR("Failed to init KIQ BOs!\n"); 1755 return r; 1756 } 1757 1758 kiq = &adev->gfx.kiq; 1759 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1760 if (r) 1761 return r; 1762 1763 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1764 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 1765 if (r) 1766 return r; 1767 1768 adev->gfx.ce_ram_size = 0x8000; 1769 1770 r = gfx_v9_0_gpu_early_init(adev); 1771 if (r) 1772 return r; 1773 1774 r = gfx_v9_0_ngg_init(adev); 1775 if (r) 1776 return r; 1777 1778 return 0; 1779 } 1780 1781 1782 static int gfx_v9_0_sw_fini(void *handle) 1783 { 1784 int i; 1785 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1786 1787 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 1788 adev->gfx.ras_if) { 1789 struct ras_common_if *ras_if = adev->gfx.ras_if; 1790 struct ras_ih_if ih_info = { 1791 .head = *ras_if, 1792 }; 1793 1794 amdgpu_ras_debugfs_remove(adev, ras_if); 1795 amdgpu_ras_sysfs_remove(adev, ras_if); 1796 
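/* unhook the RAS interrupt handler and disable the GFX RAS feature before ras_if is freed */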
amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 1797 amdgpu_ras_feature_enable(adev, ras_if, 0); 1798 kfree(ras_if); 1799 } 1800 1801 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1802 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1803 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1804 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1805 1806 amdgpu_gfx_mqd_sw_fini(adev); 1807 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1808 amdgpu_gfx_kiq_fini(adev); 1809 1810 gfx_v9_0_mec_fini(adev); 1811 gfx_v9_0_ngg_fini(adev); 1812 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1813 if (adev->asic_type == CHIP_RAVEN) { 1814 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 1815 &adev->gfx.rlc.cp_table_gpu_addr, 1816 (void **)&adev->gfx.rlc.cp_table_ptr); 1817 } 1818 gfx_v9_0_free_microcode(adev); 1819 1820 return 0; 1821 } 1822 1823 1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1825 { 1826 /* TODO */ 1827 } 1828 1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1830 { 1831 u32 data; 1832 1833 if (instance == 0xffffffff) 1834 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1835 else 1836 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1837 1838 if (se_num == 0xffffffff) 1839 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1840 else 1841 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1842 1843 if (sh_num == 0xffffffff) 1844 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1845 else 1846 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1847 1848 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 1849 } 1850 1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1852 { 1853 u32 data, mask; 1854 1855 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1856 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1857 1858 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1859 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1860 1861 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1862 adev->gfx.config.max_sh_per_se); 1863 1864 return (~data) & mask; 1865 } 1866 1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1868 { 1869 int i, j; 1870 u32 data; 1871 u32 active_rbs = 0; 1872 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1873 adev->gfx.config.max_sh_per_se; 1874 1875 mutex_lock(&adev->grbm_idx_mutex); 1876 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1877 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1878 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1879 data = gfx_v9_0_get_rb_active_bitmap(adev); 1880 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1881 rb_bitmap_width_per_sh); 1882 } 1883 } 1884 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1885 mutex_unlock(&adev->grbm_idx_mutex); 1886 1887 adev->gfx.config.backend_enable_mask = active_rbs; 1888 adev->gfx.config.num_rbs = hweight32(active_rbs); 1889 } 1890 1891 #define DEFAULT_SH_MEM_BASES (0x6000) 1892 #define FIRST_COMPUTE_VMID (8) 1893 #define LAST_COMPUTE_VMID (16) 1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 1895 { 1896 int i; 1897 uint32_t sh_mem_config; 1898 uint32_t sh_mem_bases; 1899 1900 /* 1901 * Configure apertures: 1902 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1903 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 
(4GB) 1904 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1905 */ 1906 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 1907 1908 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 1909 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 1910 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 1911 1912 mutex_lock(&adev->srbm_mutex); 1913 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1914 soc15_grbm_select(adev, 0, 0, 0, i); 1915 /* CP and shaders */ 1916 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 1917 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1918 } 1919 soc15_grbm_select(adev, 0, 0, 0, 0); 1920 mutex_unlock(&adev->srbm_mutex); 1921 1922 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 1923 acccess. These should be enabled by FW for target VMIDs. */ 1924 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1925 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 1926 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 1927 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 1928 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 1929 } 1930 } 1931 1932 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 1933 { 1934 u32 tmp; 1935 int i; 1936 1937 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1938 1939 gfx_v9_0_tiling_mode_table_init(adev); 1940 1941 gfx_v9_0_setup_rb(adev); 1942 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1943 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 1944 1945 /* XXX SH_MEM regs */ 1946 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1947 mutex_lock(&adev->srbm_mutex); 1948 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { 1949 soc15_grbm_select(adev, 0, 0, 0, i); 1950 /* CP and shaders */ 1951 if (i == 0) { 1952 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1953 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1954 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1955 !!amdgpu_noretry); 1956 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1957 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 1958 } else { 1959 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1960 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1961 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1962 !!amdgpu_noretry); 1963 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1964 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1965 (adev->gmc.private_aperture_start >> 48)); 1966 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1967 (adev->gmc.shared_aperture_start >> 48)); 1968 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 1969 } 1970 } 1971 soc15_grbm_select(adev, 0, 0, 0, 0); 1972 1973 mutex_unlock(&adev->srbm_mutex); 1974 1975 gfx_v9_0_init_compute_vmid(adev); 1976 } 1977 1978 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1979 { 1980 u32 i, j, k; 1981 u32 mask; 1982 1983 mutex_lock(&adev->grbm_idx_mutex); 1984 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1985 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1986 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1987 for (k = 0; k < adev->usec_timeout; k++) { 1988 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1989 break; 1990 udelay(1); 1991 } 1992 if (k == adev->usec_timeout) { 1993 gfx_v9_0_select_se_sh(adev, 0xffffffff, 1994 0xffffffff, 0xffffffff); 1995 mutex_unlock(&adev->grbm_idx_mutex); 1996 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 1997 i, j); 1998 return; 1999 } 2000 } 2001 } 2002 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2003 
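/* per-CU serdes masters are idle on every SE/SH (or we timed out above); drop the GRBM index lock and wait on the non-CU masters below */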
mutex_unlock(&adev->grbm_idx_mutex); 2004 2005 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2006 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2007 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2008 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2009 for (k = 0; k < adev->usec_timeout; k++) { 2010 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2011 break; 2012 udelay(1); 2013 } 2014 } 2015 2016 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2017 bool enable) 2018 { 2019 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2020 2021 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2022 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2023 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2024 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2025 2026 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2027 } 2028 2029 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2030 { 2031 /* csib */ 2032 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2033 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2034 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2035 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2036 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2037 adev->gfx.rlc.clear_state_size); 2038 } 2039 2040 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2041 int indirect_offset, 2042 int list_size, 2043 int *unique_indirect_regs, 2044 int unique_indirect_reg_count, 2045 int *indirect_start_offsets, 2046 int *indirect_start_offsets_count, 2047 int max_start_offsets_count) 2048 { 2049 int idx; 2050 2051 for (; indirect_offset < list_size; indirect_offset++) { 2052 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2053 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2054 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2055 2056 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2057 indirect_offset += 2; 2058 2059 /* look for the matching indice */ 2060 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2061 if (unique_indirect_regs[idx] == 2062 register_list_format[indirect_offset] || 2063 !unique_indirect_regs[idx]) 2064 break; 2065 } 2066 2067 BUG_ON(idx >= unique_indirect_reg_count); 2068 2069 if (!unique_indirect_regs[idx]) 2070 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2071 2072 indirect_offset++; 2073 } 2074 } 2075 } 2076 2077 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2078 { 2079 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2080 int unique_indirect_reg_count = 0; 2081 2082 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2083 int indirect_start_offsets_count = 0; 2084 2085 int list_size = 0; 2086 int i = 0, j = 0; 2087 u32 tmp = 0; 2088 2089 u32 *register_list_format = 2090 kmemdup(adev->gfx.rlc.register_list_format, 2091 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2092 if (!register_list_format) 2093 return -ENOMEM; 2094 2095 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2096 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2097 gfx_v9_1_parse_ind_reg_list(register_list_format, 2098 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2099 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2100 
unique_indirect_regs, 2101 unique_indirect_reg_count, 2102 indirect_start_offsets, 2103 &indirect_start_offsets_count, 2104 ARRAY_SIZE(indirect_start_offsets)); 2105 2106 /* enable auto inc in case it is disabled */ 2107 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2108 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2109 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2110 2111 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2112 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2113 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2114 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2115 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2116 adev->gfx.rlc.register_restore[i]); 2117 2118 /* load indirect register */ 2119 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2120 adev->gfx.rlc.reg_list_format_start); 2121 2122 /* direct register portion */ 2123 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2124 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2125 register_list_format[i]); 2126 2127 /* indirect register portion */ 2128 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2129 if (register_list_format[i] == 0xFFFFFFFF) { 2130 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2131 continue; 2132 } 2133 2134 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2135 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2136 2137 for (j = 0; j < unique_indirect_reg_count; j++) { 2138 if (register_list_format[i] == unique_indirect_regs[j]) { 2139 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2140 break; 2141 } 2142 } 2143 2144 BUG_ON(j >= unique_indirect_reg_count); 2145 2146 i++; 2147 } 2148 2149 /* set save/restore list size */ 2150 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2151 list_size = list_size >> 1; 2152 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2153 adev->gfx.rlc.reg_restore_list_size); 2154 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2155 2156 /* write the starting offsets to RLC scratch ram */ 2157 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2158 adev->gfx.rlc.starting_offsets_start); 2159 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2160 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2161 indirect_start_offsets[i]); 2162 2163 /* load unique indirect regs*/ 2164 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2165 if (unique_indirect_regs[i] != 0) { 2166 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2167 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2168 unique_indirect_regs[i] & 0x3FFFF); 2169 2170 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2171 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2172 unique_indirect_regs[i] >> 20); 2173 } 2174 } 2175 2176 kfree(register_list_format); 2177 return 0; 2178 } 2179 2180 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2181 { 2182 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2183 } 2184 2185 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2186 bool enable) 2187 { 2188 uint32_t data = 0; 2189 uint32_t default_data = 0; 2190 2191 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2192 if (enable == true) { 2193 /* enable GFXIP control over CGPG */ 2194 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2195 if(default_data != data) 2196 WREG32(SOC15_REG_OFFSET(PWR, 0, 
mmPWR_MISC_CNTL_STATUS), data); 2197 2198 /* update status */ 2199 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2200 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2201 if(default_data != data) 2202 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2203 } else { 2204 /* restore GFXIP control over GCPG */ 2205 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2206 if(default_data != data) 2207 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2208 } 2209 } 2210 2211 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2212 { 2213 uint32_t data = 0; 2214 2215 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2216 AMD_PG_SUPPORT_GFX_SMG | 2217 AMD_PG_SUPPORT_GFX_DMG)) { 2218 /* init IDLE_POLL_COUNT = 60 */ 2219 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2220 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2221 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2222 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2223 2224 /* init RLC PG Delay */ 2225 data = 0; 2226 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2227 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2228 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2229 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2230 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2231 2232 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2233 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2234 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2235 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2236 2237 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2238 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2239 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2240 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2241 2242 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2243 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2244 2245 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2246 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2247 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2248 2249 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2250 } 2251 } 2252 2253 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2254 bool enable) 2255 { 2256 uint32_t data = 0; 2257 uint32_t default_data = 0; 2258 2259 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2260 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2261 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2262 enable ? 1 : 0); 2263 if (default_data != data) 2264 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2265 } 2266 2267 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2268 bool enable) 2269 { 2270 uint32_t data = 0; 2271 uint32_t default_data = 0; 2272 2273 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2274 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2275 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2276 enable ? 
1 : 0); 2277 if(default_data != data) 2278 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2279 } 2280 2281 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2282 bool enable) 2283 { 2284 uint32_t data = 0; 2285 uint32_t default_data = 0; 2286 2287 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2288 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2289 CP_PG_DISABLE, 2290 enable ? 0 : 1); 2291 if(default_data != data) 2292 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2293 } 2294 2295 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2296 bool enable) 2297 { 2298 uint32_t data, default_data; 2299 2300 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2301 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2302 GFX_POWER_GATING_ENABLE, 2303 enable ? 1 : 0); 2304 if(default_data != data) 2305 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2306 } 2307 2308 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2309 bool enable) 2310 { 2311 uint32_t data, default_data; 2312 2313 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2314 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2315 GFX_PIPELINE_PG_ENABLE, 2316 enable ? 1 : 0); 2317 if(default_data != data) 2318 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2319 2320 if (!enable) 2321 /* read any GFX register to wake up GFX */ 2322 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2323 } 2324 2325 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2326 bool enable) 2327 { 2328 uint32_t data, default_data; 2329 2330 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2331 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2332 STATIC_PER_CU_PG_ENABLE, 2333 enable ? 1 : 0); 2334 if(default_data != data) 2335 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2336 } 2337 2338 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2339 bool enable) 2340 { 2341 uint32_t data, default_data; 2342 2343 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2344 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2345 DYN_PER_CU_PG_ENABLE, 2346 enable ? 1 : 0); 2347 if(default_data != data) 2348 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2349 } 2350 2351 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2352 { 2353 gfx_v9_0_init_csb(adev); 2354 2355 /* 2356 * Rlc save restore list is workable since v2_1. 2357 * And it's needed by gfxoff feature. 
2358 */ 2359 if (adev->gfx.rlc.is_rlc_v2_1) { 2360 gfx_v9_1_init_rlc_save_restore_list(adev); 2361 gfx_v9_0_enable_save_restore_machine(adev); 2362 } 2363 2364 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2365 AMD_PG_SUPPORT_GFX_SMG | 2366 AMD_PG_SUPPORT_GFX_DMG | 2367 AMD_PG_SUPPORT_CP | 2368 AMD_PG_SUPPORT_GDS | 2369 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2370 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2371 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2372 gfx_v9_0_init_gfx_power_gating(adev); 2373 } 2374 } 2375 2376 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2377 { 2378 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2379 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2380 gfx_v9_0_wait_for_rlc_serdes(adev); 2381 } 2382 2383 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2384 { 2385 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2386 udelay(50); 2387 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2388 udelay(50); 2389 } 2390 2391 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2392 { 2393 #ifdef AMDGPU_RLC_DEBUG_RETRY 2394 u32 rlc_ucode_ver; 2395 #endif 2396 2397 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2398 udelay(50); 2399 2400 /* carrizo do enable cp interrupt after cp inited */ 2401 if (!(adev->flags & AMD_IS_APU)) { 2402 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2403 udelay(50); 2404 } 2405 2406 #ifdef AMDGPU_RLC_DEBUG_RETRY 2407 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2408 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2409 if(rlc_ucode_ver == 0x108) { 2410 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2411 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2412 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2413 * default is 0x9C4 to create a 100us interval */ 2414 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2415 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2416 * to disable the page fault retry interrupts, default is 2417 * 0x100 (256) */ 2418 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2419 } 2420 #endif 2421 } 2422 2423 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2424 { 2425 const struct rlc_firmware_header_v2_0 *hdr; 2426 const __le32 *fw_data; 2427 unsigned i, fw_size; 2428 2429 if (!adev->gfx.rlc_fw) 2430 return -EINVAL; 2431 2432 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2433 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2434 2435 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2436 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2437 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2438 2439 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2440 RLCG_UCODE_LOADING_START_ADDRESS); 2441 for (i = 0; i < fw_size; i++) 2442 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2443 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2444 2445 return 0; 2446 } 2447 2448 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2449 { 2450 int r; 2451 2452 if (amdgpu_sriov_vf(adev)) { 2453 gfx_v9_0_init_csb(adev); 2454 return 0; 2455 } 2456 2457 adev->gfx.rlc.funcs->stop(adev); 2458 2459 /* disable CG */ 2460 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2461 2462 gfx_v9_0_init_pg(adev); 2463 2464 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2465 /* legacy rlc firmware loading */ 2466 r = gfx_v9_0_rlc_load_microcode(adev); 2467 if (r) 2468 return r; 2469 } 2470 2471 switch (adev->asic_type) { 2472 case CHIP_RAVEN: 2473 if (amdgpu_lbpw == 0) 2474 
gfx_v9_0_enable_lbpw(adev, false); 2475 else 2476 gfx_v9_0_enable_lbpw(adev, true); 2477 break; 2478 case CHIP_VEGA20: 2479 if (amdgpu_lbpw > 0) 2480 gfx_v9_0_enable_lbpw(adev, true); 2481 else 2482 gfx_v9_0_enable_lbpw(adev, false); 2483 break; 2484 default: 2485 break; 2486 } 2487 2488 adev->gfx.rlc.funcs->start(adev); 2489 2490 return 0; 2491 } 2492 2493 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2494 { 2495 int i; 2496 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2497 2498 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2499 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2500 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 2501 if (!enable) { 2502 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2503 adev->gfx.gfx_ring[i].sched.ready = false; 2504 } 2505 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2506 udelay(50); 2507 } 2508 2509 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2510 { 2511 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2512 const struct gfx_firmware_header_v1_0 *ce_hdr; 2513 const struct gfx_firmware_header_v1_0 *me_hdr; 2514 const __le32 *fw_data; 2515 unsigned i, fw_size; 2516 2517 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2518 return -EINVAL; 2519 2520 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2521 adev->gfx.pfp_fw->data; 2522 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2523 adev->gfx.ce_fw->data; 2524 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2525 adev->gfx.me_fw->data; 2526 2527 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2528 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2529 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2530 2531 gfx_v9_0_cp_gfx_enable(adev, false); 2532 2533 /* PFP */ 2534 fw_data = (const __le32 *) 2535 (adev->gfx.pfp_fw->data + 2536 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2537 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2538 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2539 for (i = 0; i < fw_size; i++) 2540 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2541 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2542 2543 /* CE */ 2544 fw_data = (const __le32 *) 2545 (adev->gfx.ce_fw->data + 2546 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2547 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2548 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2549 for (i = 0; i < fw_size; i++) 2550 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2551 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2552 2553 /* ME */ 2554 fw_data = (const __le32 *) 2555 (adev->gfx.me_fw->data + 2556 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2557 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2558 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2559 for (i = 0; i < fw_size; i++) 2560 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2561 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2562 2563 return 0; 2564 } 2565 2566 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2567 { 2568 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2569 const struct cs_section_def *sect = NULL; 2570 const struct cs_extent_def *ext = NULL; 2571 int r, i, tmp; 2572 2573 /* init the CP */ 2574 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2575 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2576 2577 gfx_v9_0_cp_gfx_enable(adev, true); 2578 2579 r 
= amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2580 if (r) { 2581 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2582 return r; 2583 } 2584 2585 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2586 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2587 2588 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2589 amdgpu_ring_write(ring, 0x80000000); 2590 amdgpu_ring_write(ring, 0x80000000); 2591 2592 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2593 for (ext = sect->section; ext->extent != NULL; ++ext) { 2594 if (sect->id == SECT_CONTEXT) { 2595 amdgpu_ring_write(ring, 2596 PACKET3(PACKET3_SET_CONTEXT_REG, 2597 ext->reg_count)); 2598 amdgpu_ring_write(ring, 2599 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2600 for (i = 0; i < ext->reg_count; i++) 2601 amdgpu_ring_write(ring, ext->extent[i]); 2602 } 2603 } 2604 } 2605 2606 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2607 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2608 2609 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2610 amdgpu_ring_write(ring, 0); 2611 2612 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2613 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2614 amdgpu_ring_write(ring, 0x8000); 2615 amdgpu_ring_write(ring, 0x8000); 2616 2617 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 2618 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2619 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2620 amdgpu_ring_write(ring, tmp); 2621 amdgpu_ring_write(ring, 0); 2622 2623 amdgpu_ring_commit(ring); 2624 2625 return 0; 2626 } 2627 2628 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2629 { 2630 struct amdgpu_ring *ring; 2631 u32 tmp; 2632 u32 rb_bufsz; 2633 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2634 2635 /* Set the write pointer delay */ 2636 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2637 2638 /* set the RB to use vmid 0 */ 2639 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2640 2641 /* Set ring buffer size */ 2642 ring = &adev->gfx.gfx_ring[0]; 2643 rb_bufsz = order_base_2(ring->ring_size / 8); 2644 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2645 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2646 #ifdef __BIG_ENDIAN 2647 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2648 #endif 2649 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2650 2651 /* Initialize the ring buffer's write pointers */ 2652 ring->wptr = 0; 2653 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2654 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2655 2656 /* set the wb address wether it's enabled or not */ 2657 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2658 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2659 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2660 2661 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2662 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 2663 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 2664 2665 mdelay(1); 2666 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2667 2668 rb_addr = ring->gpu_addr >> 8; 2669 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2670 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2671 2672 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2673 if (ring->use_doorbell) { 2674 tmp = 
REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2675 DOORBELL_OFFSET, ring->doorbell_index); 2676 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2677 DOORBELL_EN, 1); 2678 } else { 2679 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 2680 } 2681 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2682 2683 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2684 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2685 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2686 2687 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2688 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2689 2690 2691 /* start the ring */ 2692 gfx_v9_0_cp_gfx_start(adev); 2693 ring->sched.ready = true; 2694 2695 return 0; 2696 } 2697 2698 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2699 { 2700 int i; 2701 2702 if (enable) { 2703 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 2704 } else { 2705 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 2706 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2707 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2708 adev->gfx.compute_ring[i].sched.ready = false; 2709 adev->gfx.kiq.ring.sched.ready = false; 2710 } 2711 udelay(50); 2712 } 2713 2714 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2715 { 2716 const struct gfx_firmware_header_v1_0 *mec_hdr; 2717 const __le32 *fw_data; 2718 unsigned i; 2719 u32 tmp; 2720 2721 if (!adev->gfx.mec_fw) 2722 return -EINVAL; 2723 2724 gfx_v9_0_cp_compute_enable(adev, false); 2725 2726 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2727 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2728 2729 fw_data = (const __le32 *) 2730 (adev->gfx.mec_fw->data + 2731 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2732 tmp = 0; 2733 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2734 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2735 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2736 2737 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2738 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2739 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2740 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2741 2742 /* MEC1 */ 2743 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2744 mec_hdr->jt_offset); 2745 for (i = 0; i < mec_hdr->jt_size; i++) 2746 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2747 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2748 2749 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2750 adev->gfx.mec_fw_version); 2751 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 2752 2753 return 0; 2754 } 2755 2756 /* KIQ functions */ 2757 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2758 { 2759 uint32_t tmp; 2760 struct amdgpu_device *adev = ring->adev; 2761 2762 /* tell RLC which is KIQ queue */ 2763 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2764 tmp &= 0xffffff00; 2765 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2766 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2767 tmp |= 0x80; 2768 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2769 } 2770 2771 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2772 { 2773 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2774 uint64_t queue_mask = 0; 2775 int r, i; 2776 2777 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2778 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2779 continue; 2780 2781 /* This situation may be hit in the future if a new HW 2782 * generation exposes more than 64 queues. If so, the 2783 * definition of queue_mask needs updating */ 2784 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 2785 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 2786 break; 2787 } 2788 2789 queue_mask |= (1ull << i); 2790 } 2791 2792 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 2793 if (r) { 2794 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2795 return r; 2796 } 2797 2798 /* set resources */ 2799 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 2800 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 2801 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 2802 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 2803 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 2804 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 2805 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 2806 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 2807 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 2808 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2809 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2810 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 2811 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2812 2813 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 2814 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 2815 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 2816 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 2817 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 2818 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 2819 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2820 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | 2821 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2822 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 2823 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2824 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2825 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2826 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2827 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2828 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2829 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2830 } 2831 2832 r = amdgpu_ring_test_helper(kiq_ring); 2833 if (r) 2834 DRM_ERROR("KCQ enable failed\n"); 2835 2836 return r; 2837 } 2838 2839 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2840 { 2841 struct amdgpu_device *adev = ring->adev; 2842 struct v9_mqd *mqd = ring->mqd_ptr; 2843 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2844 uint32_t tmp; 2845 2846 mqd->header = 0xC0310800; 2847 mqd->compute_pipelinestat_enable = 0x00000001; 2848 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2849 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2850 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2851 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2852 mqd->compute_misc_reserved = 0x00000003; 2853 2854 mqd->dynamic_cu_mask_addr_lo = 2855 lower_32_bits(ring->mqd_gpu_addr 2856 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2857 mqd->dynamic_cu_mask_addr_hi = 2858 upper_32_bits(ring->mqd_gpu_addr 2859 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2860 2861 eop_base_addr = ring->eop_gpu_addr >> 8; 2862 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2863 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2864 2865 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2866 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2867 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2868 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2869 2870 mqd->cp_hqd_eop_control = tmp; 2871 2872 /* enable doorbell? 
*/ 2873 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2874 2875 if (ring->use_doorbell) { 2876 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2877 DOORBELL_OFFSET, ring->doorbell_index); 2878 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2879 DOORBELL_EN, 1); 2880 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2881 DOORBELL_SOURCE, 0); 2882 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2883 DOORBELL_HIT, 0); 2884 } else { 2885 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2886 DOORBELL_EN, 0); 2887 } 2888 2889 mqd->cp_hqd_pq_doorbell_control = tmp; 2890 2891 /* disable the queue if it's active */ 2892 ring->wptr = 0; 2893 mqd->cp_hqd_dequeue_request = 0; 2894 mqd->cp_hqd_pq_rptr = 0; 2895 mqd->cp_hqd_pq_wptr_lo = 0; 2896 mqd->cp_hqd_pq_wptr_hi = 0; 2897 2898 /* set the pointer to the MQD */ 2899 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 2900 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2901 2902 /* set MQD vmid to 0 */ 2903 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 2904 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2905 mqd->cp_mqd_control = tmp; 2906 2907 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2908 hqd_gpu_addr = ring->gpu_addr >> 8; 2909 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2910 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2911 2912 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2913 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 2914 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2915 (order_base_2(ring->ring_size / 4) - 1)); 2916 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2917 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 2918 #ifdef __BIG_ENDIAN 2919 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 2920 #endif 2921 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2922 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 2923 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2924 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2925 mqd->cp_hqd_pq_control = tmp; 2926 2927 /* set the wb address whether it's enabled or not */ 2928 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2929 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2930 mqd->cp_hqd_pq_rptr_report_addr_hi = 2931 upper_32_bits(wb_gpu_addr) & 0xffff; 2932 2933 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2934 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2935 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2936 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2937 2938 tmp = 0; 2939 /* enable the doorbell if requested */ 2940 if (ring->use_doorbell) { 2941 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2942 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2943 DOORBELL_OFFSET, ring->doorbell_index); 2944 2945 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2946 DOORBELL_EN, 1); 2947 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2948 DOORBELL_SOURCE, 0); 2949 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2950 DOORBELL_HIT, 0); 2951 } 2952 2953 mqd->cp_hqd_pq_doorbell_control = tmp; 2954 2955 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2956 ring->wptr = 0; 2957 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 2958 2959 /* set the vmid for the queue */ 2960 mqd->cp_hqd_vmid = 0; 2961 2962 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2963 
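/* set the HQD preload size in CP_HQD_PERSISTENT_STATE */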
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2964 mqd->cp_hqd_persistent_state = tmp; 2965 2966 /* set MIN_IB_AVAIL_SIZE */ 2967 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2968 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2969 mqd->cp_hqd_ib_control = tmp; 2970 2971 /* activate the queue */ 2972 mqd->cp_hqd_active = 1; 2973 2974 return 0; 2975 } 2976 2977 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2978 { 2979 struct amdgpu_device *adev = ring->adev; 2980 struct v9_mqd *mqd = ring->mqd_ptr; 2981 int j; 2982 2983 /* disable wptr polling */ 2984 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2985 2986 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2987 mqd->cp_hqd_eop_base_addr_lo); 2988 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2989 mqd->cp_hqd_eop_base_addr_hi); 2990 2991 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2992 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 2993 mqd->cp_hqd_eop_control); 2994 2995 /* enable doorbell? */ 2996 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2997 mqd->cp_hqd_pq_doorbell_control); 2998 2999 /* disable the queue if it's active */ 3000 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3001 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3002 for (j = 0; j < adev->usec_timeout; j++) { 3003 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3004 break; 3005 udelay(1); 3006 } 3007 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3008 mqd->cp_hqd_dequeue_request); 3009 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3010 mqd->cp_hqd_pq_rptr); 3011 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3012 mqd->cp_hqd_pq_wptr_lo); 3013 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3014 mqd->cp_hqd_pq_wptr_hi); 3015 } 3016 3017 /* set the pointer to the MQD */ 3018 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3019 mqd->cp_mqd_base_addr_lo); 3020 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3021 mqd->cp_mqd_base_addr_hi); 3022 3023 /* set MQD vmid to 0 */ 3024 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3025 mqd->cp_mqd_control); 3026 3027 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3028 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3029 mqd->cp_hqd_pq_base_lo); 3030 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3031 mqd->cp_hqd_pq_base_hi); 3032 3033 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3034 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3035 mqd->cp_hqd_pq_control); 3036 3037 /* set the wb address whether it's enabled or not */ 3038 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3039 mqd->cp_hqd_pq_rptr_report_addr_lo); 3040 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3041 mqd->cp_hqd_pq_rptr_report_addr_hi); 3042 3043 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3044 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3045 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3046 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3047 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3048 3049 /* enable the doorbell if requested */ 3050 if (ring->use_doorbell) { 3051 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3052 (adev->doorbell_index.kiq * 2) << 2); 3053 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3054 (adev->doorbell_index.userqueue_end * 2) << 2); 3055 } 3056 3057 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3058 mqd->cp_hqd_pq_doorbell_control); 3059 3060 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3061 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3062 
mqd->cp_hqd_pq_wptr_lo); 3063 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3064 mqd->cp_hqd_pq_wptr_hi); 3065 3066 /* set the vmid for the queue */ 3067 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3068 3069 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3070 mqd->cp_hqd_persistent_state); 3071 3072 /* activate the queue */ 3073 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3074 mqd->cp_hqd_active); 3075 3076 if (ring->use_doorbell) 3077 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3078 3079 return 0; 3080 } 3081 3082 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3083 { 3084 struct amdgpu_device *adev = ring->adev; 3085 int j; 3086 3087 /* disable the queue if it's active */ 3088 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3089 3090 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3091 3092 for (j = 0; j < adev->usec_timeout; j++) { 3093 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3094 break; 3095 udelay(1); 3096 } 3097 3098 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3099 DRM_DEBUG("KIQ dequeue request failed.\n"); 3100 3101 /* Manual disable if dequeue request times out */ 3102 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3103 } 3104 3105 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3106 0); 3107 } 3108 3109 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3110 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3111 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3112 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3113 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3114 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3115 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3116 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3117 3118 return 0; 3119 } 3120 3121 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3122 { 3123 struct amdgpu_device *adev = ring->adev; 3124 struct v9_mqd *mqd = ring->mqd_ptr; 3125 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3126 3127 gfx_v9_0_kiq_setting(ring); 3128 3129 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3130 /* reset MQD to a clean status */ 3131 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3132 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3133 3134 /* reset ring buffer */ 3135 ring->wptr = 0; 3136 amdgpu_ring_clear_ring(ring); 3137 3138 mutex_lock(&adev->srbm_mutex); 3139 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3140 gfx_v9_0_kiq_init_register(ring); 3141 soc15_grbm_select(adev, 0, 0, 0, 0); 3142 mutex_unlock(&adev->srbm_mutex); 3143 } else { 3144 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3145 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3146 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3147 mutex_lock(&adev->srbm_mutex); 3148 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3149 gfx_v9_0_mqd_init(ring); 3150 gfx_v9_0_kiq_init_register(ring); 3151 soc15_grbm_select(adev, 0, 0, 0, 0); 3152 mutex_unlock(&adev->srbm_mutex); 3153 3154 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3155 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3156 } 3157 3158 return 0; 3159 } 3160 3161 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3162 { 3163 struct amdgpu_device *adev = ring->adev; 3164 struct v9_mqd *mqd = ring->mqd_ptr; 3165 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3166 3167 if (!adev->in_gpu_reset && !adev->in_suspend) { 3168 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3169 
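/* fresh queue init: start from a zeroed MQD with all CUs and RBs enabled */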
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3170 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3171 mutex_lock(&adev->srbm_mutex); 3172 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3173 gfx_v9_0_mqd_init(ring); 3174 soc15_grbm_select(adev, 0, 0, 0, 0); 3175 mutex_unlock(&adev->srbm_mutex); 3176 3177 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3178 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3179 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3180 /* reset MQD to a clean status */ 3181 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3182 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3183 3184 /* reset ring buffer */ 3185 ring->wptr = 0; 3186 amdgpu_ring_clear_ring(ring); 3187 } else { 3188 amdgpu_ring_clear_ring(ring); 3189 } 3190 3191 return 0; 3192 } 3193 3194 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3195 { 3196 struct amdgpu_ring *ring; 3197 int r; 3198 3199 ring = &adev->gfx.kiq.ring; 3200 3201 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3202 if (unlikely(r != 0)) 3203 return r; 3204 3205 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3206 if (unlikely(r != 0)) 3207 return r; 3208 3209 gfx_v9_0_kiq_init_queue(ring); 3210 amdgpu_bo_kunmap(ring->mqd_obj); 3211 ring->mqd_ptr = NULL; 3212 amdgpu_bo_unreserve(ring->mqd_obj); 3213 ring->sched.ready = true; 3214 return 0; 3215 } 3216 3217 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3218 { 3219 struct amdgpu_ring *ring = NULL; 3220 int r = 0, i; 3221 3222 gfx_v9_0_cp_compute_enable(adev, true); 3223 3224 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3225 ring = &adev->gfx.compute_ring[i]; 3226 3227 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3228 if (unlikely(r != 0)) 3229 goto done; 3230 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3231 if (!r) { 3232 r = gfx_v9_0_kcq_init_queue(ring); 3233 amdgpu_bo_kunmap(ring->mqd_obj); 3234 ring->mqd_ptr = NULL; 3235 } 3236 amdgpu_bo_unreserve(ring->mqd_obj); 3237 if (r) 3238 goto done; 3239 } 3240 3241 r = gfx_v9_0_kiq_kcq_enable(adev); 3242 done: 3243 return r; 3244 } 3245 3246 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3247 { 3248 int r, i; 3249 struct amdgpu_ring *ring; 3250 3251 if (!(adev->flags & AMD_IS_APU)) 3252 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3253 3254 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3255 /* legacy firmware loading */ 3256 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3257 if (r) 3258 return r; 3259 3260 r = gfx_v9_0_cp_compute_load_microcode(adev); 3261 if (r) 3262 return r; 3263 } 3264 3265 r = gfx_v9_0_kiq_resume(adev); 3266 if (r) 3267 return r; 3268 3269 r = gfx_v9_0_cp_gfx_resume(adev); 3270 if (r) 3271 return r; 3272 3273 r = gfx_v9_0_kcq_resume(adev); 3274 if (r) 3275 return r; 3276 3277 ring = &adev->gfx.gfx_ring[0]; 3278 r = amdgpu_ring_test_helper(ring); 3279 if (r) 3280 return r; 3281 3282 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3283 ring = &adev->gfx.compute_ring[i]; 3284 amdgpu_ring_test_helper(ring); 3285 } 3286 3287 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3288 3289 return 0; 3290 } 3291 3292 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3293 { 3294 gfx_v9_0_cp_gfx_enable(adev, enable); 3295 gfx_v9_0_cp_compute_enable(adev, enable); 3296 } 3297 3298 static int gfx_v9_0_hw_init(void *handle) 3299 { 3300 int r; 3301 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3302 3303 
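/* hw_init bring-up order: golden registers, static constants, CSB VRAM pin, RLC resume, CP (gfx + compute) resume, then NGG enable */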
gfx_v9_0_init_golden_registers(adev); 3304 3305 gfx_v9_0_constants_init(adev); 3306 3307 r = gfx_v9_0_csb_vram_pin(adev); 3308 if (r) 3309 return r; 3310 3311 r = adev->gfx.rlc.funcs->resume(adev); 3312 if (r) 3313 return r; 3314 3315 r = gfx_v9_0_cp_resume(adev); 3316 if (r) 3317 return r; 3318 3319 r = gfx_v9_0_ngg_en(adev); 3320 if (r) 3321 return r; 3322 3323 return r; 3324 } 3325 3326 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3327 { 3328 int r, i; 3329 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3330 3331 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3332 if (r) 3333 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3334 3335 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3336 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3337 3338 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3339 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3340 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3341 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3342 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3343 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3344 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3345 amdgpu_ring_write(kiq_ring, 0); 3346 amdgpu_ring_write(kiq_ring, 0); 3347 amdgpu_ring_write(kiq_ring, 0); 3348 } 3349 r = amdgpu_ring_test_helper(kiq_ring); 3350 if (r) 3351 DRM_ERROR("KCQ disable failed\n"); 3352 3353 return r; 3354 } 3355 3356 static int gfx_v9_0_hw_fini(void *handle) 3357 { 3358 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3359 3360 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3361 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3362 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3363 3364 /* disable KCQ so the CPC does not keep touching memory that is no longer valid */ 3365 gfx_v9_0_kcq_disable(adev); 3366 3367 if (amdgpu_sriov_vf(adev)) { 3368 gfx_v9_0_cp_gfx_enable(adev, false); 3369 /* must disable polling for SRIOV once the hw is torn down, otherwise the 3370 * CPC engine may keep fetching a WB address that is already invalid 3371 * after the sw teardown and trigger a DMAR read error on the 3372 * hypervisor side.
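* Clearing CP_PQ_WPTR_POLL_CNTL.EN below stops that polling before the guest's write-back buffers go away.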
3373 */ 3374 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3375 return 0; 3376 } 3377 3378 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3379 * otherwise KIQ is hanging when binding back 3380 */ 3381 if (!adev->in_gpu_reset && !adev->in_suspend) { 3382 mutex_lock(&adev->srbm_mutex); 3383 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3384 adev->gfx.kiq.ring.pipe, 3385 adev->gfx.kiq.ring.queue, 0); 3386 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3387 soc15_grbm_select(adev, 0, 0, 0, 0); 3388 mutex_unlock(&adev->srbm_mutex); 3389 } 3390 3391 gfx_v9_0_cp_enable(adev, false); 3392 adev->gfx.rlc.funcs->stop(adev); 3393 3394 gfx_v9_0_csb_vram_unpin(adev); 3395 3396 return 0; 3397 } 3398 3399 static int gfx_v9_0_suspend(void *handle) 3400 { 3401 return gfx_v9_0_hw_fini(handle); 3402 } 3403 3404 static int gfx_v9_0_resume(void *handle) 3405 { 3406 return gfx_v9_0_hw_init(handle); 3407 } 3408 3409 static bool gfx_v9_0_is_idle(void *handle) 3410 { 3411 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3412 3413 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3414 GRBM_STATUS, GUI_ACTIVE)) 3415 return false; 3416 else 3417 return true; 3418 } 3419 3420 static int gfx_v9_0_wait_for_idle(void *handle) 3421 { 3422 unsigned i; 3423 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3424 3425 for (i = 0; i < adev->usec_timeout; i++) { 3426 if (gfx_v9_0_is_idle(handle)) 3427 return 0; 3428 udelay(1); 3429 } 3430 return -ETIMEDOUT; 3431 } 3432 3433 static int gfx_v9_0_soft_reset(void *handle) 3434 { 3435 u32 grbm_soft_reset = 0; 3436 u32 tmp; 3437 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3438 3439 /* GRBM_STATUS */ 3440 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3441 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3442 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3443 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3444 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3445 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3446 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3447 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3448 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3449 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3450 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3451 } 3452 3453 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3454 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3455 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3456 } 3457 3458 /* GRBM_STATUS2 */ 3459 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3460 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3461 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3462 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3463 3464 3465 if (grbm_soft_reset) { 3466 /* stop the rlc */ 3467 adev->gfx.rlc.funcs->stop(adev); 3468 3469 /* Disable GFX parsing/prefetching */ 3470 gfx_v9_0_cp_gfx_enable(adev, false); 3471 3472 /* Disable MEC parsing/prefetching */ 3473 gfx_v9_0_cp_compute_enable(adev, false); 3474 3475 if (grbm_soft_reset) { 3476 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3477 tmp |= grbm_soft_reset; 3478 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3479 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3480 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3481 3482 udelay(50); 3483 3484 tmp &= ~grbm_soft_reset; 3485 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3486 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3487 } 3488 3489 /* Wait a little for things to settle down */ 3490 
udelay(50); 3491 } 3492 return 0; 3493 } 3494 3495 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3496 { 3497 uint64_t clock; 3498 3499 mutex_lock(&adev->gfx.gpu_clock_mutex); 3500 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3501 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3502 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3503 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3504 return clock; 3505 } 3506 3507 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3508 uint32_t vmid, 3509 uint32_t gds_base, uint32_t gds_size, 3510 uint32_t gws_base, uint32_t gws_size, 3511 uint32_t oa_base, uint32_t oa_size) 3512 { 3513 struct amdgpu_device *adev = ring->adev; 3514 3515 /* GDS Base */ 3516 gfx_v9_0_write_data_to_reg(ring, 0, false, 3517 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3518 gds_base); 3519 3520 /* GDS Size */ 3521 gfx_v9_0_write_data_to_reg(ring, 0, false, 3522 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3523 gds_size); 3524 3525 /* GWS */ 3526 gfx_v9_0_write_data_to_reg(ring, 0, false, 3527 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3528 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3529 3530 /* OA */ 3531 gfx_v9_0_write_data_to_reg(ring, 0, false, 3532 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3533 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3534 } 3535 3536 static const u32 vgpr_init_compute_shader[] = 3537 { 3538 0xb07c0000, 0xbe8000ff, 3539 0x000000f8, 0xbf110800, 3540 0x7e000280, 0x7e020280, 3541 0x7e040280, 0x7e060280, 3542 0x7e080280, 0x7e0a0280, 3543 0x7e0c0280, 0x7e0e0280, 3544 0x80808800, 0xbe803200, 3545 0xbf84fff5, 0xbf9c0000, 3546 0xd28c0001, 0x0001007f, 3547 0xd28d0001, 0x0002027e, 3548 0x10020288, 0xb8810904, 3549 0xb7814000, 0xd1196a01, 3550 0x00000301, 0xbe800087, 3551 0xbefc00c1, 0xd89c4000, 3552 0x00020201, 0xd89cc080, 3553 0x00040401, 0x320202ff, 3554 0x00000800, 0x80808100, 3555 0xbf84fff8, 0x7e020280, 3556 0xbf810000, 0x00000000, 3557 }; 3558 3559 static const u32 sgpr_init_compute_shader[] = 3560 { 3561 0xb07c0000, 0xbe8000ff, 3562 0x0000005f, 0xbee50080, 3563 0xbe812c65, 0xbe822c65, 3564 0xbe832c65, 0xbe842c65, 3565 0xbe852c65, 0xb77c0005, 3566 0x80808500, 0xbf84fff8, 3567 0xbe800080, 0xbf810000, 3568 }; 3569 3570 static const struct soc15_reg_entry vgpr_init_regs[] = { 3571 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3572 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3573 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3574 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3581 }; 3582 3583 static const struct soc15_reg_entry sgpr_init_regs[] = { 3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3585 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3586 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3587 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 
0xffffffff }, 3588 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3589 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3591 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3592 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3593 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3594 }; 3595 3596 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3597 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3598 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3599 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3600 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3601 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3602 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3603 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3604 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3605 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3606 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3607 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3608 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3609 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3610 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3611 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3612 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3613 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3614 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3615 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3616 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3617 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3618 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3619 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 3620 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 3621 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 3622 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 3623 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 3624 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 3625 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 3626 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 3627 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 3628 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 3629 }; 3630 3631 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 3632 { 3633 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3634 int i, r; 3635 3636 r = amdgpu_ring_alloc(ring, 7); 3637 if (r) { 3638 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 3639 ring->name, r); 3640 return r; 3641 } 3642 3643 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 3644 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 3645 3646 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 3647 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 3648 PACKET3_DMA_DATA_DST_SEL(1) | 3649 PACKET3_DMA_DATA_SRC_SEL(2) | 3650 PACKET3_DMA_DATA_ENGINE(0))); 3651 amdgpu_ring_write(ring, 0); 3652 amdgpu_ring_write(ring, 0); 3653 amdgpu_ring_write(ring, 0); 3654 amdgpu_ring_write(ring, 0); 3655 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 3656 adev->gds.gds_size); 3657 3658 amdgpu_ring_commit(ring); 3659 3660 for (i = 0; i < adev->usec_timeout; i++) { 3661 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 3662 break; 3663 udelay(1); 3664 } 3665 3666 if (i >= 
adev->usec_timeout) 3667 r = -ETIMEDOUT; 3668 3669 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 3670 3671 return r; 3672 } 3673 3674 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 3675 { 3676 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3677 struct amdgpu_ib ib; 3678 struct dma_fence *f = NULL; 3679 int r, i, j, k; 3680 unsigned total_size, vgpr_offset, sgpr_offset; 3681 u64 gpu_addr; 3682 3683 /* only support when RAS is enabled */ 3684 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3685 return 0; 3686 3687 /* bail if the compute ring is not ready */ 3688 if (!ring->sched.ready) 3689 return 0; 3690 3691 total_size = 3692 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3693 total_size += 3694 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3695 total_size = ALIGN(total_size, 256); 3696 vgpr_offset = total_size; 3697 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 3698 sgpr_offset = total_size; 3699 total_size += sizeof(sgpr_init_compute_shader); 3700 3701 /* allocate an indirect buffer to put the commands in */ 3702 memset(&ib, 0, sizeof(ib)); 3703 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 3704 if (r) { 3705 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 3706 return r; 3707 } 3708 3709 /* load the compute shaders */ 3710 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 3711 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 3712 3713 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 3714 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 3715 3716 /* init the ib length to 0 */ 3717 ib.length_dw = 0; 3718 3719 /* VGPR */ 3720 /* write the register state for the compute dispatch */ 3721 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 3722 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3723 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 3724 - PACKET3_SET_SH_REG_START; 3725 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 3726 } 3727 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3728 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 3729 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3730 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 3731 - PACKET3_SET_SH_REG_START; 3732 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3733 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3734 3735 /* write dispatch packet */ 3736 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3737 ib.ptr[ib.length_dw++] = 128; /* x */ 3738 ib.ptr[ib.length_dw++] = 1; /* y */ 3739 ib.ptr[ib.length_dw++] = 1; /* z */ 3740 ib.ptr[ib.length_dw++] = 3741 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3742 3743 /* write CS partial flush packet */ 3744 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3745 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3746 3747 /* SGPR */ 3748 /* write the register state for the compute dispatch */ 3749 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 3750 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3751 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 3752 - PACKET3_SET_SH_REG_START; 3753 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 3754 } 3755 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3756 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 3757 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3758 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO) 3759 - PACKET3_SET_SH_REG_START; 3760 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3761 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3762 3763 /* write dispatch packet */ 3764 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3765 ib.ptr[ib.length_dw++] = 128; /* x */ 3766 ib.ptr[ib.length_dw++] = 1; /* y */ 3767 ib.ptr[ib.length_dw++] = 1; /* z */ 3768 ib.ptr[ib.length_dw++] = 3769 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3770 3771 /* write CS partial flush packet */ 3772 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3773 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3774 3775 /* schedule the ib on the ring */ 3776 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 3777 if (r) { 3778 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 3779 goto fail; 3780 } 3781 3782 /* wait for the GPU to finish processing the IB */ 3783 r = dma_fence_wait(f, false); 3784 if (r) { 3785 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 3786 goto fail; 3787 } 3788 3789 /* read back registers to clear the counters */ 3790 mutex_lock(&adev->grbm_idx_mutex); 3791 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 3792 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 3793 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 3794 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 3795 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 3796 } 3797 } 3798 } 3799 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 3800 mutex_unlock(&adev->grbm_idx_mutex); 3801 3802 fail: 3803 amdgpu_ib_free(adev, &ib, NULL); 3804 dma_fence_put(f); 3805 3806 return r; 3807 } 3808 3809 static int gfx_v9_0_early_init(void *handle) 3810 { 3811 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3812 3813 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 3814 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3815 gfx_v9_0_set_ring_funcs(adev); 3816 gfx_v9_0_set_irq_funcs(adev); 3817 gfx_v9_0_set_gds_init(adev); 3818 gfx_v9_0_set_rlc_funcs(adev); 3819 3820 return 0; 3821 } 3822 3823 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 3824 struct amdgpu_iv_entry *entry); 3825 3826 static int gfx_v9_0_ecc_late_init(void *handle) 3827 { 3828 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3829 struct ras_common_if **ras_if = &adev->gfx.ras_if; 3830 struct ras_ih_if ih_info = { 3831 .cb = gfx_v9_0_process_ras_data_cb, 3832 }; 3833 struct ras_fs_if fs_info = { 3834 .sysfs_name = "gfx_err_count", 3835 .debugfs_name = "gfx_err_inject", 3836 }; 3837 struct ras_common_if ras_block = { 3838 .block = AMDGPU_RAS_BLOCK__GFX, 3839 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 3840 .sub_block_index = 0, 3841 .name = "gfx", 3842 }; 3843 int r; 3844 3845 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 3846 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 3847 return 0; 3848 } 3849 3850 r = gfx_v9_0_do_edc_gds_workarounds(adev); 3851 if (r) 3852 return r; 3853 3854 /* requires IBs so do in late init after IB pool is initialized */ 3855 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 3856 if (r) 3857 return r; 3858 3859 /* handle resume path. */ 3860 if (*ras_if) { 3861 /* resend ras TA enable cmd during resume. 3862 * prepare to handle failure. 3863 */ 3864 ih_info.head = **ras_if; 3865 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3866 if (r) { 3867 if (r == -EAGAIN) { 3868 /* request a gpu reset. will run again.
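* (feature enable returned -EAGAIN, so it is retried when this late-init path runs again after the reset)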
*/ 3869 amdgpu_ras_request_reset_on_boot(adev, 3870 AMDGPU_RAS_BLOCK__GFX); 3871 return 0; 3872 } 3873 /* fail to enable ras, cleanup all. */ 3874 goto irq; 3875 } 3876 /* enable successfully. continue. */ 3877 goto resume; 3878 } 3879 3880 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 3881 if (!*ras_if) 3882 return -ENOMEM; 3883 3884 **ras_if = ras_block; 3885 3886 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3887 if (r) { 3888 if (r == -EAGAIN) { 3889 amdgpu_ras_request_reset_on_boot(adev, 3890 AMDGPU_RAS_BLOCK__GFX); 3891 r = 0; 3892 } 3893 goto feature; 3894 } 3895 3896 ih_info.head = **ras_if; 3897 fs_info.head = **ras_if; 3898 3899 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 3900 if (r) 3901 goto interrupt; 3902 3903 amdgpu_ras_debugfs_create(adev, &fs_info); 3904 3905 r = amdgpu_ras_sysfs_create(adev, &fs_info); 3906 if (r) 3907 goto sysfs; 3908 resume: 3909 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 3910 if (r) 3911 goto irq; 3912 3913 return 0; 3914 irq: 3915 amdgpu_ras_sysfs_remove(adev, *ras_if); 3916 sysfs: 3917 amdgpu_ras_debugfs_remove(adev, *ras_if); 3918 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 3919 interrupt: 3920 amdgpu_ras_feature_enable(adev, *ras_if, 0); 3921 feature: 3922 kfree(*ras_if); 3923 *ras_if = NULL; 3924 return r; 3925 } 3926 3927 static int gfx_v9_0_late_init(void *handle) 3928 { 3929 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3930 int r; 3931 3932 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3933 if (r) 3934 return r; 3935 3936 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3937 if (r) 3938 return r; 3939 3940 r = gfx_v9_0_ecc_late_init(handle); 3941 if (r) 3942 return r; 3943 3944 return 0; 3945 } 3946 3947 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 3948 { 3949 uint32_t rlc_setting; 3950 3951 /* if RLC is not enabled, do nothing */ 3952 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3953 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3954 return false; 3955 3956 return true; 3957 } 3958 3959 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 3960 { 3961 uint32_t data; 3962 unsigned i; 3963 3964 data = RLC_SAFE_MODE__CMD_MASK; 3965 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3966 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3967 3968 /* wait for RLC_SAFE_MODE */ 3969 for (i = 0; i < adev->usec_timeout; i++) { 3970 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3971 break; 3972 udelay(1); 3973 } 3974 } 3975 3976 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 3977 { 3978 uint32_t data; 3979 3980 data = RLC_SAFE_MODE__CMD_MASK; 3981 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3982 } 3983 3984 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3985 bool enable) 3986 { 3987 amdgpu_gfx_rlc_enter_safe_mode(adev); 3988 3989 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3990 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3991 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 3992 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 3993 } else { 3994 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 3995 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 3996 } 3997 3998 amdgpu_gfx_rlc_exit_safe_mode(adev); 3999 } 4000 4001 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4002 bool enable) 4003 { 4004 /* TODO: double check if we need to perform under safe mode */ 4005 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4006 4007 if 
((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4008 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4009 else 4010 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4011 4012 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4013 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4014 else 4015 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4016 4017 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4018 } 4019 4020 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4021 bool enable) 4022 { 4023 uint32_t data, def; 4024 4025 amdgpu_gfx_rlc_enter_safe_mode(adev); 4026 4027 /* It is disabled by HW by default */ 4028 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4029 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4030 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4031 4032 if (adev->asic_type != CHIP_VEGA12) 4033 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4034 4035 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4036 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4037 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4038 4039 /* only for Vega10 & Raven1 */ 4040 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4041 4042 if (def != data) 4043 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4044 4045 /* MGLS is a global flag to control all MGLS in GFX */ 4046 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4047 /* 2 - RLC memory Light sleep */ 4048 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4049 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4050 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4051 if (def != data) 4052 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4053 } 4054 /* 3 - CP memory Light sleep */ 4055 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4056 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4057 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4058 if (def != data) 4059 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4060 } 4061 } 4062 } else { 4063 /* 1 - MGCG_OVERRIDE */ 4064 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4065 4066 if (adev->asic_type != CHIP_VEGA12) 4067 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4068 4069 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4070 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4071 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4072 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4073 4074 if (def != data) 4075 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4076 4077 /* 2 - disable MGLS in RLC */ 4078 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4079 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4080 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4081 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4082 } 4083 4084 /* 3 - disable MGLS in CP */ 4085 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4086 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4087 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4088 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4089 } 4090 } 4091 4092 amdgpu_gfx_rlc_exit_safe_mode(adev); 4093 } 4094 4095 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4096 bool enable) 4097 { 4098 uint32_t data, def; 4099 4100 amdgpu_gfx_rlc_enter_safe_mode(adev); 4101 4102 /* Enable 3D CGCG/CGLS */ 4103 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4104 /* write cmd to clear cgcg/cgls ov */ 4105 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4106 
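/* A set bit in RLC_CGTT_MGCG_OVERRIDE overrides (disables) the corresponding clock-gating feature; clearing it hands control back to the RLC CGCG/CGLS state machine, which is what the enable path below relies on. */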
/* unset CGCG override */ 4107 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4108 /* update CGCG and CGLS override bits */ 4109 if (def != data) 4110 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4111 4112 /* enable 3Dcgcg FSM(0x0000363f) */ 4113 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4114 4115 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4116 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4117 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4118 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4119 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4120 if (def != data) 4121 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4122 4123 /* set IDLE_POLL_COUNT(0x00900100) */ 4124 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4125 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4126 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4127 if (def != data) 4128 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4129 } else { 4130 /* Disable CGCG/CGLS */ 4131 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4132 /* disable cgcg, cgls should be disabled */ 4133 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4134 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4135 /* disable cgcg and cgls in FSM */ 4136 if (def != data) 4137 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4138 } 4139 4140 amdgpu_gfx_rlc_exit_safe_mode(adev); 4141 } 4142 4143 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4144 bool enable) 4145 { 4146 uint32_t def, data; 4147 4148 amdgpu_gfx_rlc_enter_safe_mode(adev); 4149 4150 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4151 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4152 /* unset CGCG override */ 4153 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4154 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4155 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4156 else 4157 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4158 /* update CGCG and CGLS override bits */ 4159 if (def != data) 4160 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4161 4162 /* enable cgcg FSM(0x0000363F) */ 4163 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4164 4165 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4166 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4167 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4168 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4169 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4170 if (def != data) 4171 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4172 4173 /* set IDLE_POLL_COUNT(0x00900100) */ 4174 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4175 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4176 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4177 if (def != data) 4178 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4179 } else { 4180 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4181 /* reset CGCG/CGLS bits */ 4182 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4183 /* disable cgcg and cgls in FSM */ 4184 if (def != data) 4185 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4186 } 4187 4188 amdgpu_gfx_rlc_exit_safe_mode(adev); 4189 } 4190 4191 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4192 bool enable) 4193 { 4194 if (enable) { 4195 /* CGCG/CGLS should be enabled after MGCG/MGLS 4196 * === MGCG + MGLS === 4197 */ 4198 
gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4199 /* === CGCG /CGLS for GFX 3D Only === */ 4200 gfx_v9_0_update_3d_clock_gating(adev, enable); 4201 /* === CGCG + CGLS === */ 4202 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4203 } else { 4204 /* CGCG/CGLS should be disabled before MGCG/MGLS 4205 * === CGCG + CGLS === 4206 */ 4207 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4208 /* === CGCG /CGLS for GFX 3D Only === */ 4209 gfx_v9_0_update_3d_clock_gating(adev, enable); 4210 /* === MGCG + MGLS === */ 4211 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4212 } 4213 return 0; 4214 } 4215 4216 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4217 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4218 .set_safe_mode = gfx_v9_0_set_safe_mode, 4219 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4220 .init = gfx_v9_0_rlc_init, 4221 .get_csb_size = gfx_v9_0_get_csb_size, 4222 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4223 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4224 .resume = gfx_v9_0_rlc_resume, 4225 .stop = gfx_v9_0_rlc_stop, 4226 .reset = gfx_v9_0_rlc_reset, 4227 .start = gfx_v9_0_rlc_start 4228 }; 4229 4230 static int gfx_v9_0_set_powergating_state(void *handle, 4231 enum amd_powergating_state state) 4232 { 4233 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4234 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4235 4236 switch (adev->asic_type) { 4237 case CHIP_RAVEN: 4238 if (!enable) { 4239 amdgpu_gfx_off_ctrl(adev, false); 4240 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4241 } 4242 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4243 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4244 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4245 } else { 4246 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4247 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4248 } 4249 4250 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4251 gfx_v9_0_enable_cp_power_gating(adev, true); 4252 else 4253 gfx_v9_0_enable_cp_power_gating(adev, false); 4254 4255 /* update gfx cgpg state */ 4256 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4257 4258 /* update mgcg state */ 4259 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4260 4261 if (enable) 4262 amdgpu_gfx_off_ctrl(adev, true); 4263 break; 4264 case CHIP_VEGA12: 4265 if (!enable) { 4266 amdgpu_gfx_off_ctrl(adev, false); 4267 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4268 } else { 4269 amdgpu_gfx_off_ctrl(adev, true); 4270 } 4271 break; 4272 default: 4273 break; 4274 } 4275 4276 return 0; 4277 } 4278 4279 static int gfx_v9_0_set_clockgating_state(void *handle, 4280 enum amd_clockgating_state state) 4281 { 4282 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4283 4284 if (amdgpu_sriov_vf(adev)) 4285 return 0; 4286 4287 switch (adev->asic_type) { 4288 case CHIP_VEGA10: 4289 case CHIP_VEGA12: 4290 case CHIP_VEGA20: 4291 case CHIP_RAVEN: 4292 gfx_v9_0_update_gfx_clock_gating(adev, 4293 state == AMD_CG_STATE_GATE ? 
true : false); 4294 break; 4295 default: 4296 break; 4297 } 4298 return 0; 4299 } 4300 4301 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4302 { 4303 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4304 int data; 4305 4306 if (amdgpu_sriov_vf(adev)) 4307 *flags = 0; 4308 4309 /* AMD_CG_SUPPORT_GFX_MGCG */ 4310 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4311 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4312 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4313 4314 /* AMD_CG_SUPPORT_GFX_CGCG */ 4315 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4316 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4317 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4318 4319 /* AMD_CG_SUPPORT_GFX_CGLS */ 4320 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4321 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4322 4323 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4324 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4325 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4326 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4327 4328 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4329 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4330 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4331 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4332 4333 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4334 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4335 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4336 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4337 4338 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4339 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4340 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4341 } 4342 4343 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4344 { 4345 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4346 } 4347 4348 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4349 { 4350 struct amdgpu_device *adev = ring->adev; 4351 u64 wptr; 4352 4353 /* XXX check if swapping is necessary on BE */ 4354 if (ring->use_doorbell) { 4355 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4356 } else { 4357 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4358 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4359 } 4360 4361 return wptr; 4362 } 4363 4364 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4365 { 4366 struct amdgpu_device *adev = ring->adev; 4367 4368 if (ring->use_doorbell) { 4369 /* XXX check if swapping is necessary on BE */ 4370 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4371 WDOORBELL64(ring->doorbell_index, ring->wptr); 4372 } else { 4373 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4374 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4375 } 4376 } 4377 4378 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4379 { 4380 struct amdgpu_device *adev = ring->adev; 4381 u32 ref_and_mask, reg_mem_engine; 4382 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4383 4384 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4385 switch (ring->me) { 4386 case 1: 4387 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4388 break; 4389 case 2: 4390 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4391 break; 4392 default: 4393 return; 4394 } 4395 reg_mem_engine = 0; 4396 } else { 4397 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4398 reg_mem_engine = 1; /* pfp */ 4399 } 4400 4401 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4402 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4403 
adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4404 ref_and_mask, ref_and_mask, 0x20); 4405 } 4406 4407 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4408 struct amdgpu_job *job, 4409 struct amdgpu_ib *ib, 4410 uint32_t flags) 4411 { 4412 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4413 u32 header, control = 0; 4414 4415 if (ib->flags & AMDGPU_IB_FLAG_CE) 4416 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4417 else 4418 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4419 4420 control |= ib->length_dw | (vmid << 24); 4421 4422 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4423 control |= INDIRECT_BUFFER_PRE_ENB(1); 4424 4425 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4426 gfx_v9_0_ring_emit_de_meta(ring); 4427 } 4428 4429 amdgpu_ring_write(ring, header); 4430 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4431 amdgpu_ring_write(ring, 4432 #ifdef __BIG_ENDIAN 4433 (2 << 0) | 4434 #endif 4435 lower_32_bits(ib->gpu_addr)); 4436 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4437 amdgpu_ring_write(ring, control); 4438 } 4439 4440 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4441 struct amdgpu_job *job, 4442 struct amdgpu_ib *ib, 4443 uint32_t flags) 4444 { 4445 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4446 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4447 4448 /* Currently, there is a high possibility to get wave ID mismatch 4449 * between ME and GDS, leading to a hw deadlock, because ME generates 4450 * different wave IDs than the GDS expects. This situation happens 4451 * randomly when at least 5 compute pipes use GDS ordered append. 4452 * The wave IDs generated by ME are also wrong after suspend/resume. 4453 * Those are probably bugs somewhere else in the kernel driver. 4454 * 4455 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4456 * GDS to 0 for this ring (me/pipe). 4457 */ 4458 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4459 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4460 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4461 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4462 } 4463 4464 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4465 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4466 amdgpu_ring_write(ring, 4467 #ifdef __BIG_ENDIAN 4468 (2 << 0) | 4469 #endif 4470 lower_32_bits(ib->gpu_addr)); 4471 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4472 amdgpu_ring_write(ring, control); 4473 } 4474 4475 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4476 u64 seq, unsigned flags) 4477 { 4478 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4479 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4480 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4481 4482 /* RELEASE_MEM - flush caches, send int */ 4483 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4484 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4485 EOP_TC_NC_ACTION_EN) : 4486 (EOP_TCL1_ACTION_EN | 4487 EOP_TC_ACTION_EN | 4488 EOP_TC_WB_ACTION_EN | 4489 EOP_TC_MD_ACTION_EN)) | 4490 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4491 EVENT_INDEX(5))); 4492 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4493 4494 /* 4495 * the address should be Qword aligned if 64bit write, Dword 4496 * aligned if only send 32bit data low (discard data high) 4497 */ 4498 if (write64bit) 4499 BUG_ON(addr & 0x7); 4500 else 4501 BUG_ON(addr & 0x3); 4502 amdgpu_ring_write(ring, lower_32_bits(addr)); 4503 amdgpu_ring_write(ring, upper_32_bits(addr)); 4504 amdgpu_ring_write(ring, lower_32_bits(seq)); 4505 amdgpu_ring_write(ring, upper_32_bits(seq)); 4506 amdgpu_ring_write(ring, 0); 4507 } 4508 4509 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4510 { 4511 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4512 uint32_t seq = ring->fence_drv.sync_seq; 4513 uint64_t addr = ring->fence_drv.gpu_addr; 4514 4515 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4516 lower_32_bits(addr), upper_32_bits(addr), 4517 seq, 0xffffffff, 4); 4518 } 4519 4520 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4521 unsigned vmid, uint64_t pd_addr) 4522 { 4523 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4524 4525 /* compute doesn't have PFP */ 4526 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4527 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4528 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4529 amdgpu_ring_write(ring, 0x0); 4530 } 4531 } 4532 4533 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4534 { 4535 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4536 } 4537 4538 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4539 { 4540 u64 wptr; 4541 4542 /* XXX check if swapping is necessary on BE */ 4543 if (ring->use_doorbell) 4544 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4545 else 4546 BUG(); 4547 return wptr; 4548 } 4549 4550 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4551 bool acquire) 4552 { 4553 struct amdgpu_device *adev = ring->adev; 4554 int pipe_num, tmp, reg; 4555 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4556 4557 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4558 4559 /* first me only has 2 entries, GFX and HP3D */ 4560 if (ring->me > 0) 4561 pipe_num -= 2; 4562 4563 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4564 tmp = RREG32(reg); 4565 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4566 WREG32(reg, tmp); 4567 } 4568 4569 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4570 struct amdgpu_ring *ring, 4571 bool acquire) 4572 { 4573 int i, pipe; 4574 bool reserve; 4575 struct amdgpu_ring *iring; 4576 4577 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4578 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4579 if (acquire) 4580 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4581 else 4582 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4583 4584 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4585 /* Clear all reservations - everyone reacquires all resources */ 4586 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4587 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4588 true); 4589 4590 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4591 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4592 true); 4593 } else { 4594 /* Lower all pipes without a current reservation */ 4595 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4596 iring = &adev->gfx.gfx_ring[i]; 4597 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4598 iring->me, 4599 iring->pipe, 4600 0); 4601 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4602 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4603 } 4604 4605 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4606 iring = &adev->gfx.compute_ring[i]; 4607 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4608 iring->me, 4609 iring->pipe, 4610 0); 4611 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4612 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4613 } 4614 } 4615 4616 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4617 } 4618 4619 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4620 struct amdgpu_ring *ring, 4621 bool acquire) 4622 { 4623 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4624 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4625 4626 mutex_lock(&adev->srbm_mutex); 4627 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4628 4629 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4631 4632 soc15_grbm_select(adev, 0, 0, 0, 0); 4633 mutex_unlock(&adev->srbm_mutex); 4634 } 4635 4636 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4637 enum drm_sched_priority priority) 4638 { 4639 struct amdgpu_device *adev = ring->adev; 4640 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4641 4642 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4643 return; 4644 4645 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4646 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4647 } 4648 4649 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4650 { 4651 struct amdgpu_device *adev = ring->adev; 4652 4653 /* XXX check if swapping is necessary on BE */ 4654 if (ring->use_doorbell) { 4655 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4656 WDOORBELL64(ring->doorbell_index, ring->wptr); 4657 } else{ 4658 BUG(); /* only DOORBELL method supported on gfx9 now */ 4659 } 4660 } 4661 4662 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4663 u64 seq, unsigned int flags) 4664 { 4665 struct amdgpu_device *adev = ring->adev; 4666 4667 /* we only allocate 32bit for each seq wb address */ 4668 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4669 4670 /* write fence seq to the "addr" */ 4671 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4672 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4673 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4674 amdgpu_ring_write(ring, lower_32_bits(addr)); 4675 amdgpu_ring_write(ring, upper_32_bits(addr)); 4676 amdgpu_ring_write(ring, lower_32_bits(seq)); 4677 4678 if (flags & AMDGPU_FENCE_FLAG_INT) { 4679 /* set register to trigger INT */ 4680 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4681 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4682 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4683 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4684 amdgpu_ring_write(ring, 0); 4685 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4686 } 4687 } 4688 4689 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4690 { 4691 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4692 amdgpu_ring_write(ring, 0); 4693 } 4694 4695 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 4696 { 4697 struct v9_ce_ib_state ce_payload = {0}; 4698 uint64_t csa_addr; 4699 int cnt; 4700 4701 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4702 csa_addr = amdgpu_csa_vaddr(ring->adev); 4703 4704 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4705 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4706 WRITE_DATA_DST_SEL(8) | 4707 WR_CONFIRM) | 4708 WRITE_DATA_CACHE_POLICY(0)); 4709 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4710 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4711 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 4712 } 4713 4714 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 4715 { 4716 struct v9_de_ib_state de_payload = {0}; 4717 uint64_t csa_addr, gds_addr; 4718 int cnt; 4719 4720 csa_addr = amdgpu_csa_vaddr(ring->adev); 4721 gds_addr = csa_addr + 4096; 4722 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 4723 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4724 4725 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4726 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4727 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4728 WRITE_DATA_DST_SEL(8) | 4729 WR_CONFIRM) | 4730 WRITE_DATA_CACHE_POLICY(0)); 4731 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4732 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4733 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 4734 } 4735 4736 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4737 { 4738 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4739 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 4740 } 4741 4742 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4743 { 4744 uint32_t dw2 = 0; 4745 4746 if (amdgpu_sriov_vf(ring->adev)) 4747 gfx_v9_0_ring_emit_ce_meta(ring); 4748 4749 gfx_v9_0_ring_emit_tmz(ring, true); 4750 4751 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4752 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4753 /* set load_global_config & load_global_uconfig */ 4754 dw2 |= 0x8001; 4755 /* set load_cs_sh_regs */ 4756 dw2 |= 0x01000000; 4757 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4758 dw2 |= 0x10002; 4759 4760 /* set load_ce_ram if preamble presented */ 4761 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4762 dw2 |= 0x10000000; 4763 } else { 4764 /* still load_ce_ram if this is the first time preamble presented 4765 * although there is no context switch happens. 4766 */ 4767 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4768 dw2 |= 0x10000000; 4769 } 4770 4771 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4772 amdgpu_ring_write(ring, dw2); 4773 amdgpu_ring_write(ring, 0); 4774 } 4775 4776 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4777 { 4778 unsigned ret; 4779 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4780 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4781 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4782 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4783 ret = ring->wptr & ring->buf_mask; 4784 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4785 return ret; 4786 } 4787 4788 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4789 { 4790 unsigned cur; 4791 BUG_ON(offset > ring->buf_mask); 4792 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4793 4794 cur = (ring->wptr & ring->buf_mask) - 1; 4795 if (likely(cur > offset)) 4796 ring->ring[offset] = cur - offset; 4797 else 4798 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 4799 } 4800 4801 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 4802 { 4803 struct amdgpu_device *adev = ring->adev; 4804 4805 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4806 amdgpu_ring_write(ring, 0 | /* src: register*/ 4807 (5 << 8) | /* dst: memory */ 4808 (1 << 20)); /* write confirm */ 4809 amdgpu_ring_write(ring, reg); 4810 amdgpu_ring_write(ring, 0); 4811 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4812 adev->virt.reg_val_offs * 4)); 4813 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4814 adev->virt.reg_val_offs * 4)); 4815 } 4816 4817 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4818 uint32_t val) 4819 { 4820 uint32_t cmd = 0; 4821 4822 switch (ring->funcs->type) { 4823 case AMDGPU_RING_TYPE_GFX: 4824 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4825 break; 4826 case AMDGPU_RING_TYPE_KIQ: 4827 cmd = (1 << 16); /* no inc addr */ 4828 break; 4829 default: 4830 cmd = WR_CONFIRM; 4831 break; 4832 } 4833 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4834 amdgpu_ring_write(ring, cmd); 4835 amdgpu_ring_write(ring, reg); 4836 amdgpu_ring_write(ring, 0); 4837 amdgpu_ring_write(ring, val); 4838 } 4839 4840 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4841 uint32_t val, uint32_t mask) 4842 { 4843 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4844 } 4845 4846 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 4847 uint32_t reg0, uint32_t reg1, 4848 uint32_t ref, uint32_t mask) 4849 { 4850 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4851 struct amdgpu_device *adev = ring->adev; 4852 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4853 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 4854 4855 if (fw_version_ok) 4856 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 4857 ref, mask, 0x20); 4858 else 4859 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 4860 ref, mask); 4861 } 4862 4863 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 4864 { 4865 struct amdgpu_device *adev = ring->adev; 4866 uint32_t value = 0; 4867 4868 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 4869 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 4870 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 4871 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 4872 WREG32(mmSQ_CMD, value); 4873 } 4874 4875 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4876 enum amdgpu_interrupt_state state) 4877 { 4878 switch (state) { 4879 case AMDGPU_IRQ_STATE_DISABLE: 4880 case AMDGPU_IRQ_STATE_ENABLE: 4881 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4882 TIME_STAMP_INT_ENABLE, 4883 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4884 break; 4885 default: 4886 break; 4887 } 4888 } 4889 4890 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4891 int me, int pipe, 4892 enum amdgpu_interrupt_state state) 4893 { 4894 u32 mec_int_cntl, mec_int_cntl_reg; 4895 4896 /* 4897 * amdgpu controls only the first MEC. That's why this function only 4898 * handles the setting of interrupts for this specific MEC. All other 4899 * pipes' interrupts are set by amdkfd. 
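* Hence only me == 1 (the first MEC) is accepted below; requests for any other ME are rejected with a debug message.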
4900 */ 4901 4902 if (me == 1) { 4903 switch (pipe) { 4904 case 0: 4905 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4906 break; 4907 case 1: 4908 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4909 break; 4910 case 2: 4911 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4912 break; 4913 case 3: 4914 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4915 break; 4916 default: 4917 DRM_DEBUG("invalid pipe %d\n", pipe); 4918 return; 4919 } 4920 } else { 4921 DRM_DEBUG("invalid me %d\n", me); 4922 return; 4923 } 4924 4925 switch (state) { 4926 case AMDGPU_IRQ_STATE_DISABLE: 4927 mec_int_cntl = RREG32(mec_int_cntl_reg); 4928 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4929 TIME_STAMP_INT_ENABLE, 0); 4930 WREG32(mec_int_cntl_reg, mec_int_cntl); 4931 break; 4932 case AMDGPU_IRQ_STATE_ENABLE: 4933 mec_int_cntl = RREG32(mec_int_cntl_reg); 4934 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4935 TIME_STAMP_INT_ENABLE, 1); 4936 WREG32(mec_int_cntl_reg, mec_int_cntl); 4937 break; 4938 default: 4939 break; 4940 } 4941 } 4942 4943 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4944 struct amdgpu_irq_src *source, 4945 unsigned type, 4946 enum amdgpu_interrupt_state state) 4947 { 4948 switch (state) { 4949 case AMDGPU_IRQ_STATE_DISABLE: 4950 case AMDGPU_IRQ_STATE_ENABLE: 4951 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4952 PRIV_REG_INT_ENABLE, 4953 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4954 break; 4955 default: 4956 break; 4957 } 4958 4959 return 0; 4960 } 4961 4962 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4963 struct amdgpu_irq_src *source, 4964 unsigned type, 4965 enum amdgpu_interrupt_state state) 4966 { 4967 switch (state) { 4968 case AMDGPU_IRQ_STATE_DISABLE: 4969 case AMDGPU_IRQ_STATE_ENABLE: 4970 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4971 PRIV_INSTR_INT_ENABLE, 4972 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 4973 default: 4974 break; 4975 } 4976 4977 return 0; 4978 } 4979 4980 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 4981 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4982 CP_ECC_ERROR_INT_ENABLE, 1) 4983 4984 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 4985 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4986 CP_ECC_ERROR_INT_ENABLE, 0) 4987 4988 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 4989 struct amdgpu_irq_src *source, 4990 unsigned type, 4991 enum amdgpu_interrupt_state state) 4992 { 4993 switch (state) { 4994 case AMDGPU_IRQ_STATE_DISABLE: 4995 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4996 CP_ECC_ERROR_INT_ENABLE, 0); 4997 DISABLE_ECC_ON_ME_PIPE(1, 0); 4998 DISABLE_ECC_ON_ME_PIPE(1, 1); 4999 DISABLE_ECC_ON_ME_PIPE(1, 2); 5000 DISABLE_ECC_ON_ME_PIPE(1, 3); 5001 break; 5002 5003 case AMDGPU_IRQ_STATE_ENABLE: 5004 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5005 CP_ECC_ERROR_INT_ENABLE, 1); 5006 ENABLE_ECC_ON_ME_PIPE(1, 0); 5007 ENABLE_ECC_ON_ME_PIPE(1, 1); 5008 ENABLE_ECC_ON_ME_PIPE(1, 2); 5009 ENABLE_ECC_ON_ME_PIPE(1, 3); 5010 break; 5011 default: 5012 break; 5013 } 5014 5015 return 0; 5016 } 5017 5018 5019 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5020 struct amdgpu_irq_src *src, 5021 unsigned type, 5022 enum amdgpu_interrupt_state state) 5023 { 5024 switch (type) { 5025 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5026 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5027 break; 5028 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5029 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5030 break; 5031 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5032 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5033 break; 5034 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5035 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5036 break; 5037 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5038 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5039 break; 5040 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5041 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5042 break; 5043 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5044 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5045 break; 5046 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5047 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5048 break; 5049 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5050 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5051 break; 5052 default: 5053 break; 5054 } 5055 return 0; 5056 } 5057 5058 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5059 struct amdgpu_irq_src *source, 5060 struct amdgpu_iv_entry *entry) 5061 { 5062 int i; 5063 u8 me_id, pipe_id, queue_id; 5064 struct amdgpu_ring *ring; 5065 5066 DRM_DEBUG("IH: CP EOP\n"); 5067 me_id = (entry->ring_id & 0x0c) >> 2; 5068 pipe_id = (entry->ring_id & 0x03) >> 0; 5069 queue_id = (entry->ring_id & 0x70) >> 4; 5070 5071 switch (me_id) { 5072 case 0: 5073 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5074 break; 5075 case 1: 5076 case 2: 5077 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5078 ring = &adev->gfx.compute_ring[i]; 5079 /* Per-queue interrupt is supported for MEC starting from VI. 5080 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
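* so the loop below matches me/pipe/queue against every compute ring and signals the one the EOP belongs to.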
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	/* TODO ue will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_UE;
}

static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

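/*
 * Note: .emit_frame_size in the ring funcs below is the worst-case number of
 * ring dwords a single frame/submission may need (the per-item comments give
 * the dword count of each packet group), and .emit_ib_size is the per-IB
 * packet size; the IB scheduling path uses these to reserve ring space before
 * emitting, which is what the "totally 242 maximum if 16 IBs" comment refers to.
 */
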
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jumps to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

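/*
 * The KIQ ring below reuses the compute rptr/wptr helpers but registers no
 * .emit_ib callback; instead it provides .emit_rreg/.emit_wreg and the
 * register-wait hooks, since the KIQ is driven by the driver itself
 * (primarily for queue management and CP-assisted register access) rather
 * than for user command submission.
 */
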
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

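/*
 * CU bitmap handling: the helpers below combine the fuse-level inactive CU
 * mask (CC_GC_SHADER_ARRAY_CONFIG) with the user/driver-requested inactive
 * mask (GC_USER_SHADER_ARRAY_CONFIG). Roughly, per shader array and for the
 * SE/SH currently selected via GRBM:
 *
 *   active = ~(CC_inactive | GC_USER_inactive) & ((1 << max_cu_per_sh) - 1)
 *
 * which is what gfx_v9_0_get_cu_active_bitmap() computes.
 */
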
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};