/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS			1
#define GFX9_MEC_HPD_SIZE			4096
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET	0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 98 99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 106 107 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 108 { 109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 110 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 111 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 112 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 115 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 116 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 117 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 120 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 121 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 122 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 124 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 125 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 126 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 127 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 129 }; 130 131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 132 { 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 148 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 149 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 150 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 151 }; 152 153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 154 { 155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 160 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 161 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 162 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 163 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 164 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 165 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 166 }; 167 168 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 169 { 170 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 171 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 172 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 173 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 174 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 175 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 176 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 177 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 178 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 179 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 180 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 181 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 182 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 183 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 184 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 185 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 186 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 187 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 188 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 189 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 190 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 191 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 192 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 193 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 194 }; 195 196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 197 { 198 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 199 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 200 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 201 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 202 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 203 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 204 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 205 }; 206 207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 208 { 209 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 210 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 211 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 212 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 213 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 214 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 215 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 216 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 217 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 218 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 219 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 220 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 221 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 222 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 223 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 224 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 225 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 226 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 227 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 228 }; 229 230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 231 { 232 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 233 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 234 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 235 }; 236 237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 238 { 239 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 240 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 241 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 242 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 243 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 244 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 245 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 246 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 247 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 248 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 249 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 250 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 251 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 252 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 253 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 254 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 255 }; 256 257 static const struct 
soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 258 { 259 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 260 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 261 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 262 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 263 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 264 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 265 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 266 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 267 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 268 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 269 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 270 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 271 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 272 }; 273 274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 275 { 276 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 277 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 278 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 279 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 280 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 281 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 282 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 283 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 284 }; 285 286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 287 { 288 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 289 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 290 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 291 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 292 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 293 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 294 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 295 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 296 }; 297 298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 302 303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 308 struct amdgpu_cu_info *cu_info); 309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); 311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 312 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 313 314 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 315 { 316 switch (adev->asic_type) { 317 case CHIP_VEGA10: 318 if (!amdgpu_virt_support_skip_setting(adev)) { 319 soc15_program_register_sequence(adev, 320 golden_settings_gc_9_0, 321 ARRAY_SIZE(golden_settings_gc_9_0)); 322 soc15_program_register_sequence(adev, 323 golden_settings_gc_9_0_vg10, 324 
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

/* Emit a WRITE_DATA packet that writes @val to the register at offset @reg. */
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

/* Emit a WAIT_REG_MEM packet: poll a register or memory location until the
 * masked value equals @ref.
 */
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
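/*
 * gfx_v9_0_ring_test_ib - sanity-check IB submission on a ring.
 *
 * Descriptive summary of the flow implemented below: a writeback slot is
 * reserved and seeded with 0xCAFEDEAD, then a small indirect buffer
 * containing a single WRITE_DATA packet is scheduled to overwrite that slot
 * with 0xDEADBEEF; the test waits on the resulting fence and checks the slot
 * to confirm the CP actually fetched and executed the IB.
 */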
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type)
{ 542 case CHIP_VEGA10: 543 if ((adev->gfx.me_fw_version >= 0x0000009c) && 544 (adev->gfx.me_feature_version >= 42) && 545 (adev->gfx.pfp_fw_version >= 0x000000b1) && 546 (adev->gfx.pfp_feature_version >= 42)) 547 adev->gfx.me_fw_write_wait = true; 548 549 if ((adev->gfx.mec_fw_version >= 0x00000193) && 550 (adev->gfx.mec_feature_version >= 42)) 551 adev->gfx.mec_fw_write_wait = true; 552 break; 553 case CHIP_VEGA12: 554 if ((adev->gfx.me_fw_version >= 0x0000009c) && 555 (adev->gfx.me_feature_version >= 44) && 556 (adev->gfx.pfp_fw_version >= 0x000000b2) && 557 (adev->gfx.pfp_feature_version >= 44)) 558 adev->gfx.me_fw_write_wait = true; 559 560 if ((adev->gfx.mec_fw_version >= 0x00000196) && 561 (adev->gfx.mec_feature_version >= 44)) 562 adev->gfx.mec_fw_write_wait = true; 563 break; 564 case CHIP_VEGA20: 565 if ((adev->gfx.me_fw_version >= 0x0000009c) && 566 (adev->gfx.me_feature_version >= 44) && 567 (adev->gfx.pfp_fw_version >= 0x000000b2) && 568 (adev->gfx.pfp_feature_version >= 44)) 569 adev->gfx.me_fw_write_wait = true; 570 571 if ((adev->gfx.mec_fw_version >= 0x00000197) && 572 (adev->gfx.mec_feature_version >= 44)) 573 adev->gfx.mec_fw_write_wait = true; 574 break; 575 case CHIP_RAVEN: 576 if ((adev->gfx.me_fw_version >= 0x0000009c) && 577 (adev->gfx.me_feature_version >= 42) && 578 (adev->gfx.pfp_fw_version >= 0x000000b1) && 579 (adev->gfx.pfp_feature_version >= 42)) 580 adev->gfx.me_fw_write_wait = true; 581 582 if ((adev->gfx.mec_fw_version >= 0x00000192) && 583 (adev->gfx.mec_feature_version >= 42)) 584 adev->gfx.mec_fw_write_wait = true; 585 break; 586 default: 587 break; 588 } 589 } 590 591 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 592 { 593 switch (adev->asic_type) { 594 case CHIP_VEGA10: 595 case CHIP_VEGA12: 596 case CHIP_VEGA20: 597 break; 598 case CHIP_RAVEN: 599 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) 600 break; 601 if ((adev->gfx.rlc_fw_version != 106 && 602 adev->gfx.rlc_fw_version < 531) || 603 (adev->gfx.rlc_fw_version == 53815) || 604 (adev->gfx.rlc_feature_version < 1) || 605 !adev->gfx.rlc.is_rlc_v2_1) 606 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 607 break; 608 default: 609 break; 610 } 611 } 612 613 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 614 { 615 const char *chip_name; 616 char fw_name[30]; 617 int err; 618 struct amdgpu_firmware_info *info = NULL; 619 const struct common_firmware_header *header = NULL; 620 const struct gfx_firmware_header_v1_0 *cp_hdr; 621 const struct rlc_firmware_header_v2_0 *rlc_hdr; 622 unsigned int *tmp = NULL; 623 unsigned int i = 0; 624 uint16_t version_major; 625 uint16_t version_minor; 626 uint32_t smu_version; 627 628 DRM_DEBUG("\n"); 629 630 switch (adev->asic_type) { 631 case CHIP_VEGA10: 632 chip_name = "vega10"; 633 break; 634 case CHIP_VEGA12: 635 chip_name = "vega12"; 636 break; 637 case CHIP_VEGA20: 638 chip_name = "vega20"; 639 break; 640 case CHIP_RAVEN: 641 if (adev->rev_id >= 8) 642 chip_name = "raven2"; 643 else if (adev->pdev->device == 0x15d8) 644 chip_name = "picasso"; 645 else 646 chip_name = "raven"; 647 break; 648 default: 649 BUG(); 650 } 651 652 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 653 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 654 if (err) 655 goto out; 656 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 657 if (err) 658 goto out; 659 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 660 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 661 
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise it is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 739 if (!adev->gfx.rlc.register_list_format) { 740 err = -ENOMEM; 741 goto out; 742 } 743 744 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 745 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 746 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 747 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 748 749 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 750 751 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 752 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 753 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 754 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 755 756 if (adev->gfx.rlc.is_rlc_v2_1) 757 gfx_v9_0_init_rlc_ext_microcode(adev); 758 759 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 760 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 761 if (err) 762 goto out; 763 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 764 if (err) 765 goto out; 766 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 767 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 768 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 769 770 771 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 772 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 773 if (!err) { 774 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 775 if (err) 776 goto out; 777 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 778 adev->gfx.mec2_fw->data; 779 adev->gfx.mec2_fw_version = 780 le32_to_cpu(cp_hdr->header.ucode_version); 781 adev->gfx.mec2_feature_version = 782 le32_to_cpu(cp_hdr->ucode_feature_version); 783 } else { 784 err = 0; 785 adev->gfx.mec2_fw = NULL; 786 } 787 788 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 789 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 790 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 791 info->fw = adev->gfx.pfp_fw; 792 header = (const struct common_firmware_header *)info->fw->data; 793 adev->firmware.fw_size += 794 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 795 796 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 797 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 798 info->fw = adev->gfx.me_fw; 799 header = (const struct common_firmware_header *)info->fw->data; 800 adev->firmware.fw_size += 801 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 802 803 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 804 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 805 info->fw = adev->gfx.ce_fw; 806 header = (const struct common_firmware_header *)info->fw->data; 807 adev->firmware.fw_size += 808 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 809 810 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 811 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 812 info->fw = adev->gfx.rlc_fw; 813 header = (const struct common_firmware_header *)info->fw->data; 814 adev->firmware.fw_size += 815 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 816 817 if (adev->gfx.rlc.is_rlc_v2_1 && 818 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 819 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 820 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 821 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 822 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 823 info->fw = adev->gfx.rlc_fw; 824 adev->firmware.fw_size += 825 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 826 827 info = 
&adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 828 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 829 info->fw = adev->gfx.rlc_fw; 830 adev->firmware.fw_size += 831 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 832 833 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 834 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 835 info->fw = adev->gfx.rlc_fw; 836 adev->firmware.fw_size += 837 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 838 } 839 840 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 841 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 842 info->fw = adev->gfx.mec_fw; 843 header = (const struct common_firmware_header *)info->fw->data; 844 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 845 adev->firmware.fw_size += 846 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 847 848 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 849 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 850 info->fw = adev->gfx.mec_fw; 851 adev->firmware.fw_size += 852 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 853 854 if (adev->gfx.mec2_fw) { 855 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 856 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 857 info->fw = adev->gfx.mec2_fw; 858 header = (const struct common_firmware_header *)info->fw->data; 859 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 860 adev->firmware.fw_size += 861 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 862 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 863 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 864 info->fw = adev->gfx.mec2_fw; 865 adev->firmware.fw_size += 866 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 867 } 868 869 } 870 871 out: 872 gfx_v9_0_check_if_need_gfxoff(adev); 873 gfx_v9_0_check_fw_write_wait(adev); 874 if (err) { 875 dev_err(adev->dev, 876 "gfx9: Failed to load firmware \"%s\"\n", 877 fw_name); 878 release_firmware(adev->gfx.pfp_fw); 879 adev->gfx.pfp_fw = NULL; 880 release_firmware(adev->gfx.me_fw); 881 adev->gfx.me_fw = NULL; 882 release_firmware(adev->gfx.ce_fw); 883 adev->gfx.ce_fw = NULL; 884 release_firmware(adev->gfx.rlc_fw); 885 adev->gfx.rlc_fw = NULL; 886 release_firmware(adev->gfx.mec_fw); 887 adev->gfx.mec_fw = NULL; 888 release_firmware(adev->gfx.mec2_fw); 889 adev->gfx.mec2_fw = NULL; 890 } 891 return err; 892 } 893 894 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 895 { 896 u32 count = 0; 897 const struct cs_section_def *sect = NULL; 898 const struct cs_extent_def *ext = NULL; 899 900 /* begin clear state */ 901 count += 2; 902 /* context control state */ 903 count += 3; 904 905 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 906 for (ext = sect->section; ext->extent != NULL; ++ext) { 907 if (sect->id == SECT_CONTEXT) 908 count += 2 + ext->reg_count; 909 else 910 return 0; 911 } 912 } 913 914 /* end clear state */ 915 count += 2; 916 /* clear state */ 917 count += 2; 918 919 return count; 920 } 921 922 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 923 volatile u32 *buffer) 924 { 925 u32 count = 0, i; 926 const struct cs_section_def *sect = NULL; 927 const struct cs_extent_def *ext = NULL; 928 929 if (adev->gfx.rlc.cs_data == NULL) 930 return; 931 if (buffer == NULL) 932 return; 933 934 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 935 buffer[count++] = 
cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 936 937 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 938 buffer[count++] = cpu_to_le32(0x80000000); 939 buffer[count++] = cpu_to_le32(0x80000000); 940 941 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 942 for (ext = sect->section; ext->extent != NULL; ++ext) { 943 if (sect->id == SECT_CONTEXT) { 944 buffer[count++] = 945 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 946 buffer[count++] = cpu_to_le32(ext->reg_index - 947 PACKET3_SET_CONTEXT_REG_START); 948 for (i = 0; i < ext->reg_count; i++) 949 buffer[count++] = cpu_to_le32(ext->extent[i]); 950 } else { 951 return; 952 } 953 } 954 } 955 956 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 957 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 958 959 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 960 buffer[count++] = cpu_to_le32(0); 961 } 962 963 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 964 { 965 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 966 uint32_t pg_always_on_cu_num = 2; 967 uint32_t always_on_cu_num; 968 uint32_t i, j, k; 969 uint32_t mask, cu_bitmap, counter; 970 971 if (adev->flags & AMD_IS_APU) 972 always_on_cu_num = 4; 973 else if (adev->asic_type == CHIP_VEGA12) 974 always_on_cu_num = 8; 975 else 976 always_on_cu_num = 12; 977 978 mutex_lock(&adev->grbm_idx_mutex); 979 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 980 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 981 mask = 1; 982 cu_bitmap = 0; 983 counter = 0; 984 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 985 986 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 987 if (cu_info->bitmap[i][j] & mask) { 988 if (counter == pg_always_on_cu_num) 989 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 990 if (counter < always_on_cu_num) 991 cu_bitmap |= mask; 992 else 993 break; 994 counter++; 995 } 996 mask <<= 1; 997 } 998 999 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1000 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1001 } 1002 } 1003 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1004 mutex_unlock(&adev->grbm_idx_mutex); 1005 } 1006 1007 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1008 { 1009 uint32_t data; 1010 1011 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1012 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1013 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1014 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1015 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1016 1017 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1018 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1019 1020 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1021 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1022 1023 mutex_lock(&adev->grbm_idx_mutex); 1024 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1025 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1026 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1027 1028 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1029 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1030 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1031 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1032 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1033 1034 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1035 data = RREG32_SOC15(GC, 0, 
mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ?
1 : 0); 1108 } 1109 1110 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1111 { 1112 return 5; 1113 } 1114 1115 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1116 { 1117 const struct cs_section_def *cs_data; 1118 int r; 1119 1120 adev->gfx.rlc.cs_data = gfx9_cs_data; 1121 1122 cs_data = adev->gfx.rlc.cs_data; 1123 1124 if (cs_data) { 1125 /* init clear state block */ 1126 r = amdgpu_gfx_rlc_init_csb(adev); 1127 if (r) 1128 return r; 1129 } 1130 1131 if (adev->asic_type == CHIP_RAVEN) { 1132 /* TODO: double check the cp_table_size for RV */ 1133 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1134 r = amdgpu_gfx_rlc_init_cpt(adev); 1135 if (r) 1136 return r; 1137 } 1138 1139 switch (adev->asic_type) { 1140 case CHIP_RAVEN: 1141 gfx_v9_0_init_lbpw(adev); 1142 break; 1143 case CHIP_VEGA20: 1144 gfx_v9_4_init_lbpw(adev); 1145 break; 1146 default: 1147 break; 1148 } 1149 1150 return 0; 1151 } 1152 1153 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) 1154 { 1155 int r; 1156 1157 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1158 if (unlikely(r != 0)) 1159 return r; 1160 1161 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, 1162 AMDGPU_GEM_DOMAIN_VRAM); 1163 if (!r) 1164 adev->gfx.rlc.clear_state_gpu_addr = 1165 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); 1166 1167 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1168 1169 return r; 1170 } 1171 1172 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) 1173 { 1174 int r; 1175 1176 if (!adev->gfx.rlc.clear_state_obj) 1177 return; 1178 1179 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1180 if (likely(r == 0)) { 1181 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1182 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1183 } 1184 } 1185 1186 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1187 { 1188 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1189 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1190 } 1191 1192 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1193 { 1194 int r; 1195 u32 *hpd; 1196 const __le32 *fw_data; 1197 unsigned fw_size; 1198 u32 *fw; 1199 size_t mec_hpd_size; 1200 1201 const struct gfx_firmware_header_v1_0 *mec_hdr; 1202 1203 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1204 1205 /* take ownership of the relevant compute queues */ 1206 amdgpu_gfx_compute_queue_acquire(adev); 1207 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1208 1209 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1210 AMDGPU_GEM_DOMAIN_VRAM, 1211 &adev->gfx.mec.hpd_eop_obj, 1212 &adev->gfx.mec.hpd_eop_gpu_addr, 1213 (void **)&hpd); 1214 if (r) { 1215 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1216 gfx_v9_0_mec_fini(adev); 1217 return r; 1218 } 1219 1220 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1221 1222 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1223 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1224 1225 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1226 1227 fw_data = (const __le32 *) 1228 (adev->gfx.mec_fw->data + 1229 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1230 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1231 1232 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1233 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1234 &adev->gfx.mec.mec_fw_obj, 1235 &adev->gfx.mec.mec_fw_gpu_addr, 1236 (void **)&fw); 1237 if (r) { 
1238 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1239 gfx_v9_0_mec_fini(adev); 1240 return r; 1241 } 1242 1243 memcpy(fw, fw_data, fw_size); 1244 1245 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1246 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1247 1248 return 0; 1249 } 1250 1251 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1252 { 1253 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1254 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1255 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1256 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1257 (SQ_IND_INDEX__FORCE_READ_MASK)); 1258 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1259 } 1260 1261 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1262 uint32_t wave, uint32_t thread, 1263 uint32_t regno, uint32_t num, uint32_t *out) 1264 { 1265 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1266 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1267 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1268 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1269 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1270 (SQ_IND_INDEX__FORCE_READ_MASK) | 1271 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1272 while (num--) 1273 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1274 } 1275 1276 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1277 { 1278 /* type 1 wave data */ 1279 dst[(*no_fields)++] = 1; 1280 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1281 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1282 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1283 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1284 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1285 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1286 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1287 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1288 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1289 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1290 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1291 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1292 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1293 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1294 } 1295 1296 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1297 uint32_t wave, uint32_t start, 1298 uint32_t size, uint32_t *dst) 1299 { 1300 wave_read_regs( 1301 adev, simd, wave, 0, 1302 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1303 } 1304 1305 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1306 uint32_t wave, uint32_t thread, 1307 uint32_t start, uint32_t size, 1308 uint32_t *dst) 1309 { 1310 wave_read_regs( 1311 adev, simd, wave, thread, 1312 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1313 } 1314 1315 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1316 u32 me, u32 pipe, u32 q, u32 vm) 1317 { 1318 soc15_grbm_select(adev, me, pipe, q, vm); 1319 } 1320 1321 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1322 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1323 .select_se_sh = &gfx_v9_0_select_se_sh, 1324 .read_wave_data = &gfx_v9_0_read_wave_data, 1325 .read_wave_sgprs = 
&gfx_v9_0_read_wave_sgprs, 1326 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1327 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q 1328 }; 1329 1330 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1331 { 1332 u32 gb_addr_config; 1333 int err; 1334 1335 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1336 1337 switch (adev->asic_type) { 1338 case CHIP_VEGA10: 1339 adev->gfx.config.max_hw_contexts = 8; 1340 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1341 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1342 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1343 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1344 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1345 break; 1346 case CHIP_VEGA12: 1347 adev->gfx.config.max_hw_contexts = 8; 1348 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1349 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1350 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1351 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1352 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1353 DRM_INFO("fix gfx.config for vega12\n"); 1354 break; 1355 case CHIP_VEGA20: 1356 adev->gfx.config.max_hw_contexts = 8; 1357 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1358 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1359 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1360 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1361 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1362 gb_addr_config &= ~0xf3e777ff; 1363 gb_addr_config |= 0x22014042; 1364 /* check vbios table if gpu info is not available */ 1365 err = amdgpu_atomfirmware_get_gfx_info(adev); 1366 if (err) 1367 return err; 1368 break; 1369 case CHIP_RAVEN: 1370 adev->gfx.config.max_hw_contexts = 8; 1371 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1372 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1373 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1374 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1375 if (adev->rev_id >= 8) 1376 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1377 else 1378 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1379 break; 1380 default: 1381 BUG(); 1382 break; 1383 } 1384 1385 adev->gfx.config.gb_addr_config = gb_addr_config; 1386 1387 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1388 REG_GET_FIELD( 1389 adev->gfx.config.gb_addr_config, 1390 GB_ADDR_CONFIG, 1391 NUM_PIPES); 1392 1393 adev->gfx.config.max_tile_pipes = 1394 adev->gfx.config.gb_addr_config_fields.num_pipes; 1395 1396 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1397 REG_GET_FIELD( 1398 adev->gfx.config.gb_addr_config, 1399 GB_ADDR_CONFIG, 1400 NUM_BANKS); 1401 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1402 REG_GET_FIELD( 1403 adev->gfx.config.gb_addr_config, 1404 GB_ADDR_CONFIG, 1405 MAX_COMPRESSED_FRAGS); 1406 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1407 REG_GET_FIELD( 1408 adev->gfx.config.gb_addr_config, 1409 GB_ADDR_CONFIG, 1410 NUM_RB_PER_SE); 1411 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1412 REG_GET_FIELD( 1413 adev->gfx.config.gb_addr_config, 1414 GB_ADDR_CONFIG, 1415 NUM_SHADER_ENGINES); 1416 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1417 REG_GET_FIELD( 1418 adev->gfx.config.gb_addr_config, 1419 GB_ADDR_CONFIG, 1420 PIPE_INTERLEAVE_SIZE)); 1421 1422 return 0; 1423 } 1424 1425 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1426 struct amdgpu_ngg_buf *ngg_buf, 1427 int size_se, 1428 int default_size_se) 1429 { 1430 int r; 1431 1432 if (size_se < 0) { 
1433 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1434 return -EINVAL; 1435 } 1436 size_se = size_se ? size_se : default_size_se; 1437 1438 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1439 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1440 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1441 &ngg_buf->bo, 1442 &ngg_buf->gpu_addr, 1443 NULL); 1444 if (r) { 1445 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1446 return r; 1447 } 1448 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1449 1450 return r; 1451 } 1452 1453 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1454 { 1455 int i; 1456 1457 for (i = 0; i < NGG_BUF_MAX; i++) 1458 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1459 &adev->gfx.ngg.buf[i].gpu_addr, 1460 NULL); 1461 1462 memset(&adev->gfx.ngg.buf[0], 0, 1463 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1464 1465 adev->gfx.ngg.init = false; 1466 1467 return 0; 1468 } 1469 1470 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 1471 { 1472 int r; 1473 1474 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 1475 return 0; 1476 1477 /* GDS reserve memory: 64 bytes alignment */ 1478 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 1479 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 1480 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 1481 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 1482 1483 /* Primitive Buffer */ 1484 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 1485 amdgpu_prim_buf_per_se, 1486 64 * 1024); 1487 if (r) { 1488 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 1489 goto err; 1490 } 1491 1492 /* Position Buffer */ 1493 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 1494 amdgpu_pos_buf_per_se, 1495 256 * 1024); 1496 if (r) { 1497 dev_err(adev->dev, "Failed to create Position Buffer\n"); 1498 goto err; 1499 } 1500 1501 /* Control Sideband */ 1502 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 1503 amdgpu_cntl_sb_buf_per_se, 1504 256); 1505 if (r) { 1506 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 1507 goto err; 1508 } 1509 1510 /* Parameter Cache, not created by default */ 1511 if (amdgpu_param_buf_per_se <= 0) 1512 goto out; 1513 1514 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 1515 amdgpu_param_buf_per_se, 1516 512 * 1024); 1517 if (r) { 1518 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 1519 goto err; 1520 } 1521 1522 out: 1523 adev->gfx.ngg.init = true; 1524 return 0; 1525 err: 1526 gfx_v9_0_ngg_fini(adev); 1527 return r; 1528 } 1529 1530 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 1531 { 1532 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 1533 int r; 1534 u32 data, base; 1535 1536 if (!amdgpu_ngg) 1537 return 0; 1538 1539 /* Program buffer size */ 1540 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 1541 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 1542 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 1543 adev->gfx.ngg.buf[NGG_POS].size >> 8); 1544 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 1545 1546 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 1547 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 1548 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 1549 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 1550 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 1551 1552 /* Program buffer base address */ 1553 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1554 data = 
REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 1555 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 1556 1557 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1558 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 1559 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 1560 1561 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1562 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 1563 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 1564 1565 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1566 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 1567 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 1568 1569 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1570 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 1571 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 1572 1573 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1574 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 1575 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 1576 1577 /* Clear GDS reserved memory */ 1578 r = amdgpu_ring_alloc(ring, 17); 1579 if (r) { 1580 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 1581 ring->name, r); 1582 return r; 1583 } 1584 1585 gfx_v9_0_write_data_to_reg(ring, 0, false, 1586 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 1587 (adev->gds.gds_size + 1588 adev->gfx.ngg.gds_reserve_size)); 1589 1590 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1591 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 1592 PACKET3_DMA_DATA_DST_SEL(1) | 1593 PACKET3_DMA_DATA_SRC_SEL(2))); 1594 amdgpu_ring_write(ring, 0); 1595 amdgpu_ring_write(ring, 0); 1596 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 1597 amdgpu_ring_write(ring, 0); 1598 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 1599 adev->gfx.ngg.gds_reserve_size); 1600 1601 gfx_v9_0_write_data_to_reg(ring, 0, false, 1602 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 1603 1604 amdgpu_ring_commit(ring); 1605 1606 return 0; 1607 } 1608 1609 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1610 int mec, int pipe, int queue) 1611 { 1612 int r; 1613 unsigned irq_type; 1614 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1615 1616 ring = &adev->gfx.compute_ring[ring_id]; 1617 1618 /* mec0 is me1 */ 1619 ring->me = mec + 1; 1620 ring->pipe = pipe; 1621 ring->queue = queue; 1622 1623 ring->ring_obj = NULL; 1624 ring->use_doorbell = true; 1625 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1626 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1627 + (ring_id * GFX9_MEC_HPD_SIZE); 1628 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1629 1630 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1631 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1632 + ring->pipe; 1633 1634 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1635 r = amdgpu_ring_init(adev, ring, 1024, 1636 &adev->gfx.eop_irq, irq_type); 1637 if (r) 1638 return r; 1639 1640 1641 return 0; 1642 } 1643 1644 static int gfx_v9_0_sw_init(void *handle) 1645 { 1646 int i, j, k, r, ring_id; 1647 struct amdgpu_ring *ring; 1648 struct amdgpu_kiq *kiq; 1649 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1650 1651 switch (adev->asic_type) { 1652 case CHIP_VEGA10: 1653 case CHIP_VEGA12: 1654 case CHIP_VEGA20: 1655 case CHIP_RAVEN: 1656 adev->gfx.mec.num_mec = 2; 1657 break; 1658 default: 1659 adev->gfx.mec.num_mec = 1; 1660 break; 1661 } 1662 1663 
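/* every gfx9 MEC exposes 4 pipes with 8 queues per pipe */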
adev->gfx.mec.num_pipe_per_mec = 4; 1664 adev->gfx.mec.num_queue_per_pipe = 8; 1665 1666 /* EOP Event */ 1667 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 1668 if (r) 1669 return r; 1670 1671 /* Privileged reg */ 1672 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 1673 &adev->gfx.priv_reg_irq); 1674 if (r) 1675 return r; 1676 1677 /* Privileged inst */ 1678 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 1679 &adev->gfx.priv_inst_irq); 1680 if (r) 1681 return r; 1682 1683 /* ECC error */ 1684 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 1685 &adev->gfx.cp_ecc_error_irq); 1686 if (r) 1687 return r; 1688 1689 /* FUE error */ 1690 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 1691 &adev->gfx.cp_ecc_error_irq); 1692 if (r) 1693 return r; 1694 1695 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1696 1697 gfx_v9_0_scratch_init(adev); 1698 1699 r = gfx_v9_0_init_microcode(adev); 1700 if (r) { 1701 DRM_ERROR("Failed to load gfx firmware!\n"); 1702 return r; 1703 } 1704 1705 r = adev->gfx.rlc.funcs->init(adev); 1706 if (r) { 1707 DRM_ERROR("Failed to init rlc BOs!\n"); 1708 return r; 1709 } 1710 1711 r = gfx_v9_0_mec_init(adev); 1712 if (r) { 1713 DRM_ERROR("Failed to init MEC BOs!\n"); 1714 return r; 1715 } 1716 1717 /* set up the gfx ring */ 1718 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1719 ring = &adev->gfx.gfx_ring[i]; 1720 ring->ring_obj = NULL; 1721 if (!i) 1722 sprintf(ring->name, "gfx"); 1723 else 1724 sprintf(ring->name, "gfx_%d", i); 1725 ring->use_doorbell = true; 1726 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1727 r = amdgpu_ring_init(adev, ring, 1024, 1728 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 1729 if (r) 1730 return r; 1731 } 1732 1733 /* set up the compute queues - allocate horizontally across pipes */ 1734 ring_id = 0; 1735 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1736 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1737 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1738 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1739 continue; 1740 1741 r = gfx_v9_0_compute_ring_init(adev, 1742 ring_id, 1743 i, k, j); 1744 if (r) 1745 return r; 1746 1747 ring_id++; 1748 } 1749 } 1750 } 1751 1752 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1753 if (r) { 1754 DRM_ERROR("Failed to init KIQ BOs!\n"); 1755 return r; 1756 } 1757 1758 kiq = &adev->gfx.kiq; 1759 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1760 if (r) 1761 return r; 1762 1763 /* create MQDs for all compute queues, as well as the KIQ, for the SRIOV case */ 1764 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 1765 if (r) 1766 return r; 1767 1768 adev->gfx.ce_ram_size = 0x8000; 1769 1770 r = gfx_v9_0_gpu_early_init(adev); 1771 if (r) 1772 return r; 1773 1774 r = gfx_v9_0_ngg_init(adev); 1775 if (r) 1776 return r; 1777 1778 return 0; 1779 } 1780 1781 1782 static int gfx_v9_0_sw_fini(void *handle) 1783 { 1784 int i; 1785 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1786 1787 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 1788 adev->gfx.ras_if) { 1789 struct ras_common_if *ras_if = adev->gfx.ras_if; 1790 struct ras_ih_if ih_info = { 1791 .head = *ras_if, 1792 }; 1793 1794 amdgpu_ras_debugfs_remove(adev, ras_if); 1795 amdgpu_ras_sysfs_remove(adev, ras_if); 1796
amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 1797 amdgpu_ras_feature_enable(adev, ras_if, 0); 1798 kfree(ras_if); 1799 } 1800 1801 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1802 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1803 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1804 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1805 1806 amdgpu_gfx_mqd_sw_fini(adev); 1807 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1808 amdgpu_gfx_kiq_fini(adev); 1809 1810 gfx_v9_0_mec_fini(adev); 1811 gfx_v9_0_ngg_fini(adev); 1812 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1813 if (adev->asic_type == CHIP_RAVEN) { 1814 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 1815 &adev->gfx.rlc.cp_table_gpu_addr, 1816 (void **)&adev->gfx.rlc.cp_table_ptr); 1817 } 1818 gfx_v9_0_free_microcode(adev); 1819 1820 return 0; 1821 } 1822 1823 1824 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1825 { 1826 /* TODO */ 1827 } 1828 1829 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1830 { 1831 u32 data; 1832 1833 if (instance == 0xffffffff) 1834 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1835 else 1836 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1837 1838 if (se_num == 0xffffffff) 1839 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1840 else 1841 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1842 1843 if (sh_num == 0xffffffff) 1844 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1845 else 1846 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1847 1848 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 1849 } 1850 1851 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1852 { 1853 u32 data, mask; 1854 1855 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1856 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1857 1858 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1859 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1860 1861 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1862 adev->gfx.config.max_sh_per_se); 1863 1864 return (~data) & mask; 1865 } 1866 1867 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1868 { 1869 int i, j; 1870 u32 data; 1871 u32 active_rbs = 0; 1872 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1873 adev->gfx.config.max_sh_per_se; 1874 1875 mutex_lock(&adev->grbm_idx_mutex); 1876 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1877 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1878 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1879 data = gfx_v9_0_get_rb_active_bitmap(adev); 1880 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1881 rb_bitmap_width_per_sh); 1882 } 1883 } 1884 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1885 mutex_unlock(&adev->grbm_idx_mutex); 1886 1887 adev->gfx.config.backend_enable_mask = active_rbs; 1888 adev->gfx.config.num_rbs = hweight32(active_rbs); 1889 } 1890 1891 #define DEFAULT_SH_MEM_BASES (0x6000) 1892 #define FIRST_COMPUTE_VMID (8) 1893 #define LAST_COMPUTE_VMID (16) 1894 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 1895 { 1896 int i; 1897 uint32_t sh_mem_config; 1898 uint32_t sh_mem_bases; 1899 1900 /* 1901 * Configure apertures: 1902 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1903 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 
(4GB) 1904 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1905 */ 1906 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 1907 1908 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 1909 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 1910 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 1911 1912 mutex_lock(&adev->srbm_mutex); 1913 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1914 soc15_grbm_select(adev, 0, 0, 0, i); 1915 /* CP and shaders */ 1916 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 1917 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1918 } 1919 soc15_grbm_select(adev, 0, 0, 0, 0); 1920 mutex_unlock(&adev->srbm_mutex); 1921 } 1922 1923 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 1924 { 1925 u32 tmp; 1926 int i; 1927 1928 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1929 1930 gfx_v9_0_tiling_mode_table_init(adev); 1931 1932 gfx_v9_0_setup_rb(adev); 1933 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1934 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 1935 1936 /* XXX SH_MEM regs */ 1937 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1938 mutex_lock(&adev->srbm_mutex); 1939 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { 1940 soc15_grbm_select(adev, 0, 0, 0, i); 1941 /* CP and shaders */ 1942 if (i == 0) { 1943 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1944 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1945 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1946 !!amdgpu_noretry); 1947 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1948 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 1949 } else { 1950 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1951 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1952 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 1953 !!amdgpu_noretry); 1954 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1955 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1956 (adev->gmc.private_aperture_start >> 48)); 1957 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1958 (adev->gmc.shared_aperture_start >> 48)); 1959 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 1960 } 1961 } 1962 soc15_grbm_select(adev, 0, 0, 0, 0); 1963 1964 mutex_unlock(&adev->srbm_mutex); 1965 1966 gfx_v9_0_init_compute_vmid(adev); 1967 } 1968 1969 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1970 { 1971 u32 i, j, k; 1972 u32 mask; 1973 1974 mutex_lock(&adev->grbm_idx_mutex); 1975 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1976 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1977 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1978 for (k = 0; k < adev->usec_timeout; k++) { 1979 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1980 break; 1981 udelay(1); 1982 } 1983 if (k == adev->usec_timeout) { 1984 gfx_v9_0_select_se_sh(adev, 0xffffffff, 1985 0xffffffff, 0xffffffff); 1986 mutex_unlock(&adev->grbm_idx_mutex); 1987 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 1988 i, j); 1989 return; 1990 } 1991 } 1992 } 1993 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1994 mutex_unlock(&adev->grbm_idx_mutex); 1995 1996 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 1997 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 1998 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 1999 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2000 for (k = 0; k < adev->usec_timeout; k++) { 2001 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2002 break; 2003 udelay(1); 2004 } 2005 } 2006 2007 
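/* enable/disable the ring 0 CP interrupt sources (context busy/empty, CMP busy and GFX idle) that signal GUI idle transitions */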
static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2008 bool enable) 2009 { 2010 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2011 2012 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2013 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2014 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2015 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2016 2017 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2018 } 2019 2020 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2021 { 2022 /* csib */ 2023 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2024 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2025 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2026 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2027 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2028 adev->gfx.rlc.clear_state_size); 2029 } 2030 2031 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2032 int indirect_offset, 2033 int list_size, 2034 int *unique_indirect_regs, 2035 int unique_indirect_reg_count, 2036 int *indirect_start_offsets, 2037 int *indirect_start_offsets_count, 2038 int max_start_offsets_count) 2039 { 2040 int idx; 2041 2042 for (; indirect_offset < list_size; indirect_offset++) { 2043 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2044 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2045 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2046 2047 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2048 indirect_offset += 2; 2049 2050 /* look for the matching indice */ 2051 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2052 if (unique_indirect_regs[idx] == 2053 register_list_format[indirect_offset] || 2054 !unique_indirect_regs[idx]) 2055 break; 2056 } 2057 2058 BUG_ON(idx >= unique_indirect_reg_count); 2059 2060 if (!unique_indirect_regs[idx]) 2061 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2062 2063 indirect_offset++; 2064 } 2065 } 2066 } 2067 2068 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2069 { 2070 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2071 int unique_indirect_reg_count = 0; 2072 2073 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2074 int indirect_start_offsets_count = 0; 2075 2076 int list_size = 0; 2077 int i = 0, j = 0; 2078 u32 tmp = 0; 2079 2080 u32 *register_list_format = 2081 kmemdup(adev->gfx.rlc.register_list_format, 2082 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2083 if (!register_list_format) 2084 return -ENOMEM; 2085 2086 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2087 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2088 gfx_v9_1_parse_ind_reg_list(register_list_format, 2089 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2090 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2091 unique_indirect_regs, 2092 unique_indirect_reg_count, 2093 indirect_start_offsets, 2094 &indirect_start_offsets_count, 2095 ARRAY_SIZE(indirect_start_offsets)); 2096 2097 /* enable auto inc in case it is disabled */ 2098 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2099 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2100 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2101 2102 /* write register_restore table to offset 0x0 using 
RLC_SRM_ARAM_ADDR/DATA */ 2103 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2104 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2105 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2106 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2107 adev->gfx.rlc.register_restore[i]); 2108 2109 /* load indirect register */ 2110 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2111 adev->gfx.rlc.reg_list_format_start); 2112 2113 /* direct register portion */ 2114 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2115 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2116 register_list_format[i]); 2117 2118 /* indirect register portion */ 2119 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2120 if (register_list_format[i] == 0xFFFFFFFF) { 2121 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2122 continue; 2123 } 2124 2125 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2126 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2127 2128 for (j = 0; j < unique_indirect_reg_count; j++) { 2129 if (register_list_format[i] == unique_indirect_regs[j]) { 2130 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2131 break; 2132 } 2133 } 2134 2135 BUG_ON(j >= unique_indirect_reg_count); 2136 2137 i++; 2138 } 2139 2140 /* set save/restore list size */ 2141 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2142 list_size = list_size >> 1; 2143 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2144 adev->gfx.rlc.reg_restore_list_size); 2145 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2146 2147 /* write the starting offsets to RLC scratch ram */ 2148 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2149 adev->gfx.rlc.starting_offsets_start); 2150 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2151 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2152 indirect_start_offsets[i]); 2153 2154 /* load unique indirect regs */ 2155 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2156 if (unique_indirect_regs[i] != 0) { 2157 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2158 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2159 unique_indirect_regs[i] & 0x3FFFF); 2160 2161 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2162 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2163 unique_indirect_regs[i] >> 20); 2164 } 2165 } 2166 2167 kfree(register_list_format); 2168 return 0; 2169 } 2170 2171 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2172 { 2173 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2174 } 2175 2176 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2177 bool enable) 2178 { 2179 uint32_t data = 0; 2180 uint32_t default_data = 0; 2181 2182 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2183 if (enable) { 2184 /* enable GFXIP control over CGPG */ 2185 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2186 if (default_data != data) 2187 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2188 2189 /* update status */ 2190 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2191 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2192 if (default_data != data) 2193 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2194 } else { 2195 /* restore GFXIP control over CGPG */ 2196 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2197 if (default_data != data) 2198
WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2199 } 2200 } 2201 2202 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2203 { 2204 uint32_t data = 0; 2205 2206 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2207 AMD_PG_SUPPORT_GFX_SMG | 2208 AMD_PG_SUPPORT_GFX_DMG)) { 2209 /* init IDLE_POLL_COUNT = 60 */ 2210 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2211 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2212 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2213 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2214 2215 /* init RLC PG Delay */ 2216 data = 0; 2217 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2218 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2219 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2220 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2221 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2222 2223 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2224 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2225 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2226 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2227 2228 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2229 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2230 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2231 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2232 2233 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2234 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2235 2236 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2237 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2238 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2239 2240 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2241 } 2242 } 2243 2244 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2245 bool enable) 2246 { 2247 uint32_t data = 0; 2248 uint32_t default_data = 0; 2249 2250 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2251 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2252 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2253 enable ? 1 : 0); 2254 if (default_data != data) 2255 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2256 } 2257 2258 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2259 bool enable) 2260 { 2261 uint32_t data = 0; 2262 uint32_t default_data = 0; 2263 2264 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2265 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2266 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2267 enable ? 1 : 0); 2268 if(default_data != data) 2269 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2270 } 2271 2272 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2273 bool enable) 2274 { 2275 uint32_t data = 0; 2276 uint32_t default_data = 0; 2277 2278 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2279 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2280 CP_PG_DISABLE, 2281 enable ? 0 : 1); 2282 if(default_data != data) 2283 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2284 } 2285 2286 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2287 bool enable) 2288 { 2289 uint32_t data, default_data; 2290 2291 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2292 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2293 GFX_POWER_GATING_ENABLE, 2294 enable ? 
1 : 0); 2295 if (default_data != data) 2296 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2297 } 2298 2299 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2300 bool enable) 2301 { 2302 uint32_t data, default_data; 2303 2304 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2305 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2306 GFX_PIPELINE_PG_ENABLE, 2307 enable ? 1 : 0); 2308 if (default_data != data) 2309 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2310 2311 if (!enable) 2312 /* read any GFX register to wake up GFX */ 2313 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2314 } 2315 2316 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2317 bool enable) 2318 { 2319 uint32_t data, default_data; 2320 2321 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2322 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2323 STATIC_PER_CU_PG_ENABLE, 2324 enable ? 1 : 0); 2325 if (default_data != data) 2326 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2327 } 2328 2329 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2330 bool enable) 2331 { 2332 uint32_t data, default_data; 2333 2334 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2335 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2336 DYN_PER_CU_PG_ENABLE, 2337 enable ? 1 : 0); 2338 if (default_data != data) 2339 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2340 } 2341 2342 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2343 { 2344 gfx_v9_0_init_csb(adev); 2345 2346 /* 2347 * The RLC save/restore list is only available from RLC v2_1 onward, 2348 * and it is required by the gfxoff feature. 2349 */ 2350 if (adev->gfx.rlc.is_rlc_v2_1) { 2351 gfx_v9_1_init_rlc_save_restore_list(adev); 2352 gfx_v9_0_enable_save_restore_machine(adev); 2353 } 2354 2355 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2356 AMD_PG_SUPPORT_GFX_SMG | 2357 AMD_PG_SUPPORT_GFX_DMG | 2358 AMD_PG_SUPPORT_CP | 2359 AMD_PG_SUPPORT_GDS | 2360 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2361 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2362 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2363 gfx_v9_0_init_gfx_power_gating(adev); 2364 } 2365 } 2366 2367 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2368 { 2369 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2370 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2371 gfx_v9_0_wait_for_rlc_serdes(adev); 2372 } 2373 2374 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2375 { 2376 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2377 udelay(50); 2378 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2379 udelay(50); 2380 } 2381 2382 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2383 { 2384 #ifdef AMDGPU_RLC_DEBUG_RETRY 2385 u32 rlc_ucode_ver; 2386 #endif 2387 2388 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2389 udelay(50); 2390 2391 /* APUs (e.g. Carrizo) only enable the CP interrupt after the CP has been initialized */ 2392 if (!(adev->flags & AMD_IS_APU)) { 2393 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2394 udelay(50); 2395 } 2396 2397 #ifdef AMDGPU_RLC_DEBUG_RETRY 2398 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2399 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2400 if (rlc_ucode_ver == 0x108) { 2401 DRM_INFO("Using rlc debug ucode.
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2402 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2403 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2404 * default is 0x9C4 to create a 100us interval */ 2405 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2406 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2407 * to disable the page fault retry interrupts, default is 2408 * 0x100 (256) */ 2409 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2410 } 2411 #endif 2412 } 2413 2414 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2415 { 2416 const struct rlc_firmware_header_v2_0 *hdr; 2417 const __le32 *fw_data; 2418 unsigned i, fw_size; 2419 2420 if (!adev->gfx.rlc_fw) 2421 return -EINVAL; 2422 2423 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2424 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2425 2426 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2427 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2428 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2429 2430 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2431 RLCG_UCODE_LOADING_START_ADDRESS); 2432 for (i = 0; i < fw_size; i++) 2433 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2434 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2435 2436 return 0; 2437 } 2438 2439 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2440 { 2441 int r; 2442 2443 if (amdgpu_sriov_vf(adev)) { 2444 gfx_v9_0_init_csb(adev); 2445 return 0; 2446 } 2447 2448 adev->gfx.rlc.funcs->stop(adev); 2449 2450 /* disable CG */ 2451 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2452 2453 gfx_v9_0_init_pg(adev); 2454 2455 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2456 /* legacy rlc firmware loading */ 2457 r = gfx_v9_0_rlc_load_microcode(adev); 2458 if (r) 2459 return r; 2460 } 2461 2462 switch (adev->asic_type) { 2463 case CHIP_RAVEN: 2464 if (amdgpu_lbpw == 0) 2465 gfx_v9_0_enable_lbpw(adev, false); 2466 else 2467 gfx_v9_0_enable_lbpw(adev, true); 2468 break; 2469 case CHIP_VEGA20: 2470 if (amdgpu_lbpw > 0) 2471 gfx_v9_0_enable_lbpw(adev, true); 2472 else 2473 gfx_v9_0_enable_lbpw(adev, false); 2474 break; 2475 default: 2476 break; 2477 } 2478 2479 adev->gfx.rlc.funcs->start(adev); 2480 2481 return 0; 2482 } 2483 2484 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2485 { 2486 int i; 2487 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2488 2489 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2490 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2491 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 2492 if (!enable) { 2493 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2494 adev->gfx.gfx_ring[i].sched.ready = false; 2495 } 2496 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2497 udelay(50); 2498 } 2499 2500 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2501 { 2502 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2503 const struct gfx_firmware_header_v1_0 *ce_hdr; 2504 const struct gfx_firmware_header_v1_0 *me_hdr; 2505 const __le32 *fw_data; 2506 unsigned i, fw_size; 2507 2508 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2509 return -EINVAL; 2510 2511 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2512 adev->gfx.pfp_fw->data; 2513 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2514 adev->gfx.ce_fw->data; 2515 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2516 adev->gfx.me_fw->data; 2517 2518 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2519 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2520 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2521 2522 gfx_v9_0_cp_gfx_enable(adev, false); 2523 2524 /* PFP */ 2525 fw_data = (const __le32 *) 2526 (adev->gfx.pfp_fw->data + 2527 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2528 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2529 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2530 for (i = 0; i < fw_size; i++) 2531 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2532 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2533 2534 /* CE */ 2535 fw_data = (const __le32 *) 2536 (adev->gfx.ce_fw->data + 2537 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2538 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2539 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2540 for (i = 0; i < fw_size; i++) 2541 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2542 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2543 2544 /* ME */ 2545 fw_data = (const __le32 *) 2546 (adev->gfx.me_fw->data + 2547 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2548 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2549 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2550 for (i = 0; i < fw_size; i++) 2551 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2552 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2553 2554 return 0; 2555 } 2556 2557 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2558 { 2559 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2560 const struct cs_section_def *sect = NULL; 2561 const struct cs_extent_def *ext = NULL; 2562 int r, i, tmp; 2563 2564 /* init the CP */ 2565 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2566 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2567 2568 gfx_v9_0_cp_gfx_enable(adev, true); 2569 2570 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2571 if (r) { 2572 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2573 return r; 2574 } 2575 2576 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2577 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2578 2579 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2580 amdgpu_ring_write(ring, 0x80000000); 2581 amdgpu_ring_write(ring, 0x80000000); 2582 2583 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2584 for (ext = sect->section; ext->extent != NULL; ++ext) { 2585 if (sect->id == SECT_CONTEXT) { 2586 amdgpu_ring_write(ring, 2587 PACKET3(PACKET3_SET_CONTEXT_REG, 2588 
ext->reg_count)); 2589 amdgpu_ring_write(ring, 2590 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2591 for (i = 0; i < ext->reg_count; i++) 2592 amdgpu_ring_write(ring, ext->extent[i]); 2593 } 2594 } 2595 } 2596 2597 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2598 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2599 2600 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2601 amdgpu_ring_write(ring, 0); 2602 2603 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2604 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2605 amdgpu_ring_write(ring, 0x8000); 2606 amdgpu_ring_write(ring, 0x8000); 2607 2608 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 2609 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2610 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2611 amdgpu_ring_write(ring, tmp); 2612 amdgpu_ring_write(ring, 0); 2613 2614 amdgpu_ring_commit(ring); 2615 2616 return 0; 2617 } 2618 2619 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2620 { 2621 struct amdgpu_ring *ring; 2622 u32 tmp; 2623 u32 rb_bufsz; 2624 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2625 2626 /* Set the write pointer delay */ 2627 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2628 2629 /* set the RB to use vmid 0 */ 2630 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2631 2632 /* Set ring buffer size */ 2633 ring = &adev->gfx.gfx_ring[0]; 2634 rb_bufsz = order_base_2(ring->ring_size / 8); 2635 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2636 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2637 #ifdef __BIG_ENDIAN 2638 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2639 #endif 2640 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2641 2642 /* Initialize the ring buffer's write pointers */ 2643 ring->wptr = 0; 2644 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2645 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2646 2647 /* set the wb address whether it's enabled or not */ 2648 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2649 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2650 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2651 2652 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2653 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 2654 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 2655 2656 mdelay(1); 2657 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2658 2659 rb_addr = ring->gpu_addr >> 8; 2660 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2661 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2662 2663 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2664 if (ring->use_doorbell) { 2665 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2666 DOORBELL_OFFSET, ring->doorbell_index); 2667 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2668 DOORBELL_EN, 1); 2669 } else { 2670 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 2671 } 2672 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2673 2674 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2675 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2676 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2677 2678 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2679 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2680 2681 2682 /* start the ring */ 2683 gfx_v9_0_cp_gfx_start(adev); 2684 ring->sched.ready = true;
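/* the CP front end is running, so the scheduler may now submit to the gfx ring */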
2685 2686 return 0; 2687 } 2688 2689 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2690 { 2691 int i; 2692 2693 if (enable) { 2694 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 2695 } else { 2696 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 2697 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2698 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2699 adev->gfx.compute_ring[i].sched.ready = false; 2700 adev->gfx.kiq.ring.sched.ready = false; 2701 } 2702 udelay(50); 2703 } 2704 2705 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2706 { 2707 const struct gfx_firmware_header_v1_0 *mec_hdr; 2708 const __le32 *fw_data; 2709 unsigned i; 2710 u32 tmp; 2711 2712 if (!adev->gfx.mec_fw) 2713 return -EINVAL; 2714 2715 gfx_v9_0_cp_compute_enable(adev, false); 2716 2717 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2718 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2719 2720 fw_data = (const __le32 *) 2721 (adev->gfx.mec_fw->data + 2722 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2723 tmp = 0; 2724 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2725 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2726 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2727 2728 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2729 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2730 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2731 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2732 2733 /* MEC1 */ 2734 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2735 mec_hdr->jt_offset); 2736 for (i = 0; i < mec_hdr->jt_size; i++) 2737 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2738 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2739 2740 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2741 adev->gfx.mec_fw_version); 2742 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 2743 2744 return 0; 2745 } 2746 2747 /* KIQ functions */ 2748 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2749 { 2750 uint32_t tmp; 2751 struct amdgpu_device *adev = ring->adev; 2752 2753 /* tell RLC which is KIQ queue */ 2754 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2755 tmp &= 0xffffff00; 2756 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2757 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2758 tmp |= 0x80; 2759 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2760 } 2761 2762 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2763 { 2764 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2765 uint64_t queue_mask = 0; 2766 int r, i; 2767 2768 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2769 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2770 continue; 2771 2772 /* This situation may be hit in the future if a new HW 2773 * generation exposes more than 64 queues. 
If so, the 2774 * definition of queue_mask needs updating */ 2775 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 2776 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 2777 break; 2778 } 2779 2780 queue_mask |= (1ull << i); 2781 } 2782 2783 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 2784 if (r) { 2785 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2786 return r; 2787 } 2788 2789 /* set resources */ 2790 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 2791 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 2792 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 2793 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 2794 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 2795 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 2796 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 2797 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 2798 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 2799 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2800 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2801 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 2802 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2803 2804 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 2805 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 2806 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 2807 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 2808 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 2809 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 2810 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2811 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 2812 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2813 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 2814 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2815 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2816 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2817 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2818 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2819 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2820 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2821 } 2822 2823 r = amdgpu_ring_test_helper(kiq_ring); 2824 if (r) 2825 DRM_ERROR("KCQ enable failed\n"); 2826 2827 return r; 2828 } 2829 2830 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2831 { 2832 struct amdgpu_device *adev = ring->adev; 2833 struct v9_mqd *mqd = ring->mqd_ptr; 2834 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2835 uint32_t tmp; 2836 2837 mqd->header = 0xC0310800; 2838 mqd->compute_pipelinestat_enable = 0x00000001; 2839 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2840 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2841 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2842 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2843 mqd->compute_misc_reserved = 0x00000003; 2844 2845 mqd->dynamic_cu_mask_addr_lo = 2846 lower_32_bits(ring->mqd_gpu_addr 2847 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2848 mqd->dynamic_cu_mask_addr_hi = 2849 upper_32_bits(ring->mqd_gpu_addr 2850 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2851 2852 eop_base_addr = ring->eop_gpu_addr >> 8; 2853 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2854 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2855 2856 /* set the EOP size, 
register value is 2^(EOP_SIZE+1) dwords */ 2857 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2858 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2859 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2860 2861 mqd->cp_hqd_eop_control = tmp; 2862 2863 /* enable doorbell? */ 2864 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2865 2866 if (ring->use_doorbell) { 2867 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2868 DOORBELL_OFFSET, ring->doorbell_index); 2869 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2870 DOORBELL_EN, 1); 2871 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2872 DOORBELL_SOURCE, 0); 2873 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2874 DOORBELL_HIT, 0); 2875 } else { 2876 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2877 DOORBELL_EN, 0); 2878 } 2879 2880 mqd->cp_hqd_pq_doorbell_control = tmp; 2881 2882 /* disable the queue if it's active */ 2883 ring->wptr = 0; 2884 mqd->cp_hqd_dequeue_request = 0; 2885 mqd->cp_hqd_pq_rptr = 0; 2886 mqd->cp_hqd_pq_wptr_lo = 0; 2887 mqd->cp_hqd_pq_wptr_hi = 0; 2888 2889 /* set the pointer to the MQD */ 2890 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 2891 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2892 2893 /* set MQD vmid to 0 */ 2894 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 2895 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2896 mqd->cp_mqd_control = tmp; 2897 2898 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2899 hqd_gpu_addr = ring->gpu_addr >> 8; 2900 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2901 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2902 2903 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2904 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 2905 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2906 (order_base_2(ring->ring_size / 4) - 1)); 2907 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2908 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 2909 #ifdef __BIG_ENDIAN 2910 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 2911 #endif 2912 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2913 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 2914 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2915 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2916 mqd->cp_hqd_pq_control = tmp; 2917 2918 /* set the wb address whether it's enabled or not */ 2919 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2920 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2921 mqd->cp_hqd_pq_rptr_report_addr_hi = 2922 upper_32_bits(wb_gpu_addr) & 0xffff; 2923 2924 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2925 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2926 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2927 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2928 2929 tmp = 0; 2930 /* enable the doorbell if requested */ 2931 if (ring->use_doorbell) { 2932 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2933 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2934 DOORBELL_OFFSET, ring->doorbell_index); 2935 2936 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2937 DOORBELL_EN, 1); 2938 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2939 DOORBELL_SOURCE, 0); 2940 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2941 DOORBELL_HIT, 0); 2942 } 2943 2944 mqd->cp_hqd_pq_doorbell_control = tmp; 2945 2946 /* reset read 
and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2947 ring->wptr = 0; 2948 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 2949 2950 /* set the vmid for the queue */ 2951 mqd->cp_hqd_vmid = 0; 2952 2953 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2954 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2955 mqd->cp_hqd_persistent_state = tmp; 2956 2957 /* set MIN_IB_AVAIL_SIZE */ 2958 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2959 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2960 mqd->cp_hqd_ib_control = tmp; 2961 2962 /* activate the queue */ 2963 mqd->cp_hqd_active = 1; 2964 2965 return 0; 2966 } 2967 2968 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2969 { 2970 struct amdgpu_device *adev = ring->adev; 2971 struct v9_mqd *mqd = ring->mqd_ptr; 2972 int j; 2973 2974 /* disable wptr polling */ 2975 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2976 2977 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2978 mqd->cp_hqd_eop_base_addr_lo); 2979 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2980 mqd->cp_hqd_eop_base_addr_hi); 2981 2982 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2983 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 2984 mqd->cp_hqd_eop_control); 2985 2986 /* enable doorbell? */ 2987 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2988 mqd->cp_hqd_pq_doorbell_control); 2989 2990 /* disable the queue if it's active */ 2991 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 2992 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 2993 for (j = 0; j < adev->usec_timeout; j++) { 2994 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 2995 break; 2996 udelay(1); 2997 } 2998 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 2999 mqd->cp_hqd_dequeue_request); 3000 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3001 mqd->cp_hqd_pq_rptr); 3002 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3003 mqd->cp_hqd_pq_wptr_lo); 3004 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3005 mqd->cp_hqd_pq_wptr_hi); 3006 } 3007 3008 /* set the pointer to the MQD */ 3009 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3010 mqd->cp_mqd_base_addr_lo); 3011 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3012 mqd->cp_mqd_base_addr_hi); 3013 3014 /* set MQD vmid to 0 */ 3015 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3016 mqd->cp_mqd_control); 3017 3018 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3019 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3020 mqd->cp_hqd_pq_base_lo); 3021 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3022 mqd->cp_hqd_pq_base_hi); 3023 3024 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3025 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3026 mqd->cp_hqd_pq_control); 3027 3028 /* set the wb address whether it's enabled or not */ 3029 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3030 mqd->cp_hqd_pq_rptr_report_addr_lo); 3031 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3032 mqd->cp_hqd_pq_rptr_report_addr_hi); 3033 3034 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3035 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3036 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3037 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3038 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3039 3040 /* enable the doorbell if requested */ 3041 if (ring->use_doorbell) { 3042 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3043 (adev->doorbell_index.kiq * 2) << 2); 3044 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3045 
(adev->doorbell_index.userqueue_end * 2) << 2); 3046 } 3047 3048 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3049 mqd->cp_hqd_pq_doorbell_control); 3050 3051 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3052 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3053 mqd->cp_hqd_pq_wptr_lo); 3054 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3055 mqd->cp_hqd_pq_wptr_hi); 3056 3057 /* set the vmid for the queue */ 3058 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3059 3060 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3061 mqd->cp_hqd_persistent_state); 3062 3063 /* activate the queue */ 3064 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3065 mqd->cp_hqd_active); 3066 3067 if (ring->use_doorbell) 3068 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3069 3070 return 0; 3071 } 3072 3073 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3074 { 3075 struct amdgpu_device *adev = ring->adev; 3076 int j; 3077 3078 /* disable the queue if it's active */ 3079 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3080 3081 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3082 3083 for (j = 0; j < adev->usec_timeout; j++) { 3084 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3085 break; 3086 udelay(1); 3087 } 3088 3089 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3090 DRM_DEBUG("KIQ dequeue request failed.\n"); 3091 3092 /* Manual disable if dequeue request times out */ 3093 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3094 } 3095 3096 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3097 0); 3098 } 3099 3100 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3101 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3102 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3103 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3104 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3105 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3106 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3107 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3108 3109 return 0; 3110 } 3111 3112 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3113 { 3114 struct amdgpu_device *adev = ring->adev; 3115 struct v9_mqd *mqd = ring->mqd_ptr; 3116 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3117 3118 gfx_v9_0_kiq_setting(ring); 3119 3120 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3121 /* reset MQD to a clean status */ 3122 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3123 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3124 3125 /* reset ring buffer */ 3126 ring->wptr = 0; 3127 amdgpu_ring_clear_ring(ring); 3128 3129 mutex_lock(&adev->srbm_mutex); 3130 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3131 gfx_v9_0_kiq_init_register(ring); 3132 soc15_grbm_select(adev, 0, 0, 0, 0); 3133 mutex_unlock(&adev->srbm_mutex); 3134 } else { 3135 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3136 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3137 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3138 mutex_lock(&adev->srbm_mutex); 3139 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3140 gfx_v9_0_mqd_init(ring); 3141 gfx_v9_0_kiq_init_register(ring); 3142 soc15_grbm_select(adev, 0, 0, 0, 0); 3143 mutex_unlock(&adev->srbm_mutex); 3144 3145 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3146 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3147 } 3148 3149 return 0; 3150 } 3151 3152 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring 
*ring) 3153 { 3154 struct amdgpu_device *adev = ring->adev; 3155 struct v9_mqd *mqd = ring->mqd_ptr; 3156 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3157 3158 if (!adev->in_gpu_reset && !adev->in_suspend) { 3159 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3160 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3161 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3162 mutex_lock(&adev->srbm_mutex); 3163 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3164 gfx_v9_0_mqd_init(ring); 3165 soc15_grbm_select(adev, 0, 0, 0, 0); 3166 mutex_unlock(&adev->srbm_mutex); 3167 3168 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3169 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3170 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3171 /* reset MQD to a clean status */ 3172 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3173 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3174 3175 /* reset ring buffer */ 3176 ring->wptr = 0; 3177 amdgpu_ring_clear_ring(ring); 3178 } else { 3179 amdgpu_ring_clear_ring(ring); 3180 } 3181 3182 return 0; 3183 } 3184 3185 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3186 { 3187 struct amdgpu_ring *ring; 3188 int r; 3189 3190 ring = &adev->gfx.kiq.ring; 3191 3192 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3193 if (unlikely(r != 0)) 3194 return r; 3195 3196 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3197 if (unlikely(r != 0)) 3198 return r; 3199 3200 gfx_v9_0_kiq_init_queue(ring); 3201 amdgpu_bo_kunmap(ring->mqd_obj); 3202 ring->mqd_ptr = NULL; 3203 amdgpu_bo_unreserve(ring->mqd_obj); 3204 ring->sched.ready = true; 3205 return 0; 3206 } 3207 3208 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3209 { 3210 struct amdgpu_ring *ring = NULL; 3211 int r = 0, i; 3212 3213 gfx_v9_0_cp_compute_enable(adev, true); 3214 3215 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3216 ring = &adev->gfx.compute_ring[i]; 3217 3218 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3219 if (unlikely(r != 0)) 3220 goto done; 3221 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3222 if (!r) { 3223 r = gfx_v9_0_kcq_init_queue(ring); 3224 amdgpu_bo_kunmap(ring->mqd_obj); 3225 ring->mqd_ptr = NULL; 3226 } 3227 amdgpu_bo_unreserve(ring->mqd_obj); 3228 if (r) 3229 goto done; 3230 } 3231 3232 r = gfx_v9_0_kiq_kcq_enable(adev); 3233 done: 3234 return r; 3235 } 3236 3237 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3238 { 3239 int r, i; 3240 struct amdgpu_ring *ring; 3241 3242 if (!(adev->flags & AMD_IS_APU)) 3243 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3244 3245 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3246 /* legacy firmware loading */ 3247 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3248 if (r) 3249 return r; 3250 3251 r = gfx_v9_0_cp_compute_load_microcode(adev); 3252 if (r) 3253 return r; 3254 } 3255 3256 r = gfx_v9_0_kiq_resume(adev); 3257 if (r) 3258 return r; 3259 3260 r = gfx_v9_0_cp_gfx_resume(adev); 3261 if (r) 3262 return r; 3263 3264 r = gfx_v9_0_kcq_resume(adev); 3265 if (r) 3266 return r; 3267 3268 ring = &adev->gfx.gfx_ring[0]; 3269 r = amdgpu_ring_test_helper(ring); 3270 if (r) 3271 return r; 3272 3273 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3274 ring = &adev->gfx.compute_ring[i]; 3275 amdgpu_ring_test_helper(ring); 3276 } 3277 3278 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3279 3280 return 0; 3281 } 3282 3283 static void gfx_v9_0_cp_enable(struct amdgpu_device 
*adev, bool enable) 3284 { 3285 gfx_v9_0_cp_gfx_enable(adev, enable); 3286 gfx_v9_0_cp_compute_enable(adev, enable); 3287 } 3288 3289 static int gfx_v9_0_hw_init(void *handle) 3290 { 3291 int r; 3292 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3293 3294 gfx_v9_0_init_golden_registers(adev); 3295 3296 gfx_v9_0_constants_init(adev); 3297 3298 r = gfx_v9_0_csb_vram_pin(adev); 3299 if (r) 3300 return r; 3301 3302 r = adev->gfx.rlc.funcs->resume(adev); 3303 if (r) 3304 return r; 3305 3306 r = gfx_v9_0_cp_resume(adev); 3307 if (r) 3308 return r; 3309 3310 r = gfx_v9_0_ngg_en(adev); 3311 if (r) 3312 return r; 3313 3314 return r; 3315 } 3316 3317 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3318 { 3319 int r, i; 3320 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3321 3322 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3323 if (r) 3324 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3325 3326 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3327 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3328 3329 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3330 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3331 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3332 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3333 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3334 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3335 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3336 amdgpu_ring_write(kiq_ring, 0); 3337 amdgpu_ring_write(kiq_ring, 0); 3338 amdgpu_ring_write(kiq_ring, 0); 3339 } 3340 r = amdgpu_ring_test_helper(kiq_ring); 3341 if (r) 3342 DRM_ERROR("KCQ disable failed\n"); 3343 3344 return r; 3345 } 3346 3347 static int gfx_v9_0_hw_fini(void *handle) 3348 { 3349 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3350 3351 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3352 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3353 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3354 3355 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3356 gfx_v9_0_kcq_disable(adev); 3357 3358 if (amdgpu_sriov_vf(adev)) { 3359 gfx_v9_0_cp_gfx_enable(adev, false); 3360 /* must disable polling for SRIOV when hw finished, otherwise 3361 * CPC engine may still keep fetching WB address which is already 3362 * invalid after sw finished and trigger DMAR reading error in 3363 * hypervisor side. 
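	 * Clearing CP_PQ_WPTR_POLL_CNTL.EN below is what stops the CPC from
	 * polling that now-stale write-back address.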
3364 */ 3365 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3366 return 0; 3367 } 3368 3369 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3370 * otherwise KIQ is hanging when binding back 3371 */ 3372 if (!adev->in_gpu_reset && !adev->in_suspend) { 3373 mutex_lock(&adev->srbm_mutex); 3374 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3375 adev->gfx.kiq.ring.pipe, 3376 adev->gfx.kiq.ring.queue, 0); 3377 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3378 soc15_grbm_select(adev, 0, 0, 0, 0); 3379 mutex_unlock(&adev->srbm_mutex); 3380 } 3381 3382 gfx_v9_0_cp_enable(adev, false); 3383 adev->gfx.rlc.funcs->stop(adev); 3384 3385 gfx_v9_0_csb_vram_unpin(adev); 3386 3387 return 0; 3388 } 3389 3390 static int gfx_v9_0_suspend(void *handle) 3391 { 3392 return gfx_v9_0_hw_fini(handle); 3393 } 3394 3395 static int gfx_v9_0_resume(void *handle) 3396 { 3397 return gfx_v9_0_hw_init(handle); 3398 } 3399 3400 static bool gfx_v9_0_is_idle(void *handle) 3401 { 3402 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3403 3404 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3405 GRBM_STATUS, GUI_ACTIVE)) 3406 return false; 3407 else 3408 return true; 3409 } 3410 3411 static int gfx_v9_0_wait_for_idle(void *handle) 3412 { 3413 unsigned i; 3414 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3415 3416 for (i = 0; i < adev->usec_timeout; i++) { 3417 if (gfx_v9_0_is_idle(handle)) 3418 return 0; 3419 udelay(1); 3420 } 3421 return -ETIMEDOUT; 3422 } 3423 3424 static int gfx_v9_0_soft_reset(void *handle) 3425 { 3426 u32 grbm_soft_reset = 0; 3427 u32 tmp; 3428 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3429 3430 /* GRBM_STATUS */ 3431 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3432 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3433 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3434 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3435 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3436 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3437 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3438 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3439 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3440 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3441 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3442 } 3443 3444 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3445 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3446 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3447 } 3448 3449 /* GRBM_STATUS2 */ 3450 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3451 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3452 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3453 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3454 3455 3456 if (grbm_soft_reset) { 3457 /* stop the rlc */ 3458 adev->gfx.rlc.funcs->stop(adev); 3459 3460 /* Disable GFX parsing/prefetching */ 3461 gfx_v9_0_cp_gfx_enable(adev, false); 3462 3463 /* Disable MEC parsing/prefetching */ 3464 gfx_v9_0_cp_compute_enable(adev, false); 3465 3466 if (grbm_soft_reset) { 3467 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3468 tmp |= grbm_soft_reset; 3469 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3470 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3471 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3472 3473 udelay(50); 3474 3475 tmp &= ~grbm_soft_reset; 3476 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3477 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3478 } 3479 3480 /* Wait a little for things to settle down */ 3481 
udelay(50); 3482 } 3483 return 0; 3484 } 3485 3486 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3487 { 3488 uint64_t clock; 3489 3490 mutex_lock(&adev->gfx.gpu_clock_mutex); 3491 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3492 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3493 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3494 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3495 return clock; 3496 } 3497 3498 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3499 uint32_t vmid, 3500 uint32_t gds_base, uint32_t gds_size, 3501 uint32_t gws_base, uint32_t gws_size, 3502 uint32_t oa_base, uint32_t oa_size) 3503 { 3504 struct amdgpu_device *adev = ring->adev; 3505 3506 /* GDS Base */ 3507 gfx_v9_0_write_data_to_reg(ring, 0, false, 3508 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3509 gds_base); 3510 3511 /* GDS Size */ 3512 gfx_v9_0_write_data_to_reg(ring, 0, false, 3513 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3514 gds_size); 3515 3516 /* GWS */ 3517 gfx_v9_0_write_data_to_reg(ring, 0, false, 3518 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3519 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3520 3521 /* OA */ 3522 gfx_v9_0_write_data_to_reg(ring, 0, false, 3523 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3524 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3525 } 3526 3527 static const u32 vgpr_init_compute_shader[] = 3528 { 3529 0xb07c0000, 0xbe8000ff, 3530 0x000000f8, 0xbf110800, 3531 0x7e000280, 0x7e020280, 3532 0x7e040280, 0x7e060280, 3533 0x7e080280, 0x7e0a0280, 3534 0x7e0c0280, 0x7e0e0280, 3535 0x80808800, 0xbe803200, 3536 0xbf84fff5, 0xbf9c0000, 3537 0xd28c0001, 0x0001007f, 3538 0xd28d0001, 0x0002027e, 3539 0x10020288, 0xb8810904, 3540 0xb7814000, 0xd1196a01, 3541 0x00000301, 0xbe800087, 3542 0xbefc00c1, 0xd89c4000, 3543 0x00020201, 0xd89cc080, 3544 0x00040401, 0x320202ff, 3545 0x00000800, 0x80808100, 3546 0xbf84fff8, 0x7e020280, 3547 0xbf810000, 0x00000000, 3548 }; 3549 3550 static const u32 sgpr_init_compute_shader[] = 3551 { 3552 0xb07c0000, 0xbe8000ff, 3553 0x0000005f, 0xbee50080, 3554 0xbe812c65, 0xbe822c65, 3555 0xbe832c65, 0xbe842c65, 3556 0xbe852c65, 0xb77c0005, 3557 0x80808500, 0xbf84fff8, 3558 0xbe800080, 0xbf810000, 3559 }; 3560 3561 static const struct soc15_reg_entry vgpr_init_regs[] = { 3562 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3563 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3564 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3565 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3566 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3567 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3568 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3569 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3570 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3571 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3572 }; 3573 3574 static const struct soc15_reg_entry sgpr_init_regs[] = { 3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 
0xffffffff }, 3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3585 }; 3586 3587 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3588 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3589 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3590 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3591 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3592 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3593 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3594 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3595 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3596 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3597 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3598 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3599 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3600 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3601 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3602 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3603 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3604 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3605 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3606 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3607 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3608 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3609 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3610 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 3611 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 3612 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 3613 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 3614 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 3615 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 3616 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 3617 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 3618 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 3619 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 3620 }; 3621 3622 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 3623 { 3624 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3625 int i, r; 3626 3627 r = amdgpu_ring_alloc(ring, 7); 3628 if (r) { 3629 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 3630 ring->name, r); 3631 return r; 3632 } 3633 3634 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 3635 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 3636 3637 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 3638 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 3639 PACKET3_DMA_DATA_DST_SEL(1) | 3640 PACKET3_DMA_DATA_SRC_SEL(2) | 3641 PACKET3_DMA_DATA_ENGINE(0))); 3642 amdgpu_ring_write(ring, 0); 3643 amdgpu_ring_write(ring, 0); 3644 amdgpu_ring_write(ring, 0); 3645 amdgpu_ring_write(ring, 0); 3646 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 3647 adev->gds.gds_size); 3648 3649 amdgpu_ring_commit(ring); 3650 3651 for (i = 0; i < adev->usec_timeout; i++) { 3652 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 3653 break; 3654 udelay(1); 3655 } 3656 3657 if (i >= 
adev->usec_timeout) 3658 r = -ETIMEDOUT; 3659 3660 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 3661 3662 return r; 3663 } 3664 3665 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 3666 { 3667 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3668 struct amdgpu_ib ib; 3669 struct dma_fence *f = NULL; 3670 int r, i, j, k; 3671 unsigned total_size, vgpr_offset, sgpr_offset; 3672 u64 gpu_addr; 3673 3674 /* only support when RAS is enabled */ 3675 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3676 return 0; 3677 3678 /* bail if the compute ring is not ready */ 3679 if (!ring->sched.ready) 3680 return 0; 3681 3682 total_size = 3683 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3684 total_size += 3685 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3686 total_size = ALIGN(total_size, 256); 3687 vgpr_offset = total_size; 3688 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 3689 sgpr_offset = total_size; 3690 total_size += sizeof(sgpr_init_compute_shader); 3691 3692 /* allocate an indirect buffer to put the commands in */ 3693 memset(&ib, 0, sizeof(ib)); 3694 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 3695 if (r) { 3696 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 3697 return r; 3698 } 3699 3700 /* load the compute shaders */ 3701 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 3702 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 3703 3704 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 3705 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 3706 3707 /* init the ib length to 0 */ 3708 ib.length_dw = 0; 3709 3710 /* VGPR */ 3711 /* write the register state for the compute dispatch */ 3712 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 3713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3714 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 3715 - PACKET3_SET_SH_REG_START; 3716 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 3717 } 3718 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3719 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 3720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3721 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 3722 - PACKET3_SET_SH_REG_START; 3723 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3724 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3725 3726 /* write dispatch packet */ 3727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3728 ib.ptr[ib.length_dw++] = 128; /* x */ 3729 ib.ptr[ib.length_dw++] = 1; /* y */ 3730 ib.ptr[ib.length_dw++] = 1; /* z */ 3731 ib.ptr[ib.length_dw++] = 3732 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3733 3734 /* write CS partial flush packet */ 3735 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3736 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3737 3738 /* SGPR */ 3739 /* write the register state for the compute dispatch */ 3740 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 3741 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 3742 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 3743 - PACKET3_SET_SH_REG_START; 3744 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 3745 } 3746 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 3747 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 3748 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 3749 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO) 3750 - PACKET3_SET_SH_REG_START; 3751 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 3752 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 3753 3754 /* write dispatch packet */ 3755 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 3756 ib.ptr[ib.length_dw++] = 128; /* x */ 3757 ib.ptr[ib.length_dw++] = 1; /* y */ 3758 ib.ptr[ib.length_dw++] = 1; /* z */ 3759 ib.ptr[ib.length_dw++] = 3760 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 3761 3762 /* write CS partial flush packet */ 3763 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 3764 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 3765 3766 /* shedule the ib on the ring */ 3767 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 3768 if (r) { 3769 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 3770 goto fail; 3771 } 3772 3773 /* wait for the GPU to finish processing the IB */ 3774 r = dma_fence_wait(f, false); 3775 if (r) { 3776 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 3777 goto fail; 3778 } 3779 3780 /* read back registers to clear the counters */ 3781 mutex_lock(&adev->grbm_idx_mutex); 3782 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 3783 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 3784 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 3785 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 3786 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 3787 } 3788 } 3789 } 3790 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 3791 mutex_unlock(&adev->grbm_idx_mutex); 3792 3793 fail: 3794 amdgpu_ib_free(adev, &ib, NULL); 3795 dma_fence_put(f); 3796 3797 return r; 3798 } 3799 3800 static int gfx_v9_0_early_init(void *handle) 3801 { 3802 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3803 3804 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 3805 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3806 gfx_v9_0_set_ring_funcs(adev); 3807 gfx_v9_0_set_irq_funcs(adev); 3808 gfx_v9_0_set_gds_init(adev); 3809 gfx_v9_0_set_rlc_funcs(adev); 3810 3811 return 0; 3812 } 3813 3814 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 3815 struct amdgpu_iv_entry *entry); 3816 3817 static int gfx_v9_0_ecc_late_init(void *handle) 3818 { 3819 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3820 struct ras_common_if **ras_if = &adev->gfx.ras_if; 3821 struct ras_ih_if ih_info = { 3822 .cb = gfx_v9_0_process_ras_data_cb, 3823 }; 3824 struct ras_fs_if fs_info = { 3825 .sysfs_name = "gfx_err_count", 3826 .debugfs_name = "gfx_err_inject", 3827 }; 3828 struct ras_common_if ras_block = { 3829 .block = AMDGPU_RAS_BLOCK__GFX, 3830 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 3831 .sub_block_index = 0, 3832 .name = "gfx", 3833 }; 3834 int r; 3835 3836 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 3837 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 3838 return 0; 3839 } 3840 3841 r = gfx_v9_0_do_edc_gds_workarounds(adev); 3842 if (r) 3843 return r; 3844 3845 /* requires IBs so do in late init after IB pool is initialized */ 3846 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 3847 if (r) 3848 return r; 3849 3850 /* handle resume path. */ 3851 if (*ras_if) { 3852 /* resend ras TA enable cmd during resume. 3853 * prepare to handle failure. 3854 */ 3855 ih_info.head = **ras_if; 3856 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3857 if (r) { 3858 if (r == -EAGAIN) { 3859 /* request a gpu reset. will run again. 
*/ 3860 amdgpu_ras_request_reset_on_boot(adev, 3861 AMDGPU_RAS_BLOCK__GFX); 3862 return 0; 3863 } 3864 /* fail to enable ras, cleanup all. */ 3865 goto irq; 3866 } 3867 /* enable successfully. continue. */ 3868 goto resume; 3869 } 3870 3871 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 3872 if (!*ras_if) 3873 return -ENOMEM; 3874 3875 **ras_if = ras_block; 3876 3877 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3878 if (r) { 3879 if (r == -EAGAIN) { 3880 amdgpu_ras_request_reset_on_boot(adev, 3881 AMDGPU_RAS_BLOCK__GFX); 3882 r = 0; 3883 } 3884 goto feature; 3885 } 3886 3887 ih_info.head = **ras_if; 3888 fs_info.head = **ras_if; 3889 3890 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 3891 if (r) 3892 goto interrupt; 3893 3894 amdgpu_ras_debugfs_create(adev, &fs_info); 3895 3896 r = amdgpu_ras_sysfs_create(adev, &fs_info); 3897 if (r) 3898 goto sysfs; 3899 resume: 3900 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 3901 if (r) 3902 goto irq; 3903 3904 return 0; 3905 irq: 3906 amdgpu_ras_sysfs_remove(adev, *ras_if); 3907 sysfs: 3908 amdgpu_ras_debugfs_remove(adev, *ras_if); 3909 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 3910 interrupt: 3911 amdgpu_ras_feature_enable(adev, *ras_if, 0); 3912 feature: 3913 kfree(*ras_if); 3914 *ras_if = NULL; 3915 return r; 3916 } 3917 3918 static int gfx_v9_0_late_init(void *handle) 3919 { 3920 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3921 int r; 3922 3923 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3924 if (r) 3925 return r; 3926 3927 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3928 if (r) 3929 return r; 3930 3931 r = gfx_v9_0_ecc_late_init(handle); 3932 if (r) 3933 return r; 3934 3935 return 0; 3936 } 3937 3938 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 3939 { 3940 uint32_t rlc_setting; 3941 3942 /* if RLC is not enabled, do nothing */ 3943 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3944 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3945 return false; 3946 3947 return true; 3948 } 3949 3950 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 3951 { 3952 uint32_t data; 3953 unsigned i; 3954 3955 data = RLC_SAFE_MODE__CMD_MASK; 3956 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3957 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3958 3959 /* wait for RLC_SAFE_MODE */ 3960 for (i = 0; i < adev->usec_timeout; i++) { 3961 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3962 break; 3963 udelay(1); 3964 } 3965 } 3966 3967 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 3968 { 3969 uint32_t data; 3970 3971 data = RLC_SAFE_MODE__CMD_MASK; 3972 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3973 } 3974 3975 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3976 bool enable) 3977 { 3978 amdgpu_gfx_rlc_enter_safe_mode(adev); 3979 3980 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3981 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3982 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 3983 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 3984 } else { 3985 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 3986 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 3987 } 3988 3989 amdgpu_gfx_rlc_exit_safe_mode(adev); 3990 } 3991 3992 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 3993 bool enable) 3994 { 3995 /* TODO: double check if we need to perform under safe mode */ 3996 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 3997 3998 if 
((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 3999 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4000 else 4001 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4002 4003 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4004 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4005 else 4006 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4007 4008 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4009 } 4010 4011 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4012 bool enable) 4013 { 4014 uint32_t data, def; 4015 4016 amdgpu_gfx_rlc_enter_safe_mode(adev); 4017 4018 /* It is disabled by HW by default */ 4019 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4020 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4021 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4022 4023 if (adev->asic_type != CHIP_VEGA12) 4024 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4025 4026 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4027 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4028 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4029 4030 /* only for Vega10 & Raven1 */ 4031 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4032 4033 if (def != data) 4034 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4035 4036 /* MGLS is a global flag to control all MGLS in GFX */ 4037 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4038 /* 2 - RLC memory Light sleep */ 4039 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4040 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4041 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4042 if (def != data) 4043 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4044 } 4045 /* 3 - CP memory Light sleep */ 4046 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4047 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4048 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4049 if (def != data) 4050 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4051 } 4052 } 4053 } else { 4054 /* 1 - MGCG_OVERRIDE */ 4055 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4056 4057 if (adev->asic_type != CHIP_VEGA12) 4058 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4059 4060 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4061 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4062 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4063 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4064 4065 if (def != data) 4066 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4067 4068 /* 2 - disable MGLS in RLC */ 4069 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4070 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4071 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4072 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4073 } 4074 4075 /* 3 - disable MGLS in CP */ 4076 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4077 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4078 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4079 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4080 } 4081 } 4082 4083 amdgpu_gfx_rlc_exit_safe_mode(adev); 4084 } 4085 4086 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4087 bool enable) 4088 { 4089 uint32_t data, def; 4090 4091 amdgpu_gfx_rlc_enter_safe_mode(adev); 4092 4093 /* Enable 3D CGCG/CGLS */ 4094 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4095 /* write cmd to clear cgcg/cgls ov */ 4096 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4097 
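		/* 'def' keeps the value as read; 'data' is modified and only written
		 * back when it actually differs, so unchanged registers are not
		 * rewritten. The same read-modify-write pattern is used by the other
		 * clock/power gating helpers in this file.
		 */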
/* unset CGCG override */ 4098 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4099 /* update CGCG and CGLS override bits */ 4100 if (def != data) 4101 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4102 4103 /* enable 3Dcgcg FSM(0x0000363f) */ 4104 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4105 4106 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4107 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4108 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4109 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4110 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4111 if (def != data) 4112 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4113 4114 /* set IDLE_POLL_COUNT(0x00900100) */ 4115 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4116 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4117 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4118 if (def != data) 4119 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4120 } else { 4121 /* Disable CGCG/CGLS */ 4122 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4123 /* disable cgcg, cgls should be disabled */ 4124 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4125 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4126 /* disable cgcg and cgls in FSM */ 4127 if (def != data) 4128 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4129 } 4130 4131 amdgpu_gfx_rlc_exit_safe_mode(adev); 4132 } 4133 4134 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4135 bool enable) 4136 { 4137 uint32_t def, data; 4138 4139 amdgpu_gfx_rlc_enter_safe_mode(adev); 4140 4141 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4142 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4143 /* unset CGCG override */ 4144 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4145 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4146 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4147 else 4148 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4149 /* update CGCG and CGLS override bits */ 4150 if (def != data) 4151 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4152 4153 /* enable cgcg FSM(0x0000363F) */ 4154 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4155 4156 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4157 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4158 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4159 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4160 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4161 if (def != data) 4162 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4163 4164 /* set IDLE_POLL_COUNT(0x00900100) */ 4165 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4166 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4167 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4168 if (def != data) 4169 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4170 } else { 4171 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4172 /* reset CGCG/CGLS bits */ 4173 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4174 /* disable cgcg and cgls in FSM */ 4175 if (def != data) 4176 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4177 } 4178 4179 amdgpu_gfx_rlc_exit_safe_mode(adev); 4180 } 4181 4182 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4183 bool enable) 4184 { 4185 if (enable) { 4186 /* CGCG/CGLS should be enabled after MGCG/MGLS 4187 * === MGCG + MGLS === 4188 */ 4189 
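		/* Enable order: MGCG/MGLS first, then 3D CGCG/CGLS, then coarse
		 * CGCG/CGLS; the disable path in the else branch below runs the same
		 * three helpers in reverse order.
		 */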
gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4190 /* === CGCG /CGLS for GFX 3D Only === */ 4191 gfx_v9_0_update_3d_clock_gating(adev, enable); 4192 /* === CGCG + CGLS === */ 4193 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4194 } else { 4195 /* CGCG/CGLS should be disabled before MGCG/MGLS 4196 * === CGCG + CGLS === 4197 */ 4198 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4199 /* === CGCG /CGLS for GFX 3D Only === */ 4200 gfx_v9_0_update_3d_clock_gating(adev, enable); 4201 /* === MGCG + MGLS === */ 4202 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4203 } 4204 return 0; 4205 } 4206 4207 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4208 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4209 .set_safe_mode = gfx_v9_0_set_safe_mode, 4210 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4211 .init = gfx_v9_0_rlc_init, 4212 .get_csb_size = gfx_v9_0_get_csb_size, 4213 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4214 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4215 .resume = gfx_v9_0_rlc_resume, 4216 .stop = gfx_v9_0_rlc_stop, 4217 .reset = gfx_v9_0_rlc_reset, 4218 .start = gfx_v9_0_rlc_start 4219 }; 4220 4221 static int gfx_v9_0_set_powergating_state(void *handle, 4222 enum amd_powergating_state state) 4223 { 4224 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4225 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4226 4227 switch (adev->asic_type) { 4228 case CHIP_RAVEN: 4229 if (!enable) { 4230 amdgpu_gfx_off_ctrl(adev, false); 4231 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4232 } 4233 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4234 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4235 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4236 } else { 4237 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4238 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4239 } 4240 4241 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4242 gfx_v9_0_enable_cp_power_gating(adev, true); 4243 else 4244 gfx_v9_0_enable_cp_power_gating(adev, false); 4245 4246 /* update gfx cgpg state */ 4247 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4248 4249 /* update mgcg state */ 4250 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4251 4252 if (enable) 4253 amdgpu_gfx_off_ctrl(adev, true); 4254 break; 4255 case CHIP_VEGA12: 4256 if (!enable) { 4257 amdgpu_gfx_off_ctrl(adev, false); 4258 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4259 } else { 4260 amdgpu_gfx_off_ctrl(adev, true); 4261 } 4262 break; 4263 default: 4264 break; 4265 } 4266 4267 return 0; 4268 } 4269 4270 static int gfx_v9_0_set_clockgating_state(void *handle, 4271 enum amd_clockgating_state state) 4272 { 4273 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4274 4275 if (amdgpu_sriov_vf(adev)) 4276 return 0; 4277 4278 switch (adev->asic_type) { 4279 case CHIP_VEGA10: 4280 case CHIP_VEGA12: 4281 case CHIP_VEGA20: 4282 case CHIP_RAVEN: 4283 gfx_v9_0_update_gfx_clock_gating(adev, 4284 state == AMD_CG_STATE_GATE ? 
true : false); 4285 break; 4286 default: 4287 break; 4288 } 4289 return 0; 4290 } 4291 4292 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4293 { 4294 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4295 int data; 4296 4297 if (amdgpu_sriov_vf(adev)) 4298 *flags = 0; 4299 4300 /* AMD_CG_SUPPORT_GFX_MGCG */ 4301 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4302 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4303 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4304 4305 /* AMD_CG_SUPPORT_GFX_CGCG */ 4306 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4307 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4308 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4309 4310 /* AMD_CG_SUPPORT_GFX_CGLS */ 4311 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4312 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4313 4314 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4315 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4316 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4317 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4318 4319 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4320 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4321 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4322 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4323 4324 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4325 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4326 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4327 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4328 4329 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4330 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4331 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4332 } 4333 4334 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4335 { 4336 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4337 } 4338 4339 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4340 { 4341 struct amdgpu_device *adev = ring->adev; 4342 u64 wptr; 4343 4344 /* XXX check if swapping is necessary on BE */ 4345 if (ring->use_doorbell) { 4346 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4347 } else { 4348 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4349 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4350 } 4351 4352 return wptr; 4353 } 4354 4355 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4356 { 4357 struct amdgpu_device *adev = ring->adev; 4358 4359 if (ring->use_doorbell) { 4360 /* XXX check if swapping is necessary on BE */ 4361 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4362 WDOORBELL64(ring->doorbell_index, ring->wptr); 4363 } else { 4364 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4365 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4366 } 4367 } 4368 4369 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4370 { 4371 struct amdgpu_device *adev = ring->adev; 4372 u32 ref_and_mask, reg_mem_engine; 4373 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4374 4375 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4376 switch (ring->me) { 4377 case 1: 4378 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4379 break; 4380 case 2: 4381 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4382 break; 4383 default: 4384 return; 4385 } 4386 reg_mem_engine = 0; 4387 } else { 4388 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4389 reg_mem_engine = 1; /* pfp */ 4390 } 4391 4392 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4393 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4394 
adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4395 ref_and_mask, ref_and_mask, 0x20); 4396 } 4397 4398 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4399 struct amdgpu_job *job, 4400 struct amdgpu_ib *ib, 4401 uint32_t flags) 4402 { 4403 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4404 u32 header, control = 0; 4405 4406 if (ib->flags & AMDGPU_IB_FLAG_CE) 4407 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4408 else 4409 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4410 4411 control |= ib->length_dw | (vmid << 24); 4412 4413 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4414 control |= INDIRECT_BUFFER_PRE_ENB(1); 4415 4416 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4417 gfx_v9_0_ring_emit_de_meta(ring); 4418 } 4419 4420 amdgpu_ring_write(ring, header); 4421 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4422 amdgpu_ring_write(ring, 4423 #ifdef __BIG_ENDIAN 4424 (2 << 0) | 4425 #endif 4426 lower_32_bits(ib->gpu_addr)); 4427 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4428 amdgpu_ring_write(ring, control); 4429 } 4430 4431 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4432 struct amdgpu_job *job, 4433 struct amdgpu_ib *ib, 4434 uint32_t flags) 4435 { 4436 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4437 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4438 4439 /* Currently, there is a high possibility to get wave ID mismatch 4440 * between ME and GDS, leading to a hw deadlock, because ME generates 4441 * different wave IDs than the GDS expects. This situation happens 4442 * randomly when at least 5 compute pipes use GDS ordered append. 4443 * The wave IDs generated by ME are also wrong after suspend/resume. 4444 * Those are probably bugs somewhere else in the kernel driver. 4445 * 4446 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4447 * GDS to 0 for this ring (me/pipe). 4448 */ 4449 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4450 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4451 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4452 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4453 } 4454 4455 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4456 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4457 amdgpu_ring_write(ring, 4458 #ifdef __BIG_ENDIAN 4459 (2 << 0) | 4460 #endif 4461 lower_32_bits(ib->gpu_addr)); 4462 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4463 amdgpu_ring_write(ring, control); 4464 } 4465 4466 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4467 u64 seq, unsigned flags) 4468 { 4469 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4470 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4471 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4472 4473 /* RELEASE_MEM - flush caches, send int */ 4474 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4475 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4476 EOP_TC_NC_ACTION_EN) : 4477 (EOP_TCL1_ACTION_EN | 4478 EOP_TC_ACTION_EN | 4479 EOP_TC_WB_ACTION_EN | 4480 EOP_TC_MD_ACTION_EN)) | 4481 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4482 EVENT_INDEX(5))); 4483 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4484 4485 /* 4486 * the address should be Qword aligned if 64bit write, Dword 4487 * aligned if only send 32bit data low (discard data high) 4488 */ 4489 if (write64bit) 4490 BUG_ON(addr & 0x7); 4491 else 4492 BUG_ON(addr & 0x3); 4493 amdgpu_ring_write(ring, lower_32_bits(addr)); 4494 amdgpu_ring_write(ring, upper_32_bits(addr)); 4495 amdgpu_ring_write(ring, lower_32_bits(seq)); 4496 amdgpu_ring_write(ring, upper_32_bits(seq)); 4497 amdgpu_ring_write(ring, 0); 4498 } 4499 4500 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4501 { 4502 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4503 uint32_t seq = ring->fence_drv.sync_seq; 4504 uint64_t addr = ring->fence_drv.gpu_addr; 4505 4506 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4507 lower_32_bits(addr), upper_32_bits(addr), 4508 seq, 0xffffffff, 4); 4509 } 4510 4511 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4512 unsigned vmid, uint64_t pd_addr) 4513 { 4514 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4515 4516 /* compute doesn't have PFP */ 4517 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4518 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4519 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4520 amdgpu_ring_write(ring, 0x0); 4521 } 4522 } 4523 4524 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4525 { 4526 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4527 } 4528 4529 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4530 { 4531 u64 wptr; 4532 4533 /* XXX check if swapping is necessary on BE */ 4534 if (ring->use_doorbell) 4535 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4536 else 4537 BUG(); 4538 return wptr; 4539 } 4540 4541 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4542 bool acquire) 4543 { 4544 struct amdgpu_device *adev = ring->adev; 4545 int pipe_num, tmp, reg; 4546 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4547 4548 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4549 4550 /* first me only has 2 entries, GFX and HP3D */ 4551 if (ring->me > 0) 4552 pipe_num -= 2; 4553 4554 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4555 tmp = RREG32(reg); 4556 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4557 WREG32(reg, tmp); 4558 } 4559 4560 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4561 struct amdgpu_ring *ring, 4562 bool acquire) 4563 { 4564 int i, pipe; 4565 bool reserve; 4566 struct amdgpu_ring *iring; 4567 4568 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4569 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4570 if (acquire) 4571 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4572 else 4573 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4574 4575 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4576 /* Clear all reservations - everyone reacquires all resources */ 4577 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4578 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4579 true); 4580 4581 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4582 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4583 true); 4584 } else { 4585 /* Lower all pipes without a current reservation */ 4586 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4587 iring = &adev->gfx.gfx_ring[i]; 4588 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4589 iring->me, 4590 iring->pipe, 4591 0); 4592 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4593 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4594 } 4595 4596 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4597 iring = &adev->gfx.compute_ring[i]; 4598 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4599 iring->me, 4600 iring->pipe, 4601 0); 4602 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4603 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4604 } 4605 } 4606 4607 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4608 } 4609 4610 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4611 struct amdgpu_ring *ring, 4612 bool acquire) 4613 { 4614 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4615 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4616 4617 mutex_lock(&adev->srbm_mutex); 4618 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4619 4620 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4621 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4622 4623 soc15_grbm_select(adev, 0, 0, 0, 0); 4624 mutex_unlock(&adev->srbm_mutex); 4625 } 4626 4627 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4628 enum drm_sched_priority priority) 4629 { 4630 struct amdgpu_device *adev = ring->adev; 4631 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4632 4633 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4634 return; 4635 4636 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4637 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4638 } 4639 4640 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4641 { 4642 struct amdgpu_device *adev = ring->adev; 4643 4644 /* XXX check if swapping is necessary on BE */ 4645 if (ring->use_doorbell) { 4646 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4647 WDOORBELL64(ring->doorbell_index, ring->wptr); 4648 } else{ 4649 BUG(); /* only DOORBELL method supported on gfx9 now */ 4650 } 4651 } 4652 4653 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4654 u64 seq, unsigned int flags) 4655 { 4656 struct amdgpu_device *adev = ring->adev; 4657 4658 /* we only allocate 32bit for each seq wb address */ 4659 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4660 4661 /* write fence seq to the "addr" */ 4662 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4663 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4664 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4665 amdgpu_ring_write(ring, lower_32_bits(addr)); 4666 amdgpu_ring_write(ring, upper_32_bits(addr)); 4667 amdgpu_ring_write(ring, lower_32_bits(seq)); 4668 4669 if (flags & AMDGPU_FENCE_FLAG_INT) { 4670 /* set register to trigger INT */ 4671 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4672 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4673 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4674 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4675 amdgpu_ring_write(ring, 0); 4676 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4677 } 4678 } 4679 4680 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4681 { 4682 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4683 amdgpu_ring_write(ring, 0); 4684 } 4685 4686 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 4687 { 4688 struct v9_ce_ib_state ce_payload = {0}; 4689 uint64_t csa_addr; 4690 int cnt; 4691 4692 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4693 csa_addr = amdgpu_csa_vaddr(ring->adev); 4694 4695 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4696 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4697 WRITE_DATA_DST_SEL(8) | 4698 WR_CONFIRM) | 4699 WRITE_DATA_CACHE_POLICY(0)); 4700 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4701 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4702 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 4703 } 4704 4705 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 4706 { 4707 struct v9_de_ib_state de_payload = {0}; 4708 uint64_t csa_addr, gds_addr; 4709 int cnt; 4710 4711 csa_addr = amdgpu_csa_vaddr(ring->adev); 4712 gds_addr = csa_addr + 4096; 4713 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 4714 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4715 4716 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 4717 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4718 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 4719 WRITE_DATA_DST_SEL(8) | 4720 WR_CONFIRM) | 4721 WRITE_DATA_CACHE_POLICY(0)); 4722 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4723 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 4724 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 4725 } 4726 4727 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 4728 { 4729 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 4730 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 4731 } 4732 4733 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 4734 { 4735 uint32_t dw2 = 0; 4736 4737 if (amdgpu_sriov_vf(ring->adev)) 4738 gfx_v9_0_ring_emit_ce_meta(ring); 4739 4740 gfx_v9_0_ring_emit_tmz(ring, true); 4741 4742 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 4743 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 4744 /* set load_global_config & load_global_uconfig */ 4745 dw2 |= 0x8001; 4746 /* set load_cs_sh_regs */ 4747 dw2 |= 0x01000000; 4748 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 4749 dw2 |= 0x10002; 4750 4751 /* set load_ce_ram if preamble presented */ 4752 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 4753 dw2 |= 0x10000000; 4754 } else { 4755 /* still load_ce_ram if this is the first time preamble presented 4756 * although there is no context switch happens. 4757 */ 4758 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 4759 dw2 |= 0x10000000; 4760 } 4761 4762 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4763 amdgpu_ring_write(ring, dw2); 4764 amdgpu_ring_write(ring, 0); 4765 } 4766 4767 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 4768 { 4769 unsigned ret; 4770 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 4771 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 4772 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 4773 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 4774 ret = ring->wptr & ring->buf_mask; 4775 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 4776 return ret; 4777 } 4778 4779 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 4780 { 4781 unsigned cur; 4782 BUG_ON(offset > ring->buf_mask); 4783 BUG_ON(ring->ring[offset] != 0x55aa55aa); 4784 4785 cur = (ring->wptr & ring->buf_mask) - 1; 4786 if (likely(cur > offset)) 4787 ring->ring[offset] = cur - offset; 4788 else 4789 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 4790 } 4791 4792 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 4793 { 4794 struct amdgpu_device *adev = ring->adev; 4795 4796 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4797 amdgpu_ring_write(ring, 0 | /* src: register*/ 4798 (5 << 8) | /* dst: memory */ 4799 (1 << 20)); /* write confirm */ 4800 amdgpu_ring_write(ring, reg); 4801 amdgpu_ring_write(ring, 0); 4802 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4803 adev->virt.reg_val_offs * 4)); 4804 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4805 adev->virt.reg_val_offs * 4)); 4806 } 4807 4808 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4809 uint32_t val) 4810 { 4811 uint32_t cmd = 0; 4812 4813 switch (ring->funcs->type) { 4814 case AMDGPU_RING_TYPE_GFX: 4815 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4816 break; 4817 case AMDGPU_RING_TYPE_KIQ: 4818 cmd = (1 << 16); /* no inc addr */ 4819 break; 4820 default: 4821 cmd = WR_CONFIRM; 4822 break; 4823 } 4824 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4825 amdgpu_ring_write(ring, cmd); 4826 amdgpu_ring_write(ring, reg); 4827 amdgpu_ring_write(ring, 0); 4828 amdgpu_ring_write(ring, val); 4829 } 4830 4831 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4832 uint32_t val, uint32_t mask) 4833 { 4834 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4835 } 4836 4837 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 4838 uint32_t reg0, uint32_t reg1, 4839 uint32_t ref, uint32_t mask) 4840 { 4841 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4842 struct amdgpu_device *adev = ring->adev; 4843 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4844 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 4845 4846 if (fw_version_ok) 4847 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 4848 ref, mask, 0x20); 4849 else 4850 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 4851 ref, mask); 4852 } 4853 4854 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 4855 { 4856 struct amdgpu_device *adev = ring->adev; 4857 uint32_t value = 0; 4858 4859 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 4860 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 4861 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 4862 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 4863 WREG32(mmSQ_CMD, value); 4864 } 4865 4866 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4867 enum amdgpu_interrupt_state state) 4868 { 4869 switch (state) { 4870 case AMDGPU_IRQ_STATE_DISABLE: 4871 case AMDGPU_IRQ_STATE_ENABLE: 4872 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4873 TIME_STAMP_INT_ENABLE, 4874 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4875 break; 4876 default: 4877 break; 4878 } 4879 } 4880 4881 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4882 int me, int pipe, 4883 enum amdgpu_interrupt_state state) 4884 { 4885 u32 mec_int_cntl, mec_int_cntl_reg; 4886 4887 /* 4888 * amdgpu controls only the first MEC. That's why this function only 4889 * handles the setting of interrupts for this specific MEC. All other 4890 * pipes' interrupts are set by amdkfd. 
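	 * That is why the switch below only accepts me == 1 and simply returns
	 * for any other ME.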
4891 */ 4892 4893 if (me == 1) { 4894 switch (pipe) { 4895 case 0: 4896 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4897 break; 4898 case 1: 4899 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4900 break; 4901 case 2: 4902 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4903 break; 4904 case 3: 4905 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4906 break; 4907 default: 4908 DRM_DEBUG("invalid pipe %d\n", pipe); 4909 return; 4910 } 4911 } else { 4912 DRM_DEBUG("invalid me %d\n", me); 4913 return; 4914 } 4915 4916 switch (state) { 4917 case AMDGPU_IRQ_STATE_DISABLE: 4918 mec_int_cntl = RREG32(mec_int_cntl_reg); 4919 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4920 TIME_STAMP_INT_ENABLE, 0); 4921 WREG32(mec_int_cntl_reg, mec_int_cntl); 4922 break; 4923 case AMDGPU_IRQ_STATE_ENABLE: 4924 mec_int_cntl = RREG32(mec_int_cntl_reg); 4925 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4926 TIME_STAMP_INT_ENABLE, 1); 4927 WREG32(mec_int_cntl_reg, mec_int_cntl); 4928 break; 4929 default: 4930 break; 4931 } 4932 } 4933 4934 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4935 struct amdgpu_irq_src *source, 4936 unsigned type, 4937 enum amdgpu_interrupt_state state) 4938 { 4939 switch (state) { 4940 case AMDGPU_IRQ_STATE_DISABLE: 4941 case AMDGPU_IRQ_STATE_ENABLE: 4942 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4943 PRIV_REG_INT_ENABLE, 4944 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4945 break; 4946 default: 4947 break; 4948 } 4949 4950 return 0; 4951 } 4952 4953 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4954 struct amdgpu_irq_src *source, 4955 unsigned type, 4956 enum amdgpu_interrupt_state state) 4957 { 4958 switch (state) { 4959 case AMDGPU_IRQ_STATE_DISABLE: 4960 case AMDGPU_IRQ_STATE_ENABLE: 4961 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4962 PRIV_INSTR_INT_ENABLE, 4963 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 4964 default: 4965 break; 4966 } 4967 4968 return 0; 4969 } 4970 4971 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 4972 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4973 CP_ECC_ERROR_INT_ENABLE, 1) 4974 4975 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 4976 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4977 CP_ECC_ERROR_INT_ENABLE, 0) 4978 4979 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 4980 struct amdgpu_irq_src *source, 4981 unsigned type, 4982 enum amdgpu_interrupt_state state) 4983 { 4984 switch (state) { 4985 case AMDGPU_IRQ_STATE_DISABLE: 4986 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4987 CP_ECC_ERROR_INT_ENABLE, 0); 4988 DISABLE_ECC_ON_ME_PIPE(1, 0); 4989 DISABLE_ECC_ON_ME_PIPE(1, 1); 4990 DISABLE_ECC_ON_ME_PIPE(1, 2); 4991 DISABLE_ECC_ON_ME_PIPE(1, 3); 4992 break; 4993 4994 case AMDGPU_IRQ_STATE_ENABLE: 4995 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4996 CP_ECC_ERROR_INT_ENABLE, 1); 4997 ENABLE_ECC_ON_ME_PIPE(1, 0); 4998 ENABLE_ECC_ON_ME_PIPE(1, 1); 4999 ENABLE_ECC_ON_ME_PIPE(1, 2); 5000 ENABLE_ECC_ON_ME_PIPE(1, 3); 5001 break; 5002 default: 5003 break; 5004 } 5005 5006 return 0; 5007 } 5008 5009 5010 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5011 struct amdgpu_irq_src *src, 5012 unsigned type, 5013 enum amdgpu_interrupt_state state) 5014 { 5015 switch (type) { 5016 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5017 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5018 break; 5019 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5020 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5021 break; 5022 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5023 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5024 break; 5025 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5026 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5027 break; 5028 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5029 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5030 break; 5031 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5032 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5033 break; 5034 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5035 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5036 break; 5037 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5038 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5039 break; 5040 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5041 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5042 break; 5043 default: 5044 break; 5045 } 5046 return 0; 5047 } 5048 5049 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5050 struct amdgpu_irq_src *source, 5051 struct amdgpu_iv_entry *entry) 5052 { 5053 int i; 5054 u8 me_id, pipe_id, queue_id; 5055 struct amdgpu_ring *ring; 5056 5057 DRM_DEBUG("IH: CP EOP\n"); 5058 me_id = (entry->ring_id & 0x0c) >> 2; 5059 pipe_id = (entry->ring_id & 0x03) >> 0; 5060 queue_id = (entry->ring_id & 0x70) >> 4; 5061 5062 switch (me_id) { 5063 case 0: 5064 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5065 break; 5066 case 1: 5067 case 2: 5068 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5069 ring = &adev->gfx.compute_ring[i]; 5070 /* Per-queue interrupt is supported for MEC starting from VI. 5071 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
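			 * Hence the loop below matches me/pipe/queue against each compute
			 * ring and signals only the ring that owns this queue.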
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	/* TODO ue will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_UE;
}

static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};
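
/*
 * Ring callback tables.  One table per ring type (GFX, compute, KIQ); they
 * are attached to the individual rings in gfx_v9_0_set_ring_funcs() below.
 */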
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jump to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
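
/*
 * The KIQ ring reuses the compute rptr/wptr helpers but only installs
 * fence, register read/write and register-wait emission; no IB or VM-flush
 * callbacks are provided for it.
 */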
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}
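
/*
 * Per-ASIC GDS defaults: total GDS size, the highest compute wave id
 * allowed to allocate GDS (Raven is split into raven1/raven2 by rev_id),
 * and the fixed GWS/OA sizes.
 */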
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};