/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L

#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 101 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 113 114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 120 121 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 123 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 125 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 127 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 129 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 131 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 133 134 struct ras_gfx_subblock_reg { 135 const char *name; 136 uint32_t hwip; 137 uint32_t inst; 138 uint32_t seg; 139 uint32_t reg_offset; 140 uint32_t sec_count_mask; 141 uint32_t sec_count_shift; 142 uint32_t ded_count_mask; 143 uint32_t ded_count_shift; 144 }; 145 146 enum ta_ras_gfx_subblock { 147 /*CPC*/ 148 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 149 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 150 TA_RAS_BLOCK__GFX_CPC_UCODE, 151 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 152 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 153 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 154 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 155 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 156 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 157 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 158 /* CPF*/ 159 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 160 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 161 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 162 TA_RAS_BLOCK__GFX_CPF_TAG, 163 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 164 /* CPG*/ 165 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 166 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 167 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 168 TA_RAS_BLOCK__GFX_CPG_TAG, 169 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 170 /* GDS*/ 171 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 172 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 173 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 174 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 175 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 176 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 177 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 178 /* SPI*/ 179 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 180 /* SQ*/ 181 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 182 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 183 TA_RAS_BLOCK__GFX_SQ_LDS_D, 184 TA_RAS_BLOCK__GFX_SQ_LDS_I, 185 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 186 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 187 /* 
SQC (3 ranges)*/ 188 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 189 /* SQC range 0*/ 190 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 191 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 192 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 193 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 194 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 195 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 196 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 197 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 198 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 199 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 200 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 201 /* SQC range 1*/ 202 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 203 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 204 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 205 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 206 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 207 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 208 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 209 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 210 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 211 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 212 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 213 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 214 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 215 /* SQC range 2*/ 216 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 217 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 218 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 219 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 220 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 221 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 222 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 223 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 224 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 225 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 226 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 227 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 228 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 229 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 230 /* TA*/ 231 TA_RAS_BLOCK__GFX_TA_INDEX_START, 232 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 233 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 234 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 235 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 236 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 237 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 238 /* TCA*/ 239 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 240 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 241 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 242 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 243 /* TCC (5 sub-ranges)*/ 244 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 245 /* TCC range 0*/ 246 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 247 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 248 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 249 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 250 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 251 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 252 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 253 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 254 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 255 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 256 /* TCC range 1*/ 257 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 258 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 259 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 260 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 261 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 262 /* TCC range 2*/ 263 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 264 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 265 
TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 266 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 267 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 268 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 269 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 270 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 271 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 272 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 273 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 274 /* TCC range 3*/ 275 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 276 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 277 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 278 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 279 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 280 /* TCC range 4*/ 281 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 282 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 283 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 284 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 285 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 286 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 287 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 288 /* TCI*/ 289 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 290 /* TCP*/ 291 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 292 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 293 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 294 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 295 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 296 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 297 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 298 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 299 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 300 /* TD*/ 301 TA_RAS_BLOCK__GFX_TD_INDEX_START, 302 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 303 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 304 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 305 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 306 /* EA (3 sub-ranges)*/ 307 TA_RAS_BLOCK__GFX_EA_INDEX_START, 308 /* EA range 0*/ 309 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 310 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 311 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 312 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 313 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 314 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 315 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 316 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 317 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 318 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 319 /* EA range 1*/ 320 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 321 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 322 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 323 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 324 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 325 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 326 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 327 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 328 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 329 /* EA range 2*/ 330 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 331 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 332 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 333 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 334 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 335 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 336 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 337 /* UTC VM L2 bank*/ 338 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 339 /* UTC VM walker*/ 340 TA_RAS_BLOCK__UTC_VML2_WALKER, 341 /* UTC ATC L2 2MB cache*/ 342 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 343 /* UTC ATC L2 4KB cache*/ 344 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 345 TA_RAS_BLOCK__GFX_MAX 346 }; 347 348 struct ras_gfx_subblock { 349 unsigned char *name; 350 int ta_subblock; 351 int 
hw_supported_error_type; 352 int sw_supported_error_type; 353 }; 354 355 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 356 [AMDGPU_RAS_BLOCK__##subblock] = { \ 357 #subblock, \ 358 TA_RAS_BLOCK__##subblock, \ 359 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 360 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 361 } 362 363 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 364 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 365 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 366 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 367 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 368 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 369 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 370 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 371 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 372 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 373 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 374 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 375 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 376 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 377 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 378 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 379 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 380 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 381 0), 382 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 383 0), 384 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 385 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 386 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 387 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 388 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 389 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 390 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 391 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 392 0, 0), 393 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 394 0), 395 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 396 0, 0), 397 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 398 0), 399 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 400 0, 0), 401 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 402 0), 403 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 404 1), 405 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 406 0, 0, 0), 407 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 408 0), 409 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 410 0), 411 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 412 0), 413 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 414 0), 415 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 416 0), 417 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 418 0, 0), 419 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 420 0), 421 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 422 0), 423 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 424 0, 0, 
0), 425 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 426 0), 427 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 428 0), 429 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 430 0), 431 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 432 0), 433 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 434 0), 435 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 436 0, 0), 437 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 438 0), 439 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 440 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 441 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 442 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 443 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 444 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 445 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 446 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 447 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 448 1), 449 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 450 1), 451 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 452 1), 453 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 454 0), 455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 456 0), 457 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 458 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 459 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 460 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 462 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 464 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 465 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 466 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 467 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 468 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 469 0), 470 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 471 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 472 0), 473 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 474 0, 0), 475 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 476 0), 477 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 478 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 479 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 480 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 481 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 482 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 483 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 484 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 485 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 486 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 487 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 488 
AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 489 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 495 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 496 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 497 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 498 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 499 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 500 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 501 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 502 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 503 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 504 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 505 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 506 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 507 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 508 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 509 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 510 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 511 }; 512 513 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 514 { 515 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 516 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 523 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 525 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 526 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 533 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 535 }; 536 537 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 538 { 539 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 540 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 
0x10000000), 541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 545 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 546 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 547 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 548 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 557 }; 558 559 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 560 { 561 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 562 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 563 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 569 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 571 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 572 }; 573 574 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 575 { 576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 588 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 589 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 590 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 591 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 
0x010b0000), 592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 597 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 599 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 600 }; 601 602 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 603 { 604 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 609 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 610 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 611 }; 612 613 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 614 { 615 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 616 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 622 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 623 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 624 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 625 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 631 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 632 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 633 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 634 }; 635 636 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 637 { 638 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 639 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 640 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 641 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 645 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 
0x0000ff0f, 0x00000000), 646 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 647 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 648 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 649 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 650 }; 651 652 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 653 { 654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 657 }; 658 659 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 660 { 661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 677 }; 678 679 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 680 { 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 693 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 694 }; 695 696 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 697 { 698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
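		/*
		 * As with the VEGA10 case above, the common gc_9_0 table is
		 * applied first and the chip-specific table below then
		 * overrides individual fields.  Each soc15_reg_golden entry
		 * is applied as a masked read-modify-write, roughly
		 * (illustrative sketch only, not the actual helper):
		 *
		 *	tmp = RREG32(reg);
		 *	tmp &= ~and_mask;
		 *	tmp |= or_mask;
		 *	WREG32(reg, tmp);
		 *
		 * See soc15_program_register_sequence() for the real
		 * implementation.
		 */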
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* Renoir does not need the common golden settings */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev =
ring->adev; 893 struct amdgpu_ib ib; 894 struct dma_fence *f = NULL; 895 896 unsigned index; 897 uint64_t gpu_addr; 898 uint32_t tmp; 899 long r; 900 901 r = amdgpu_device_wb_get(adev, &index); 902 if (r) 903 return r; 904 905 gpu_addr = adev->wb.gpu_addr + (index * 4); 906 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 907 memset(&ib, 0, sizeof(ib)); 908 r = amdgpu_ib_get(adev, NULL, 16, &ib); 909 if (r) 910 goto err1; 911 912 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 913 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 914 ib.ptr[2] = lower_32_bits(gpu_addr); 915 ib.ptr[3] = upper_32_bits(gpu_addr); 916 ib.ptr[4] = 0xDEADBEEF; 917 ib.length_dw = 5; 918 919 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 920 if (r) 921 goto err2; 922 923 r = dma_fence_wait_timeout(f, false, timeout); 924 if (r == 0) { 925 r = -ETIMEDOUT; 926 goto err2; 927 } else if (r < 0) { 928 goto err2; 929 } 930 931 tmp = adev->wb.wb[index]; 932 if (tmp == 0xDEADBEEF) 933 r = 0; 934 else 935 r = -EINVAL; 936 937 err2: 938 amdgpu_ib_free(adev, &ib, NULL); 939 dma_fence_put(f); 940 err1: 941 amdgpu_device_wb_free(adev, index); 942 return r; 943 } 944 945 946 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 947 { 948 release_firmware(adev->gfx.pfp_fw); 949 adev->gfx.pfp_fw = NULL; 950 release_firmware(adev->gfx.me_fw); 951 adev->gfx.me_fw = NULL; 952 release_firmware(adev->gfx.ce_fw); 953 adev->gfx.ce_fw = NULL; 954 release_firmware(adev->gfx.rlc_fw); 955 adev->gfx.rlc_fw = NULL; 956 release_firmware(adev->gfx.mec_fw); 957 adev->gfx.mec_fw = NULL; 958 release_firmware(adev->gfx.mec2_fw); 959 adev->gfx.mec2_fw = NULL; 960 961 kfree(adev->gfx.rlc.register_list_format); 962 } 963 964 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 965 { 966 const struct rlc_firmware_header_v2_1 *rlc_hdr; 967 968 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 969 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 970 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 971 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 972 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 973 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 974 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 975 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 976 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 977 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 978 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 979 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 980 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 981 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 982 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 983 } 984 985 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 986 { 987 adev->gfx.me_fw_write_wait = false; 988 adev->gfx.mec_fw_write_wait = false; 989 990 if ((adev->gfx.mec_fw_version < 0x000001a5) || 991 (adev->gfx.mec_feature_version < 46) || 992 
(adev->gfx.pfp_fw_version < 0x000000b7) || 993 (adev->gfx.pfp_feature_version < 46)) 994 DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ 995 GRBM requires 1-cycle delay in cp firmware\n"); 996 997 switch (adev->asic_type) { 998 case CHIP_VEGA10: 999 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1000 (adev->gfx.me_feature_version >= 42) && 1001 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1002 (adev->gfx.pfp_feature_version >= 42)) 1003 adev->gfx.me_fw_write_wait = true; 1004 1005 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1006 (adev->gfx.mec_feature_version >= 42)) 1007 adev->gfx.mec_fw_write_wait = true; 1008 break; 1009 case CHIP_VEGA12: 1010 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1011 (adev->gfx.me_feature_version >= 44) && 1012 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1013 (adev->gfx.pfp_feature_version >= 44)) 1014 adev->gfx.me_fw_write_wait = true; 1015 1016 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1017 (adev->gfx.mec_feature_version >= 44)) 1018 adev->gfx.mec_fw_write_wait = true; 1019 break; 1020 case CHIP_VEGA20: 1021 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1022 (adev->gfx.me_feature_version >= 44) && 1023 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1024 (adev->gfx.pfp_feature_version >= 44)) 1025 adev->gfx.me_fw_write_wait = true; 1026 1027 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1028 (adev->gfx.mec_feature_version >= 44)) 1029 adev->gfx.mec_fw_write_wait = true; 1030 break; 1031 case CHIP_RAVEN: 1032 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1033 (adev->gfx.me_feature_version >= 42) && 1034 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1035 (adev->gfx.pfp_feature_version >= 42)) 1036 adev->gfx.me_fw_write_wait = true; 1037 1038 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1039 (adev->gfx.mec_feature_version >= 42)) 1040 adev->gfx.mec_fw_write_wait = true; 1041 break; 1042 default: 1043 break; 1044 } 1045 } 1046 1047 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1048 { 1049 switch (adev->asic_type) { 1050 case CHIP_VEGA10: 1051 case CHIP_VEGA12: 1052 case CHIP_VEGA20: 1053 break; 1054 case CHIP_RAVEN: 1055 if (!(adev->rev_id >= 0x8 || 1056 adev->pdev->device == 0x15d8) && 1057 (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */ 1058 !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */ 1059 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1060 1061 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1062 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1063 AMD_PG_SUPPORT_CP | 1064 AMD_PG_SUPPORT_RLC_SMU_HS; 1065 break; 1066 case CHIP_RENOIR: 1067 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1068 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1069 AMD_PG_SUPPORT_CP | 1070 AMD_PG_SUPPORT_RLC_SMU_HS; 1071 break; 1072 default: 1073 break; 1074 } 1075 } 1076 1077 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1078 const char *chip_name) 1079 { 1080 char fw_name[30]; 1081 int err; 1082 struct amdgpu_firmware_info *info = NULL; 1083 const struct common_firmware_header *header = NULL; 1084 const struct gfx_firmware_header_v1_0 *cp_hdr; 1085 1086 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1087 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1088 if (err) 1089 goto out; 1090 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1091 if (err) 1092 goto out; 1093 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1094 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1095 adev->gfx.pfp_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version); 1096 1097 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1098 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1099 if (err) 1100 goto out; 1101 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1102 if (err) 1103 goto out; 1104 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1105 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1106 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1107 1108 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1109 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1110 if (err) 1111 goto out; 1112 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1113 if (err) 1114 goto out; 1115 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1116 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1117 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1118 1119 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1120 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1121 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1122 info->fw = adev->gfx.pfp_fw; 1123 header = (const struct common_firmware_header *)info->fw->data; 1124 adev->firmware.fw_size += 1125 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1126 1127 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1128 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1129 info->fw = adev->gfx.me_fw; 1130 header = (const struct common_firmware_header *)info->fw->data; 1131 adev->firmware.fw_size += 1132 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1133 1134 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1135 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1136 info->fw = adev->gfx.ce_fw; 1137 header = (const struct common_firmware_header *)info->fw->data; 1138 adev->firmware.fw_size += 1139 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1140 } 1141 1142 out: 1143 if (err) { 1144 dev_err(adev->dev, 1145 "gfx9: Failed to load firmware \"%s\"\n", 1146 fw_name); 1147 release_firmware(adev->gfx.pfp_fw); 1148 adev->gfx.pfp_fw = NULL; 1149 release_firmware(adev->gfx.me_fw); 1150 adev->gfx.me_fw = NULL; 1151 release_firmware(adev->gfx.ce_fw); 1152 adev->gfx.ce_fw = NULL; 1153 } 1154 return err; 1155 } 1156 1157 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1158 const char *chip_name) 1159 { 1160 char fw_name[30]; 1161 int err; 1162 struct amdgpu_firmware_info *info = NULL; 1163 const struct common_firmware_header *header = NULL; 1164 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1165 unsigned int *tmp = NULL; 1166 unsigned int i = 0; 1167 uint16_t version_major; 1168 uint16_t version_minor; 1169 uint32_t smu_version; 1170 1171 /* 1172 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1173 * instead of picasso_rlc.bin. 
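	 * (The PCI revision ID distinguishes the AM4 desktop package from
	 * the FP5 mobile package so the matching RLC image can be picked;
	 * for example, a revision of 0xC9 selects picasso_rlc_am4.bin.)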
1174 * Judgment method: 1175 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1176 * or revision >= 0xD8 && revision <= 0xDF 1177 * otherwise is PCO FP5 1178 */ 1179 if (!strcmp(chip_name, "picasso") && 1180 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1181 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1182 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 1183 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1184 (smu_version >= 0x41e2b)) 1185 /** 1186 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1187 */ 1188 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1189 else 1190 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1191 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1192 if (err) 1193 goto out; 1194 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1195 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1196 1197 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1198 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1199 if (version_major == 2 && version_minor == 1) 1200 adev->gfx.rlc.is_rlc_v2_1 = true; 1201 1202 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1203 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1204 adev->gfx.rlc.save_and_restore_offset = 1205 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1206 adev->gfx.rlc.clear_state_descriptor_offset = 1207 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1208 adev->gfx.rlc.avail_scratch_ram_locations = 1209 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1210 adev->gfx.rlc.reg_restore_list_size = 1211 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1212 adev->gfx.rlc.reg_list_format_start = 1213 le32_to_cpu(rlc_hdr->reg_list_format_start); 1214 adev->gfx.rlc.reg_list_format_separate_start = 1215 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1216 adev->gfx.rlc.starting_offsets_start = 1217 le32_to_cpu(rlc_hdr->starting_offsets_start); 1218 adev->gfx.rlc.reg_list_format_size_bytes = 1219 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1220 adev->gfx.rlc.reg_list_size_bytes = 1221 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1222 adev->gfx.rlc.register_list_format = 1223 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1224 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1225 if (!adev->gfx.rlc.register_list_format) { 1226 err = -ENOMEM; 1227 goto out; 1228 } 1229 1230 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1231 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1232 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1233 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1234 1235 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1236 1237 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1238 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1239 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1240 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1241 1242 if (adev->gfx.rlc.is_rlc_v2_1) 1243 gfx_v9_0_init_rlc_ext_microcode(adev); 1244 1245 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1246 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1247 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1248 info->fw = adev->gfx.rlc_fw; 1249 header = (const struct common_firmware_header *)info->fw->data; 1250 
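		/*
		 * Ucode sizes reported to the PSP loader are accumulated in
		 * PAGE_SIZE-aligned steps: ALIGN() rounds each image up to
		 * the next page boundary, so with 4 KiB pages a 13 KiB RLC
		 * image contributes 16 KiB to fw_size below.
		 */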
adev->firmware.fw_size += 1251 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1252 1253 if (adev->gfx.rlc.is_rlc_v2_1 && 1254 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1255 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1256 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1257 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1258 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1259 info->fw = adev->gfx.rlc_fw; 1260 adev->firmware.fw_size += 1261 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1262 1263 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1264 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1265 info->fw = adev->gfx.rlc_fw; 1266 adev->firmware.fw_size += 1267 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1268 1269 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1270 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1271 info->fw = adev->gfx.rlc_fw; 1272 adev->firmware.fw_size += 1273 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1274 } 1275 } 1276 1277 out: 1278 if (err) { 1279 dev_err(adev->dev, 1280 "gfx9: Failed to load firmware \"%s\"\n", 1281 fw_name); 1282 release_firmware(adev->gfx.rlc_fw); 1283 adev->gfx.rlc_fw = NULL; 1284 } 1285 return err; 1286 } 1287 1288 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1289 const char *chip_name) 1290 { 1291 char fw_name[30]; 1292 int err; 1293 struct amdgpu_firmware_info *info = NULL; 1294 const struct common_firmware_header *header = NULL; 1295 const struct gfx_firmware_header_v1_0 *cp_hdr; 1296 1297 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1298 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1299 if (err) 1300 goto out; 1301 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1302 if (err) 1303 goto out; 1304 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1305 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1306 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1307 1308 1309 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1310 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1311 if (!err) { 1312 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1313 if (err) 1314 goto out; 1315 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1316 adev->gfx.mec2_fw->data; 1317 adev->gfx.mec2_fw_version = 1318 le32_to_cpu(cp_hdr->header.ucode_version); 1319 adev->gfx.mec2_feature_version = 1320 le32_to_cpu(cp_hdr->ucode_feature_version); 1321 } else { 1322 err = 0; 1323 adev->gfx.mec2_fw = NULL; 1324 } 1325 1326 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1327 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1328 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1329 info->fw = adev->gfx.mec_fw; 1330 header = (const struct common_firmware_header *)info->fw->data; 1331 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1332 adev->firmware.fw_size += 1333 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1334 1335 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1336 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1337 info->fw = adev->gfx.mec_fw; 1338 adev->firmware.fw_size += 1339 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1340 1341 if (adev->gfx.mec2_fw) { 1342 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1343 
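			/*
			 * MEC2 is accounted for like MEC1 above: the ucode
			 * body minus its jump table (jt_size dwords), plus a
			 * separate MEC2 JT entry on ASICs that still load it
			 * (everything except Arcturus and Renoir, per the
			 * check below).
			 */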
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1344 info->fw = adev->gfx.mec2_fw; 1345 header = (const struct common_firmware_header *)info->fw->data; 1346 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1347 adev->firmware.fw_size += 1348 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1349 1350 /* TODO: Determine if MEC2 JT FW loading can be removed 1351 for all GFX V9 asic and above */ 1352 if (adev->asic_type != CHIP_ARCTURUS && 1353 adev->asic_type != CHIP_RENOIR) { 1354 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1355 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1356 info->fw = adev->gfx.mec2_fw; 1357 adev->firmware.fw_size += 1358 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1359 PAGE_SIZE); 1360 } 1361 } 1362 } 1363 1364 out: 1365 gfx_v9_0_check_if_need_gfxoff(adev); 1366 gfx_v9_0_check_fw_write_wait(adev); 1367 if (err) { 1368 dev_err(adev->dev, 1369 "gfx9: Failed to load firmware \"%s\"\n", 1370 fw_name); 1371 release_firmware(adev->gfx.mec_fw); 1372 adev->gfx.mec_fw = NULL; 1373 release_firmware(adev->gfx.mec2_fw); 1374 adev->gfx.mec2_fw = NULL; 1375 } 1376 return err; 1377 } 1378 1379 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1380 { 1381 const char *chip_name; 1382 int r; 1383 1384 DRM_DEBUG("\n"); 1385 1386 switch (adev->asic_type) { 1387 case CHIP_VEGA10: 1388 chip_name = "vega10"; 1389 break; 1390 case CHIP_VEGA12: 1391 chip_name = "vega12"; 1392 break; 1393 case CHIP_VEGA20: 1394 chip_name = "vega20"; 1395 break; 1396 case CHIP_RAVEN: 1397 if (adev->rev_id >= 8) 1398 chip_name = "raven2"; 1399 else if (adev->pdev->device == 0x15d8) 1400 chip_name = "picasso"; 1401 else 1402 chip_name = "raven"; 1403 break; 1404 case CHIP_ARCTURUS: 1405 chip_name = "arcturus"; 1406 break; 1407 case CHIP_RENOIR: 1408 chip_name = "renoir"; 1409 break; 1410 default: 1411 BUG(); 1412 } 1413 1414 /* No CPG in Arcturus */ 1415 if (adev->asic_type != CHIP_ARCTURUS) { 1416 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1417 if (r) 1418 return r; 1419 } 1420 1421 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1422 if (r) 1423 return r; 1424 1425 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1426 if (r) 1427 return r; 1428 1429 return r; 1430 } 1431 1432 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1433 { 1434 u32 count = 0; 1435 const struct cs_section_def *sect = NULL; 1436 const struct cs_extent_def *ext = NULL; 1437 1438 /* begin clear state */ 1439 count += 2; 1440 /* context control state */ 1441 count += 3; 1442 1443 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1444 for (ext = sect->section; ext->extent != NULL; ++ext) { 1445 if (sect->id == SECT_CONTEXT) 1446 count += 2 + ext->reg_count; 1447 else 1448 return 0; 1449 } 1450 } 1451 1452 /* end clear state */ 1453 count += 2; 1454 /* clear state */ 1455 count += 2; 1456 1457 return count; 1458 } 1459 1460 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1461 volatile u32 *buffer) 1462 { 1463 u32 count = 0, i; 1464 const struct cs_section_def *sect = NULL; 1465 const struct cs_extent_def *ext = NULL; 1466 1467 if (adev->gfx.rlc.cs_data == NULL) 1468 return; 1469 if (buffer == NULL) 1470 return; 1471 1472 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1473 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1474 1475 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1476 buffer[count++] = cpu_to_le32(0x80000000); 1477 buffer[count++] = 
cpu_to_le32(0x80000000); 1478 1479 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1480 for (ext = sect->section; ext->extent != NULL; ++ext) { 1481 if (sect->id == SECT_CONTEXT) { 1482 buffer[count++] = 1483 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1484 buffer[count++] = cpu_to_le32(ext->reg_index - 1485 PACKET3_SET_CONTEXT_REG_START); 1486 for (i = 0; i < ext->reg_count; i++) 1487 buffer[count++] = cpu_to_le32(ext->extent[i]); 1488 } else { 1489 return; 1490 } 1491 } 1492 } 1493 1494 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1495 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1496 1497 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1498 buffer[count++] = cpu_to_le32(0); 1499 } 1500 1501 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1502 { 1503 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1504 uint32_t pg_always_on_cu_num = 2; 1505 uint32_t always_on_cu_num; 1506 uint32_t i, j, k; 1507 uint32_t mask, cu_bitmap, counter; 1508 1509 if (adev->flags & AMD_IS_APU) 1510 always_on_cu_num = 4; 1511 else if (adev->asic_type == CHIP_VEGA12) 1512 always_on_cu_num = 8; 1513 else 1514 always_on_cu_num = 12; 1515 1516 mutex_lock(&adev->grbm_idx_mutex); 1517 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1518 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1519 mask = 1; 1520 cu_bitmap = 0; 1521 counter = 0; 1522 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1523 1524 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1525 if (cu_info->bitmap[i][j] & mask) { 1526 if (counter == pg_always_on_cu_num) 1527 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1528 if (counter < always_on_cu_num) 1529 cu_bitmap |= mask; 1530 else 1531 break; 1532 counter++; 1533 } 1534 mask <<= 1; 1535 } 1536 1537 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1538 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1539 } 1540 } 1541 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1542 mutex_unlock(&adev->grbm_idx_mutex); 1543 } 1544 1545 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1546 { 1547 uint32_t data; 1548 1549 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1550 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1551 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1552 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1553 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1554 1555 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1556 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1557 1558 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1559 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1560 1561 mutex_lock(&adev->grbm_idx_mutex); 1562 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1563 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1564 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1565 1566 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1567 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1568 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1569 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1570 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1571 1572 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1573 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1574 data &= 0x0000FFFF; 1575 data |= 0x00C00000; 1576 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1577 1578 /* 1579 
* RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1580 * programmed in gfx_v9_0_init_always_on_cu_mask() 1581 */ 1582 1583 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1584 * but used for RLC_LB_CNTL configuration */ 1585 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1586 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1587 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1588 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1589 mutex_unlock(&adev->grbm_idx_mutex); 1590 1591 gfx_v9_0_init_always_on_cu_mask(adev); 1592 } 1593 1594 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1595 { 1596 uint32_t data; 1597 1598 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1599 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1600 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1601 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1602 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1603 1604 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1605 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1606 1607 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1608 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1609 1610 mutex_lock(&adev->grbm_idx_mutex); 1611 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1612 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1613 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1614 1615 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1616 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1617 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1618 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1619 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1620 1621 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1622 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1623 data &= 0x0000FFFF; 1624 data |= 0x00C00000; 1625 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1626 1627 /* 1628 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1629 * programmed in gfx_v9_0_init_always_on_cu_mask() 1630 */ 1631 1632 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1633 * but used for RLC_LB_CNTL configuration */ 1634 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1635 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1636 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1637 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1638 mutex_unlock(&adev->grbm_idx_mutex); 1639 1640 gfx_v9_0_init_always_on_cu_mask(adev); 1641 } 1642 1643 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1644 { 1645 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1646 } 1647 1648 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1649 { 1650 return 5; 1651 } 1652 1653 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1654 { 1655 const struct cs_section_def *cs_data; 1656 int r; 1657 1658 adev->gfx.rlc.cs_data = gfx9_cs_data; 1659 1660 cs_data = adev->gfx.rlc.cs_data; 1661 1662 if (cs_data) { 1663 /* init clear state block */ 1664 r = amdgpu_gfx_rlc_init_csb(adev); 1665 if (r) 1666 return r; 1667 } 1668 1669 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 1670 /* TODO: double check the cp_table_size for RV */ 1671 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1672 r = amdgpu_gfx_rlc_init_cpt(adev); 1673 if (r) 1674 return r; 1675 } 1676 1677 switch (adev->asic_type) { 1678 case CHIP_RAVEN: 1679 gfx_v9_0_init_lbpw(adev); 1680 break; 1681 case CHIP_VEGA20: 1682 gfx_v9_4_init_lbpw(adev); 1683 break; 1684 default: 1685 break; 1686 } 1687 1688 return 0; 1689 } 1690 1691 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1692 { 1693 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1694 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1695 } 1696 1697 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1698 { 1699 int r; 1700 u32 *hpd; 1701 const __le32 *fw_data; 1702 unsigned fw_size; 1703 u32 *fw; 1704 size_t mec_hpd_size; 1705 1706 const struct gfx_firmware_header_v1_0 *mec_hdr; 1707 1708 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1709 1710 /* take ownership of the relevant compute queues */ 1711 amdgpu_gfx_compute_queue_acquire(adev); 1712 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1713 1714 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1715 AMDGPU_GEM_DOMAIN_VRAM, 1716 &adev->gfx.mec.hpd_eop_obj, 1717 &adev->gfx.mec.hpd_eop_gpu_addr, 1718 (void **)&hpd); 1719 if (r) { 1720 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1721 gfx_v9_0_mec_fini(adev); 1722 return r; 1723 } 1724 1725 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1726 1727 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1728 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1729 1730 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1731 1732 fw_data = (const __le32 *) 1733 (adev->gfx.mec_fw->data + 1734 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1735 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1736 1737 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1738 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1739 &adev->gfx.mec.mec_fw_obj, 1740 &adev->gfx.mec.mec_fw_gpu_addr, 1741 (void **)&fw); 1742 if (r) { 1743 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1744 gfx_v9_0_mec_fini(adev); 1745 return r; 1746 } 1747 1748 memcpy(fw, fw_data, fw_size); 1749 1750 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1751 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1752 1753 return 0; 1754 } 1755 1756 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1757 { 1758 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1759 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1760 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1761 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1762 (SQ_IND_INDEX__FORCE_READ_MASK)); 1763 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1764 } 1765 1766 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1767 uint32_t wave, uint32_t thread, 1768 uint32_t regno, 
uint32_t num, uint32_t *out) 1769 { 1770 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1771 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1772 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1773 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1774 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1775 (SQ_IND_INDEX__FORCE_READ_MASK) | 1776 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1777 while (num--) 1778 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1779 } 1780 1781 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1782 { 1783 /* type 1 wave data */ 1784 dst[(*no_fields)++] = 1; 1785 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1786 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1787 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1788 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1789 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1790 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1791 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1792 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1793 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1794 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1795 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1796 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1797 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1798 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1799 } 1800 1801 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1802 uint32_t wave, uint32_t start, 1803 uint32_t size, uint32_t *dst) 1804 { 1805 wave_read_regs( 1806 adev, simd, wave, 0, 1807 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1808 } 1809 1810 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1811 uint32_t wave, uint32_t thread, 1812 uint32_t start, uint32_t size, 1813 uint32_t *dst) 1814 { 1815 wave_read_regs( 1816 adev, simd, wave, thread, 1817 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1818 } 1819 1820 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1821 u32 me, u32 pipe, u32 q, u32 vm) 1822 { 1823 soc15_grbm_select(adev, me, pipe, q, vm); 1824 } 1825 1826 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1827 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1828 .select_se_sh = &gfx_v9_0_select_se_sh, 1829 .read_wave_data = &gfx_v9_0_read_wave_data, 1830 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1831 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1832 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1833 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1834 .query_ras_error_count = &gfx_v9_0_query_ras_error_count 1835 }; 1836 1837 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1838 { 1839 u32 gb_addr_config; 1840 int err; 1841 1842 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1843 1844 switch (adev->asic_type) { 1845 case CHIP_VEGA10: 1846 adev->gfx.config.max_hw_contexts = 8; 1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1851 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1852 break; 1853 case 
CHIP_VEGA12: 1854 adev->gfx.config.max_hw_contexts = 8; 1855 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1856 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1857 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1858 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1859 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1860 DRM_INFO("fix gfx.config for vega12\n"); 1861 break; 1862 case CHIP_VEGA20: 1863 adev->gfx.config.max_hw_contexts = 8; 1864 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1865 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1866 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1867 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1868 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1869 gb_addr_config &= ~0xf3e777ff; 1870 gb_addr_config |= 0x22014042; 1871 /* check vbios table if gpu info is not available */ 1872 err = amdgpu_atomfirmware_get_gfx_info(adev); 1873 if (err) 1874 return err; 1875 break; 1876 case CHIP_RAVEN: 1877 adev->gfx.config.max_hw_contexts = 8; 1878 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1879 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1880 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1881 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1882 if (adev->rev_id >= 8) 1883 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1884 else 1885 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1886 break; 1887 case CHIP_ARCTURUS: 1888 adev->gfx.config.max_hw_contexts = 8; 1889 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1890 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1891 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1892 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1893 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1894 gb_addr_config &= ~0xf3e777ff; 1895 gb_addr_config |= 0x22014042; 1896 break; 1897 case CHIP_RENOIR: 1898 adev->gfx.config.max_hw_contexts = 8; 1899 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1900 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1901 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1902 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1903 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1904 gb_addr_config &= ~0xf3e777ff; 1905 gb_addr_config |= 0x22010042; 1906 break; 1907 default: 1908 BUG(); 1909 break; 1910 } 1911 1912 adev->gfx.config.gb_addr_config = gb_addr_config; 1913 1914 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1915 REG_GET_FIELD( 1916 adev->gfx.config.gb_addr_config, 1917 GB_ADDR_CONFIG, 1918 NUM_PIPES); 1919 1920 adev->gfx.config.max_tile_pipes = 1921 adev->gfx.config.gb_addr_config_fields.num_pipes; 1922 1923 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1924 REG_GET_FIELD( 1925 adev->gfx.config.gb_addr_config, 1926 GB_ADDR_CONFIG, 1927 NUM_BANKS); 1928 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1929 REG_GET_FIELD( 1930 adev->gfx.config.gb_addr_config, 1931 GB_ADDR_CONFIG, 1932 MAX_COMPRESSED_FRAGS); 1933 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1934 REG_GET_FIELD( 1935 adev->gfx.config.gb_addr_config, 1936 GB_ADDR_CONFIG, 1937 NUM_RB_PER_SE); 1938 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1939 REG_GET_FIELD( 1940 adev->gfx.config.gb_addr_config, 1941 GB_ADDR_CONFIG, 1942 NUM_SHADER_ENGINES); 1943 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1944 REG_GET_FIELD( 1945 adev->gfx.config.gb_addr_config, 1946 GB_ADDR_CONFIG, 1947 PIPE_INTERLEAVE_SIZE)); 1948 1949 return 0; 1950 } 1951 1952 static int 
gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
			   int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX9_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static int gfx_v9_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		adev->gfx.mec.num_mec = 2;
		break;
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* ECC error */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* FUE error */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v9_0_scratch_init(adev);

	r = gfx_v9_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v9_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		if (!i)
			sprintf(ring->name, "gfx");
		else
			sprintf(ring->name, "gfx_%d", i);
		ring->use_doorbell = true;
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes
*/ 2079 ring_id = 0; 2080 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2081 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2082 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2083 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2084 continue; 2085 2086 r = gfx_v9_0_compute_ring_init(adev, 2087 ring_id, 2088 i, k, j); 2089 if (r) 2090 return r; 2091 2092 ring_id++; 2093 } 2094 } 2095 } 2096 2097 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2098 if (r) { 2099 DRM_ERROR("Failed to init KIQ BOs!\n"); 2100 return r; 2101 } 2102 2103 kiq = &adev->gfx.kiq; 2104 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2105 if (r) 2106 return r; 2107 2108 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2109 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2110 if (r) 2111 return r; 2112 2113 adev->gfx.ce_ram_size = 0x8000; 2114 2115 r = gfx_v9_0_gpu_early_init(adev); 2116 if (r) 2117 return r; 2118 2119 return 0; 2120 } 2121 2122 2123 static int gfx_v9_0_sw_fini(void *handle) 2124 { 2125 int i; 2126 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2127 2128 amdgpu_gfx_ras_fini(adev); 2129 2130 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2131 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2132 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2133 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2134 2135 amdgpu_gfx_mqd_sw_fini(adev); 2136 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2137 amdgpu_gfx_kiq_fini(adev); 2138 2139 gfx_v9_0_mec_fini(adev); 2140 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2141 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 2142 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2143 &adev->gfx.rlc.cp_table_gpu_addr, 2144 (void **)&adev->gfx.rlc.cp_table_ptr); 2145 } 2146 gfx_v9_0_free_microcode(adev); 2147 2148 return 0; 2149 } 2150 2151 2152 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2153 { 2154 /* TODO */ 2155 } 2156 2157 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2158 { 2159 u32 data; 2160 2161 if (instance == 0xffffffff) 2162 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2163 else 2164 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2165 2166 if (se_num == 0xffffffff) 2167 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2168 else 2169 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2170 2171 if (sh_num == 0xffffffff) 2172 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2173 else 2174 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2175 2176 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2177 } 2178 2179 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2180 { 2181 u32 data, mask; 2182 2183 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2184 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2185 2186 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2187 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2188 2189 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2190 adev->gfx.config.max_sh_per_se); 2191 2192 return (~data) & mask; 2193 } 2194 2195 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2196 { 2197 int i, j; 2198 u32 data; 2199 u32 active_rbs = 0; 2200 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2201 adev->gfx.config.max_sh_per_se; 2202 2203 
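	/*
	 * Walk every SE/SH through the GRBM index, read which render backends
	 * survived harvesting on that shader array, and pack the per-SH bitmaps
	 * into one global mask; broadcast mode is restored before unlocking.
	 */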
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
2269 */ 2270 for (vmid = 1; vmid < 16; vmid++) { 2271 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2272 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2273 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2274 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2275 } 2276 } 2277 2278 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2279 { 2280 u32 tmp; 2281 int i; 2282 2283 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2284 2285 gfx_v9_0_tiling_mode_table_init(adev); 2286 2287 gfx_v9_0_setup_rb(adev); 2288 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2289 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2290 2291 /* XXX SH_MEM regs */ 2292 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2293 mutex_lock(&adev->srbm_mutex); 2294 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2295 soc15_grbm_select(adev, 0, 0, 0, i); 2296 /* CP and shaders */ 2297 if (i == 0) { 2298 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2299 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2300 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2301 !!amdgpu_noretry); 2302 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2303 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2304 } else { 2305 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2306 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2307 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2308 !!amdgpu_noretry); 2309 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2310 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2311 (adev->gmc.private_aperture_start >> 48)); 2312 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2313 (adev->gmc.shared_aperture_start >> 48)); 2314 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2315 } 2316 } 2317 soc15_grbm_select(adev, 0, 0, 0, 0); 2318 2319 mutex_unlock(&adev->srbm_mutex); 2320 2321 gfx_v9_0_init_compute_vmid(adev); 2322 gfx_v9_0_init_gds_vmid(adev); 2323 } 2324 2325 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2326 { 2327 u32 i, j, k; 2328 u32 mask; 2329 2330 mutex_lock(&adev->grbm_idx_mutex); 2331 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2332 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2333 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2334 for (k = 0; k < adev->usec_timeout; k++) { 2335 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2336 break; 2337 udelay(1); 2338 } 2339 if (k == adev->usec_timeout) { 2340 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2341 0xffffffff, 0xffffffff); 2342 mutex_unlock(&adev->grbm_idx_mutex); 2343 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2344 i, j); 2345 return; 2346 } 2347 } 2348 } 2349 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2350 mutex_unlock(&adev->grbm_idx_mutex); 2351 2352 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2353 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2354 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2355 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2356 for (k = 0; k < adev->usec_timeout; k++) { 2357 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2358 break; 2359 udelay(1); 2360 } 2361 } 2362 2363 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2364 bool enable) 2365 { 2366 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2367 2368 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2369 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
1 : 0); 2370 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2371 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2372 2373 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2374 } 2375 2376 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2377 { 2378 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2379 /* csib */ 2380 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2381 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2382 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2383 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2384 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2385 adev->gfx.rlc.clear_state_size); 2386 } 2387 2388 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2389 int indirect_offset, 2390 int list_size, 2391 int *unique_indirect_regs, 2392 int unique_indirect_reg_count, 2393 int *indirect_start_offsets, 2394 int *indirect_start_offsets_count, 2395 int max_start_offsets_count) 2396 { 2397 int idx; 2398 2399 for (; indirect_offset < list_size; indirect_offset++) { 2400 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2401 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2402 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2403 2404 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2405 indirect_offset += 2; 2406 2407 /* look for the matching indice */ 2408 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2409 if (unique_indirect_regs[idx] == 2410 register_list_format[indirect_offset] || 2411 !unique_indirect_regs[idx]) 2412 break; 2413 } 2414 2415 BUG_ON(idx >= unique_indirect_reg_count); 2416 2417 if (!unique_indirect_regs[idx]) 2418 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2419 2420 indirect_offset++; 2421 } 2422 } 2423 } 2424 2425 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2426 { 2427 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2428 int unique_indirect_reg_count = 0; 2429 2430 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2431 int indirect_start_offsets_count = 0; 2432 2433 int list_size = 0; 2434 int i = 0, j = 0; 2435 u32 tmp = 0; 2436 2437 u32 *register_list_format = 2438 kmemdup(adev->gfx.rlc.register_list_format, 2439 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2440 if (!register_list_format) 2441 return -ENOMEM; 2442 2443 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2444 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2445 gfx_v9_1_parse_ind_reg_list(register_list_format, 2446 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2447 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2448 unique_indirect_regs, 2449 unique_indirect_reg_count, 2450 indirect_start_offsets, 2451 &indirect_start_offsets_count, 2452 ARRAY_SIZE(indirect_start_offsets)); 2453 2454 /* enable auto inc in case it is disabled */ 2455 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2456 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2457 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2458 2459 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2460 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2461 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2462 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2463 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2464 
adev->gfx.rlc.register_restore[i]); 2465 2466 /* load indirect register */ 2467 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2468 adev->gfx.rlc.reg_list_format_start); 2469 2470 /* direct register portion */ 2471 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2472 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2473 register_list_format[i]); 2474 2475 /* indirect register portion */ 2476 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2477 if (register_list_format[i] == 0xFFFFFFFF) { 2478 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2479 continue; 2480 } 2481 2482 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2483 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2484 2485 for (j = 0; j < unique_indirect_reg_count; j++) { 2486 if (register_list_format[i] == unique_indirect_regs[j]) { 2487 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2488 break; 2489 } 2490 } 2491 2492 BUG_ON(j >= unique_indirect_reg_count); 2493 2494 i++; 2495 } 2496 2497 /* set save/restore list size */ 2498 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2499 list_size = list_size >> 1; 2500 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2501 adev->gfx.rlc.reg_restore_list_size); 2502 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2503 2504 /* write the starting offsets to RLC scratch ram */ 2505 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2506 adev->gfx.rlc.starting_offsets_start); 2507 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2508 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2509 indirect_start_offsets[i]); 2510 2511 /* load unique indirect regs*/ 2512 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2513 if (unique_indirect_regs[i] != 0) { 2514 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2515 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2516 unique_indirect_regs[i] & 0x3FFFF); 2517 2518 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2519 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2520 unique_indirect_regs[i] >> 20); 2521 } 2522 } 2523 2524 kfree(register_list_format); 2525 return 0; 2526 } 2527 2528 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2529 { 2530 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2531 } 2532 2533 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2534 bool enable) 2535 { 2536 uint32_t data = 0; 2537 uint32_t default_data = 0; 2538 2539 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2540 if (enable == true) { 2541 /* enable GFXIP control over CGPG */ 2542 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2543 if(default_data != data) 2544 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2545 2546 /* update status */ 2547 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2548 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2549 if(default_data != data) 2550 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2551 } else { 2552 /* restore GFXIP control over GCPG */ 2553 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2554 if(default_data != data) 2555 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2556 } 2557 } 2558 2559 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2560 { 2561 uint32_t data = 0; 2562 2563 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2564 
AMD_PG_SUPPORT_GFX_SMG | 2565 AMD_PG_SUPPORT_GFX_DMG)) { 2566 /* init IDLE_POLL_COUNT = 60 */ 2567 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2568 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2569 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2570 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2571 2572 /* init RLC PG Delay */ 2573 data = 0; 2574 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2575 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2576 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2577 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2578 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2579 2580 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2581 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2582 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2583 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2584 2585 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2586 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2587 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2588 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2589 2590 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2591 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2592 2593 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2594 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2595 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2596 2597 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2598 } 2599 } 2600 2601 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2602 bool enable) 2603 { 2604 uint32_t data = 0; 2605 uint32_t default_data = 0; 2606 2607 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2608 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2609 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2610 enable ? 1 : 0); 2611 if (default_data != data) 2612 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2613 } 2614 2615 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2616 bool enable) 2617 { 2618 uint32_t data = 0; 2619 uint32_t default_data = 0; 2620 2621 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2622 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2623 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2624 enable ? 1 : 0); 2625 if(default_data != data) 2626 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2627 } 2628 2629 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2630 bool enable) 2631 { 2632 uint32_t data = 0; 2633 uint32_t default_data = 0; 2634 2635 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2636 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2637 CP_PG_DISABLE, 2638 enable ? 0 : 1); 2639 if(default_data != data) 2640 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2641 } 2642 2643 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2644 bool enable) 2645 { 2646 uint32_t data, default_data; 2647 2648 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2649 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2650 GFX_POWER_GATING_ENABLE, 2651 enable ? 
1 : 0); 2652 if(default_data != data) 2653 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2654 } 2655 2656 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2657 bool enable) 2658 { 2659 uint32_t data, default_data; 2660 2661 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2662 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2663 GFX_PIPELINE_PG_ENABLE, 2664 enable ? 1 : 0); 2665 if(default_data != data) 2666 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2667 2668 if (!enable) 2669 /* read any GFX register to wake up GFX */ 2670 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2671 } 2672 2673 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2674 bool enable) 2675 { 2676 uint32_t data, default_data; 2677 2678 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2679 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2680 STATIC_PER_CU_PG_ENABLE, 2681 enable ? 1 : 0); 2682 if(default_data != data) 2683 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2684 } 2685 2686 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2687 bool enable) 2688 { 2689 uint32_t data, default_data; 2690 2691 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2692 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2693 DYN_PER_CU_PG_ENABLE, 2694 enable ? 1 : 0); 2695 if(default_data != data) 2696 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2697 } 2698 2699 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2700 { 2701 gfx_v9_0_init_csb(adev); 2702 2703 /* 2704 * Rlc save restore list is workable since v2_1. 2705 * And it's needed by gfxoff feature. 2706 */ 2707 if (adev->gfx.rlc.is_rlc_v2_1) { 2708 if (adev->asic_type == CHIP_VEGA12 || 2709 (adev->asic_type == CHIP_RAVEN && 2710 adev->rev_id >= 8)) 2711 gfx_v9_1_init_rlc_save_restore_list(adev); 2712 gfx_v9_0_enable_save_restore_machine(adev); 2713 } 2714 2715 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2716 AMD_PG_SUPPORT_GFX_SMG | 2717 AMD_PG_SUPPORT_GFX_DMG | 2718 AMD_PG_SUPPORT_CP | 2719 AMD_PG_SUPPORT_GDS | 2720 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2721 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2722 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2723 gfx_v9_0_init_gfx_power_gating(adev); 2724 } 2725 } 2726 2727 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2728 { 2729 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2730 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2731 gfx_v9_0_wait_for_rlc_serdes(adev); 2732 } 2733 2734 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2735 { 2736 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2737 udelay(50); 2738 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2739 udelay(50); 2740 } 2741 2742 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2743 { 2744 #ifdef AMDGPU_RLC_DEBUG_RETRY 2745 u32 rlc_ucode_ver; 2746 #endif 2747 2748 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2749 udelay(50); 2750 2751 /* carrizo do enable cp interrupt after cp inited */ 2752 if (!(adev->flags & AMD_IS_APU)) { 2753 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2754 udelay(50); 2755 } 2756 2757 #ifdef AMDGPU_RLC_DEBUG_RETRY 2758 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2759 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2760 if(rlc_ucode_ver == 0x108) { 2761 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2762 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2763 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2764 * default is 0x9C4 to create a 100us interval */ 2765 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2766 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2767 * to disable the page fault retry interrupts, default is 2768 * 0x100 (256) */ 2769 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2770 } 2771 #endif 2772 } 2773 2774 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2775 { 2776 const struct rlc_firmware_header_v2_0 *hdr; 2777 const __le32 *fw_data; 2778 unsigned i, fw_size; 2779 2780 if (!adev->gfx.rlc_fw) 2781 return -EINVAL; 2782 2783 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2784 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2785 2786 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2787 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2788 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2789 2790 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2791 RLCG_UCODE_LOADING_START_ADDRESS); 2792 for (i = 0; i < fw_size; i++) 2793 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2794 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2795 2796 return 0; 2797 } 2798 2799 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2800 { 2801 int r; 2802 2803 if (amdgpu_sriov_vf(adev)) { 2804 gfx_v9_0_init_csb(adev); 2805 return 0; 2806 } 2807 2808 adev->gfx.rlc.funcs->stop(adev); 2809 2810 /* disable CG */ 2811 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2812 2813 gfx_v9_0_init_pg(adev); 2814 2815 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2816 /* legacy rlc firmware loading */ 2817 r = gfx_v9_0_rlc_load_microcode(adev); 2818 if (r) 2819 return r; 2820 } 2821 2822 switch (adev->asic_type) { 2823 case CHIP_RAVEN: 2824 if (amdgpu_lbpw == 0) 2825 gfx_v9_0_enable_lbpw(adev, false); 2826 else 2827 gfx_v9_0_enable_lbpw(adev, true); 2828 break; 2829 case CHIP_VEGA20: 2830 if (amdgpu_lbpw > 0) 2831 gfx_v9_0_enable_lbpw(adev, true); 2832 else 2833 gfx_v9_0_enable_lbpw(adev, false); 2834 break; 2835 default: 2836 break; 2837 } 2838 2839 adev->gfx.rlc.funcs->start(adev); 2840 2841 return 0; 2842 } 2843 2844 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2845 { 2846 int i; 2847 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2848 2849 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2850 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2851 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 2852 if (!enable) { 2853 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2854 adev->gfx.gfx_ring[i].sched.ready = false; 2855 } 2856 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2857 udelay(50); 2858 } 2859 2860 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2861 { 2862 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2863 const struct gfx_firmware_header_v1_0 *ce_hdr; 2864 const struct gfx_firmware_header_v1_0 *me_hdr; 2865 const __le32 *fw_data; 2866 unsigned i, fw_size; 2867 2868 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2869 return -EINVAL; 2870 2871 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2872 adev->gfx.pfp_fw->data; 2873 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2874 adev->gfx.ce_fw->data; 2875 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2876 adev->gfx.me_fw->data; 2877 2878 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2879 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2880 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2881 2882 gfx_v9_0_cp_gfx_enable(adev, false); 2883 2884 /* PFP */ 2885 fw_data = (const __le32 *) 2886 (adev->gfx.pfp_fw->data + 2887 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2888 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2889 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2890 for (i = 0; i < fw_size; i++) 2891 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2892 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2893 2894 /* CE */ 2895 fw_data = (const __le32 *) 2896 (adev->gfx.ce_fw->data + 2897 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2898 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2899 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2900 for (i = 0; i < fw_size; i++) 2901 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2902 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2903 2904 /* ME */ 2905 fw_data = (const __le32 *) 2906 (adev->gfx.me_fw->data + 2907 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2908 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2909 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2910 for (i = 0; i < fw_size; i++) 2911 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2912 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2913 2914 return 0; 2915 } 2916 2917 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2918 { 2919 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2920 const struct cs_section_def *sect = NULL; 2921 const struct cs_extent_def *ext = NULL; 2922 int r, i, tmp; 2923 2924 /* init the CP */ 2925 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2926 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2927 2928 gfx_v9_0_cp_gfx_enable(adev, true); 2929 2930 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2931 if (r) { 2932 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2933 return r; 2934 } 2935 2936 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2937 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2938 2939 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2940 amdgpu_ring_write(ring, 0x80000000); 2941 amdgpu_ring_write(ring, 0x80000000); 2942 2943 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2944 for (ext = sect->section; ext->extent != NULL; ++ext) { 2945 if (sect->id == SECT_CONTEXT) { 2946 amdgpu_ring_write(ring, 2947 PACKET3(PACKET3_SET_CONTEXT_REG, 2948 
						  ext->reg_count));
				amdgpu_ring_write(ring,
						  ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);

	/* start the ring */
	gfx_v9_0_cp_gfx_start(adev);
	ring->sched.ready = true;
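	/*
	 * The gfx ring is now fully programmed: ring buffer base/size, rptr/wptr
	 * write-back addresses and the doorbell range are set, and
	 * gfx_v9_0_cp_gfx_start() has pushed the clear-state preamble, so the
	 * scheduler is allowed to submit to it.
	 */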
3045 3046 return 0; 3047 } 3048 3049 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3050 { 3051 int i; 3052 3053 if (enable) { 3054 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3055 } else { 3056 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3057 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3058 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3059 adev->gfx.compute_ring[i].sched.ready = false; 3060 adev->gfx.kiq.ring.sched.ready = false; 3061 } 3062 udelay(50); 3063 } 3064 3065 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3066 { 3067 const struct gfx_firmware_header_v1_0 *mec_hdr; 3068 const __le32 *fw_data; 3069 unsigned i; 3070 u32 tmp; 3071 3072 if (!adev->gfx.mec_fw) 3073 return -EINVAL; 3074 3075 gfx_v9_0_cp_compute_enable(adev, false); 3076 3077 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3078 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3079 3080 fw_data = (const __le32 *) 3081 (adev->gfx.mec_fw->data + 3082 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3083 tmp = 0; 3084 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3085 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3086 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3087 3088 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3089 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3090 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3091 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3092 3093 /* MEC1 */ 3094 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3095 mec_hdr->jt_offset); 3096 for (i = 0; i < mec_hdr->jt_size; i++) 3097 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3098 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3099 3100 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3101 adev->gfx.mec_fw_version); 3102 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3103 3104 return 0; 3105 } 3106 3107 /* KIQ functions */ 3108 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3109 { 3110 uint32_t tmp; 3111 struct amdgpu_device *adev = ring->adev; 3112 3113 /* tell RLC which is KIQ queue */ 3114 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3115 tmp &= 0xffffff00; 3116 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3117 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3118 tmp |= 0x80; 3119 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3120 } 3121 3122 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 3123 { 3124 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3125 uint64_t queue_mask = 0; 3126 int r, i; 3127 3128 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 3129 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 3130 continue; 3131 3132 /* This situation may be hit in the future if a new HW 3133 * generation exposes more than 64 queues. 
If so, the 3134 * definition of queue_mask needs updating */ 3135 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 3136 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 3137 break; 3138 } 3139 3140 queue_mask |= (1ull << i); 3141 } 3142 3143 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 3144 if (r) { 3145 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3146 return r; 3147 } 3148 3149 /* set resources */ 3150 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 3151 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 3152 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 3153 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 3154 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 3155 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 3156 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 3157 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 3158 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 3159 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3160 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3161 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 3162 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3163 3164 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 3165 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 3166 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3167 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 3168 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 3169 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 3170 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 3171 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 3172 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 3173 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 3174 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 3175 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 3176 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 3177 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 3178 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 3179 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 3180 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 3181 } 3182 3183 r = amdgpu_ring_test_helper(kiq_ring); 3184 if (r) 3185 DRM_ERROR("KCQ enable failed\n"); 3186 3187 return r; 3188 } 3189 3190 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3191 { 3192 struct amdgpu_device *adev = ring->adev; 3193 struct v9_mqd *mqd = ring->mqd_ptr; 3194 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3195 uint32_t tmp; 3196 3197 mqd->header = 0xC0310800; 3198 mqd->compute_pipelinestat_enable = 0x00000001; 3199 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3200 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3201 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3202 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3203 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3204 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3205 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3206 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3207 mqd->compute_misc_reserved = 0x00000003; 3208 3209 mqd->dynamic_cu_mask_addr_lo = 3210 lower_32_bits(ring->mqd_gpu_addr 3211 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3212 mqd->dynamic_cu_mask_addr_hi = 3213 upper_32_bits(ring->mqd_gpu_addr 3214 + offsetof(struct v9_mqd_allocation, 
dynamic_cu_mask)); 3215 3216 eop_base_addr = ring->eop_gpu_addr >> 8; 3217 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3218 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3219 3220 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3221 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3222 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3223 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3224 3225 mqd->cp_hqd_eop_control = tmp; 3226 3227 /* enable doorbell? */ 3228 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3229 3230 if (ring->use_doorbell) { 3231 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3232 DOORBELL_OFFSET, ring->doorbell_index); 3233 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3234 DOORBELL_EN, 1); 3235 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3236 DOORBELL_SOURCE, 0); 3237 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3238 DOORBELL_HIT, 0); 3239 } else { 3240 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3241 DOORBELL_EN, 0); 3242 } 3243 3244 mqd->cp_hqd_pq_doorbell_control = tmp; 3245 3246 /* disable the queue if it's active */ 3247 ring->wptr = 0; 3248 mqd->cp_hqd_dequeue_request = 0; 3249 mqd->cp_hqd_pq_rptr = 0; 3250 mqd->cp_hqd_pq_wptr_lo = 0; 3251 mqd->cp_hqd_pq_wptr_hi = 0; 3252 3253 /* set the pointer to the MQD */ 3254 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3255 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3256 3257 /* set MQD vmid to 0 */ 3258 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3259 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3260 mqd->cp_mqd_control = tmp; 3261 3262 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3263 hqd_gpu_addr = ring->gpu_addr >> 8; 3264 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3265 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3266 3267 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3268 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3269 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3270 (order_base_2(ring->ring_size / 4) - 1)); 3271 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3272 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3273 #ifdef __BIG_ENDIAN 3274 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3275 #endif 3276 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3277 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3278 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3279 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3280 mqd->cp_hqd_pq_control = tmp; 3281 3282 /* set the wb address whether it's enabled or not */ 3283 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3284 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3285 mqd->cp_hqd_pq_rptr_report_addr_hi = 3286 upper_32_bits(wb_gpu_addr) & 0xffff; 3287 3288 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3289 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3290 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3291 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3292 3293 tmp = 0; 3294 /* enable the doorbell if requested */ 3295 if (ring->use_doorbell) { 3296 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3297 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3298 DOORBELL_OFFSET, ring->doorbell_index); 3299 3300 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3301 DOORBELL_EN, 1); 3302 tmp = 
REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3303 DOORBELL_SOURCE, 0); 3304 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3305 DOORBELL_HIT, 0); 3306 } 3307 3308 mqd->cp_hqd_pq_doorbell_control = tmp; 3309 3310 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3311 ring->wptr = 0; 3312 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3313 3314 /* set the vmid for the queue */ 3315 mqd->cp_hqd_vmid = 0; 3316 3317 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3318 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3319 mqd->cp_hqd_persistent_state = tmp; 3320 3321 /* set MIN_IB_AVAIL_SIZE */ 3322 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3323 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3324 mqd->cp_hqd_ib_control = tmp; 3325 3326 /* activate the queue */ 3327 mqd->cp_hqd_active = 1; 3328 3329 return 0; 3330 } 3331 3332 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3333 { 3334 struct amdgpu_device *adev = ring->adev; 3335 struct v9_mqd *mqd = ring->mqd_ptr; 3336 int j; 3337 3338 /* disable wptr polling */ 3339 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3340 3341 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3342 mqd->cp_hqd_eop_base_addr_lo); 3343 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3344 mqd->cp_hqd_eop_base_addr_hi); 3345 3346 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3347 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3348 mqd->cp_hqd_eop_control); 3349 3350 /* enable doorbell? */ 3351 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3352 mqd->cp_hqd_pq_doorbell_control); 3353 3354 /* disable the queue if it's active */ 3355 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3356 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3357 for (j = 0; j < adev->usec_timeout; j++) { 3358 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3359 break; 3360 udelay(1); 3361 } 3362 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3363 mqd->cp_hqd_dequeue_request); 3364 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3365 mqd->cp_hqd_pq_rptr); 3366 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3367 mqd->cp_hqd_pq_wptr_lo); 3368 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3369 mqd->cp_hqd_pq_wptr_hi); 3370 } 3371 3372 /* set the pointer to the MQD */ 3373 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3374 mqd->cp_mqd_base_addr_lo); 3375 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3376 mqd->cp_mqd_base_addr_hi); 3377 3378 /* set MQD vmid to 0 */ 3379 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3380 mqd->cp_mqd_control); 3381 3382 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3383 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3384 mqd->cp_hqd_pq_base_lo); 3385 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3386 mqd->cp_hqd_pq_base_hi); 3387 3388 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3389 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3390 mqd->cp_hqd_pq_control); 3391 3392 /* set the wb address whether it's enabled or not */ 3393 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3394 mqd->cp_hqd_pq_rptr_report_addr_lo); 3395 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3396 mqd->cp_hqd_pq_rptr_report_addr_hi); 3397 3398 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3399 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3400 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3401 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3402 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3403 3404 /* enable the 
doorbell if requested */ 3405 if (ring->use_doorbell) { 3406 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3407 (adev->doorbell_index.kiq * 2) << 2); 3408 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3409 (adev->doorbell_index.userqueue_end * 2) << 2); 3410 } 3411 3412 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3413 mqd->cp_hqd_pq_doorbell_control); 3414 3415 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3416 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3417 mqd->cp_hqd_pq_wptr_lo); 3418 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3419 mqd->cp_hqd_pq_wptr_hi); 3420 3421 /* set the vmid for the queue */ 3422 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3423 3424 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3425 mqd->cp_hqd_persistent_state); 3426 3427 /* activate the queue */ 3428 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3429 mqd->cp_hqd_active); 3430 3431 if (ring->use_doorbell) 3432 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3433 3434 return 0; 3435 } 3436 3437 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3438 { 3439 struct amdgpu_device *adev = ring->adev; 3440 int j; 3441 3442 /* disable the queue if it's active */ 3443 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3444 3445 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3446 3447 for (j = 0; j < adev->usec_timeout; j++) { 3448 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3449 break; 3450 udelay(1); 3451 } 3452 3453 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3454 DRM_DEBUG("KIQ dequeue request failed.\n"); 3455 3456 /* Manual disable if dequeue request times out */ 3457 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3458 } 3459 3460 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3461 0); 3462 } 3463 3464 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3465 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3466 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3467 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3468 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3469 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3470 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3471 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3472 3473 return 0; 3474 } 3475 3476 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3477 { 3478 struct amdgpu_device *adev = ring->adev; 3479 struct v9_mqd *mqd = ring->mqd_ptr; 3480 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3481 3482 gfx_v9_0_kiq_setting(ring); 3483 3484 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3485 /* reset MQD to a clean status */ 3486 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3487 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3488 3489 /* reset ring buffer */ 3490 ring->wptr = 0; 3491 amdgpu_ring_clear_ring(ring); 3492 3493 mutex_lock(&adev->srbm_mutex); 3494 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3495 gfx_v9_0_kiq_init_register(ring); 3496 soc15_grbm_select(adev, 0, 0, 0, 0); 3497 mutex_unlock(&adev->srbm_mutex); 3498 } else { 3499 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3500 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3501 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3502 mutex_lock(&adev->srbm_mutex); 3503 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3504 gfx_v9_0_mqd_init(ring); 3505 gfx_v9_0_kiq_init_register(ring); 3506 soc15_grbm_select(adev, 0, 0, 0, 0); 3507 mutex_unlock(&adev->srbm_mutex); 3508 3509 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 3510 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3511 } 3512 3513 return 0; 3514 } 3515 3516 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3517 { 3518 struct amdgpu_device *adev = ring->adev; 3519 struct v9_mqd *mqd = ring->mqd_ptr; 3520 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3521 3522 if (!adev->in_gpu_reset && !adev->in_suspend) { 3523 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3524 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3525 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3526 mutex_lock(&adev->srbm_mutex); 3527 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3528 gfx_v9_0_mqd_init(ring); 3529 soc15_grbm_select(adev, 0, 0, 0, 0); 3530 mutex_unlock(&adev->srbm_mutex); 3531 3532 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3533 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3534 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3535 /* reset MQD to a clean status */ 3536 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3537 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3538 3539 /* reset ring buffer */ 3540 ring->wptr = 0; 3541 amdgpu_ring_clear_ring(ring); 3542 } else { 3543 amdgpu_ring_clear_ring(ring); 3544 } 3545 3546 return 0; 3547 } 3548 3549 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3550 { 3551 struct amdgpu_ring *ring; 3552 int r; 3553 3554 ring = &adev->gfx.kiq.ring; 3555 3556 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3557 if (unlikely(r != 0)) 3558 return r; 3559 3560 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3561 if (unlikely(r != 0)) 3562 return r; 3563 3564 gfx_v9_0_kiq_init_queue(ring); 3565 amdgpu_bo_kunmap(ring->mqd_obj); 3566 ring->mqd_ptr = NULL; 3567 amdgpu_bo_unreserve(ring->mqd_obj); 3568 ring->sched.ready = true; 3569 return 0; 3570 } 3571 3572 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3573 { 3574 struct amdgpu_ring *ring = NULL; 3575 int r = 0, i; 3576 3577 gfx_v9_0_cp_compute_enable(adev, true); 3578 3579 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3580 ring = &adev->gfx.compute_ring[i]; 3581 3582 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3583 if (unlikely(r != 0)) 3584 goto done; 3585 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3586 if (!r) { 3587 r = gfx_v9_0_kcq_init_queue(ring); 3588 amdgpu_bo_kunmap(ring->mqd_obj); 3589 ring->mqd_ptr = NULL; 3590 } 3591 amdgpu_bo_unreserve(ring->mqd_obj); 3592 if (r) 3593 goto done; 3594 } 3595 3596 r = gfx_v9_0_kiq_kcq_enable(adev); 3597 done: 3598 return r; 3599 } 3600 3601 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3602 { 3603 int r, i; 3604 struct amdgpu_ring *ring; 3605 3606 if (!(adev->flags & AMD_IS_APU)) 3607 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3608 3609 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3610 if (adev->asic_type != CHIP_ARCTURUS) { 3611 /* legacy firmware loading */ 3612 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3613 if (r) 3614 return r; 3615 } 3616 3617 r = gfx_v9_0_cp_compute_load_microcode(adev); 3618 if (r) 3619 return r; 3620 } 3621 3622 r = gfx_v9_0_kiq_resume(adev); 3623 if (r) 3624 return r; 3625 3626 if (adev->asic_type != CHIP_ARCTURUS) { 3627 r = gfx_v9_0_cp_gfx_resume(adev); 3628 if (r) 3629 return r; 3630 } 3631 3632 r = gfx_v9_0_kcq_resume(adev); 3633 if (r) 3634 return r; 3635 3636 if (adev->asic_type != CHIP_ARCTURUS) { 3637 ring = 
&adev->gfx.gfx_ring[0]; 3638 r = amdgpu_ring_test_helper(ring); 3639 if (r) 3640 return r; 3641 } 3642 3643 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3644 ring = &adev->gfx.compute_ring[i]; 3645 amdgpu_ring_test_helper(ring); 3646 } 3647 3648 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3649 3650 return 0; 3651 } 3652 3653 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3654 { 3655 if (adev->asic_type != CHIP_ARCTURUS) 3656 gfx_v9_0_cp_gfx_enable(adev, enable); 3657 gfx_v9_0_cp_compute_enable(adev, enable); 3658 } 3659 3660 static int gfx_v9_0_hw_init(void *handle) 3661 { 3662 int r; 3663 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3664 3665 if (!amdgpu_sriov_vf(adev)) 3666 gfx_v9_0_init_golden_registers(adev); 3667 3668 gfx_v9_0_constants_init(adev); 3669 3670 r = adev->gfx.rlc.funcs->resume(adev); 3671 if (r) 3672 return r; 3673 3674 r = gfx_v9_0_cp_resume(adev); 3675 if (r) 3676 return r; 3677 3678 return r; 3679 } 3680 3681 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3682 { 3683 int r, i; 3684 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3685 3686 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3687 if (r) 3688 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3689 3690 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3691 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3692 3693 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3694 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3695 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3696 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3697 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3698 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3699 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3700 amdgpu_ring_write(kiq_ring, 0); 3701 amdgpu_ring_write(kiq_ring, 0); 3702 amdgpu_ring_write(kiq_ring, 0); 3703 } 3704 r = amdgpu_ring_test_helper(kiq_ring); 3705 if (r) 3706 DRM_ERROR("KCQ disable failed\n"); 3707 3708 return r; 3709 } 3710 3711 static int gfx_v9_0_hw_fini(void *handle) 3712 { 3713 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3714 3715 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3716 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3717 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3718 3719 /* DF freeze and kcq disable will fail */ 3720 if (!amdgpu_ras_intr_triggered()) 3721 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3722 gfx_v9_0_kcq_disable(adev); 3723 3724 if (amdgpu_sriov_vf(adev)) { 3725 gfx_v9_0_cp_gfx_enable(adev, false); 3726 /* must disable polling for SRIOV when hw finished, otherwise 3727 * CPC engine may still keep fetching WB address which is already 3728 * invalid after sw finished and trigger DMAR reading error in 3729 * hypervisor side. 
3730 */ 3731 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3732 return 0; 3733 } 3734 3735 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3736 * otherwise KIQ is hanging when binding back 3737 */ 3738 if (!adev->in_gpu_reset && !adev->in_suspend) { 3739 mutex_lock(&adev->srbm_mutex); 3740 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3741 adev->gfx.kiq.ring.pipe, 3742 adev->gfx.kiq.ring.queue, 0); 3743 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3744 soc15_grbm_select(adev, 0, 0, 0, 0); 3745 mutex_unlock(&adev->srbm_mutex); 3746 } 3747 3748 gfx_v9_0_cp_enable(adev, false); 3749 adev->gfx.rlc.funcs->stop(adev); 3750 3751 return 0; 3752 } 3753 3754 static int gfx_v9_0_suspend(void *handle) 3755 { 3756 return gfx_v9_0_hw_fini(handle); 3757 } 3758 3759 static int gfx_v9_0_resume(void *handle) 3760 { 3761 return gfx_v9_0_hw_init(handle); 3762 } 3763 3764 static bool gfx_v9_0_is_idle(void *handle) 3765 { 3766 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3767 3768 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3769 GRBM_STATUS, GUI_ACTIVE)) 3770 return false; 3771 else 3772 return true; 3773 } 3774 3775 static int gfx_v9_0_wait_for_idle(void *handle) 3776 { 3777 unsigned i; 3778 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3779 3780 for (i = 0; i < adev->usec_timeout; i++) { 3781 if (gfx_v9_0_is_idle(handle)) 3782 return 0; 3783 udelay(1); 3784 } 3785 return -ETIMEDOUT; 3786 } 3787 3788 static int gfx_v9_0_soft_reset(void *handle) 3789 { 3790 u32 grbm_soft_reset = 0; 3791 u32 tmp; 3792 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3793 3794 /* GRBM_STATUS */ 3795 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3796 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3797 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3798 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3799 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3800 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3801 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3802 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3803 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3804 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3805 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3806 } 3807 3808 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3809 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3810 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3811 } 3812 3813 /* GRBM_STATUS2 */ 3814 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3815 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3816 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3817 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3818 3819 3820 if (grbm_soft_reset) { 3821 /* stop the rlc */ 3822 adev->gfx.rlc.funcs->stop(adev); 3823 3824 if (adev->asic_type != CHIP_ARCTURUS) 3825 /* Disable GFX parsing/prefetching */ 3826 gfx_v9_0_cp_gfx_enable(adev, false); 3827 3828 /* Disable MEC parsing/prefetching */ 3829 gfx_v9_0_cp_compute_enable(adev, false); 3830 3831 if (grbm_soft_reset) { 3832 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3833 tmp |= grbm_soft_reset; 3834 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3835 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3836 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3837 3838 udelay(50); 3839 3840 tmp &= ~grbm_soft_reset; 3841 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3842 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3843 } 3844 3845 /* Wait a little for things to settle down */ 3846 
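	/*
	 * For reference: the block above is an assert/deassert pulse on
	 * GRBM_SOFT_RESET -- set the SOFT_RESET_CP/GFX/RLC bits, hold them
	 * for ~50us, clear them again, and then allow the extra settle
	 * delay below before the caller brings the RLC and CP back up.
	 */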
udelay(50); 3847 } 3848 return 0; 3849 } 3850 3851 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3852 { 3853 uint64_t clock; 3854 3855 mutex_lock(&adev->gfx.gpu_clock_mutex); 3856 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 3857 uint32_t tmp, lsb, msb, i = 0; 3858 do { 3859 if (i != 0) 3860 udelay(1); 3861 tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3862 lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); 3863 msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3864 i++; 3865 } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); 3866 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); 3867 } else { 3868 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3869 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3870 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3871 } 3872 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3873 return clock; 3874 } 3875 3876 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3877 uint32_t vmid, 3878 uint32_t gds_base, uint32_t gds_size, 3879 uint32_t gws_base, uint32_t gws_size, 3880 uint32_t oa_base, uint32_t oa_size) 3881 { 3882 struct amdgpu_device *adev = ring->adev; 3883 3884 /* GDS Base */ 3885 gfx_v9_0_write_data_to_reg(ring, 0, false, 3886 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3887 gds_base); 3888 3889 /* GDS Size */ 3890 gfx_v9_0_write_data_to_reg(ring, 0, false, 3891 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3892 gds_size); 3893 3894 /* GWS */ 3895 gfx_v9_0_write_data_to_reg(ring, 0, false, 3896 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3897 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3898 3899 /* OA */ 3900 gfx_v9_0_write_data_to_reg(ring, 0, false, 3901 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3902 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3903 } 3904 3905 static const u32 vgpr_init_compute_shader[] = 3906 { 3907 0xb07c0000, 0xbe8000ff, 3908 0x000000f8, 0xbf110800, 3909 0x7e000280, 0x7e020280, 3910 0x7e040280, 0x7e060280, 3911 0x7e080280, 0x7e0a0280, 3912 0x7e0c0280, 0x7e0e0280, 3913 0x80808800, 0xbe803200, 3914 0xbf84fff5, 0xbf9c0000, 3915 0xd28c0001, 0x0001007f, 3916 0xd28d0001, 0x0002027e, 3917 0x10020288, 0xb8810904, 3918 0xb7814000, 0xd1196a01, 3919 0x00000301, 0xbe800087, 3920 0xbefc00c1, 0xd89c4000, 3921 0x00020201, 0xd89cc080, 3922 0x00040401, 0x320202ff, 3923 0x00000800, 0x80808100, 3924 0xbf84fff8, 0x7e020280, 3925 0xbf810000, 0x00000000, 3926 }; 3927 3928 static const u32 sgpr_init_compute_shader[] = 3929 { 3930 0xb07c0000, 0xbe8000ff, 3931 0x0000005f, 0xbee50080, 3932 0xbe812c65, 0xbe822c65, 3933 0xbe832c65, 0xbe842c65, 3934 0xbe852c65, 0xb77c0005, 3935 0x80808500, 0xbf84fff8, 3936 0xbe800080, 0xbf810000, 3937 }; 3938 3939 static const struct soc15_reg_entry vgpr_init_regs[] = { 3940 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3941 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3942 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3943 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3944 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3945 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3946 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3947 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3948 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical 
VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3949 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3950 }; 3951 3952 static const struct soc15_reg_entry sgpr_init_regs[] = { 3953 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3954 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3955 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3956 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3957 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3958 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3959 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3960 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3961 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3962 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3963 }; 3964 3965 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3966 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3967 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3968 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3969 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3970 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3971 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3972 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3973 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3974 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3975 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3976 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3977 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3978 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3979 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3980 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3981 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3982 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3983 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3984 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3985 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3986 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 3987 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3988 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3989 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 3990 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 3991 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 3992 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 3993 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 3994 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 3995 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 3996 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 3997 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 3998 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 3999 }; 4000 4001 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4002 { 4003 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4004 int i, r; 4005 4006 /* only support when RAS is enabled */ 4007 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4008 return 0; 4009 4010 r = amdgpu_ring_alloc(ring, 7); 4011 if (r) { 4012 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4013 ring->name, r); 4014 return r; 4015 } 4016 4017 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4018 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 
adev->gds.gds_size); 4019 4020 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4021 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4022 PACKET3_DMA_DATA_DST_SEL(1) | 4023 PACKET3_DMA_DATA_SRC_SEL(2) | 4024 PACKET3_DMA_DATA_ENGINE(0))); 4025 amdgpu_ring_write(ring, 0); 4026 amdgpu_ring_write(ring, 0); 4027 amdgpu_ring_write(ring, 0); 4028 amdgpu_ring_write(ring, 0); 4029 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4030 adev->gds.gds_size); 4031 4032 amdgpu_ring_commit(ring); 4033 4034 for (i = 0; i < adev->usec_timeout; i++) { 4035 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4036 break; 4037 udelay(1); 4038 } 4039 4040 if (i >= adev->usec_timeout) 4041 r = -ETIMEDOUT; 4042 4043 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4044 4045 return r; 4046 } 4047 4048 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4049 { 4050 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4051 struct amdgpu_ib ib; 4052 struct dma_fence *f = NULL; 4053 int r, i, j, k; 4054 unsigned total_size, vgpr_offset, sgpr_offset; 4055 u64 gpu_addr; 4056 4057 /* only support when RAS is enabled */ 4058 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4059 return 0; 4060 4061 /* bail if the compute ring is not ready */ 4062 if (!ring->sched.ready) 4063 return 0; 4064 4065 total_size = 4066 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4067 total_size += 4068 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4069 total_size = ALIGN(total_size, 256); 4070 vgpr_offset = total_size; 4071 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4072 sgpr_offset = total_size; 4073 total_size += sizeof(sgpr_init_compute_shader); 4074 4075 /* allocate an indirect buffer to put the commands in */ 4076 memset(&ib, 0, sizeof(ib)); 4077 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4078 if (r) { 4079 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4080 return r; 4081 } 4082 4083 /* load the compute shaders */ 4084 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4085 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4086 4087 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4088 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4089 4090 /* init the ib length to 0 */ 4091 ib.length_dw = 0; 4092 4093 /* VGPR */ 4094 /* write the register state for the compute dispatch */ 4095 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 4096 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4097 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4098 - PACKET3_SET_SH_REG_START; 4099 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4100 } 4101 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4102 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4103 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4104 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4105 - PACKET3_SET_SH_REG_START; 4106 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4107 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4108 4109 /* write dispatch packet */ 4110 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4111 ib.ptr[ib.length_dw++] = 128; /* x */ 4112 ib.ptr[ib.length_dw++] = 1; /* y */ 4113 ib.ptr[ib.length_dw++] = 1; /* z */ 4114 ib.ptr[ib.length_dw++] = 4115 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4116 4117 /* write CS partial flush packet */ 4118 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4119 
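	/* The next dword is the event payload: EVENT_TYPE(7) with
	 * EVENT_INDEX(4) is the CS_PARTIAL_FLUSH event, which should make
	 * the CP drain the VGPR-init dispatch above before the SGPR pass
	 * that follows reuses the same IB.
	 */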
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4120 4121 /* SGPR */ 4122 /* write the register state for the compute dispatch */ 4123 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 4124 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4125 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 4126 - PACKET3_SET_SH_REG_START; 4127 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 4128 } 4129 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4130 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4131 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4132 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4133 - PACKET3_SET_SH_REG_START; 4134 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4135 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4136 4137 /* write dispatch packet */ 4138 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4139 ib.ptr[ib.length_dw++] = 128; /* x */ 4140 ib.ptr[ib.length_dw++] = 1; /* y */ 4141 ib.ptr[ib.length_dw++] = 1; /* z */ 4142 ib.ptr[ib.length_dw++] = 4143 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4144 4145 /* write CS partial flush packet */ 4146 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4147 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4148 4149 /* shedule the ib on the ring */ 4150 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4151 if (r) { 4152 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4153 goto fail; 4154 } 4155 4156 /* wait for the GPU to finish processing the IB */ 4157 r = dma_fence_wait(f, false); 4158 if (r) { 4159 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4160 goto fail; 4161 } 4162 4163 /* read back registers to clear the counters */ 4164 mutex_lock(&adev->grbm_idx_mutex); 4165 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 4166 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 4167 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 4168 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 4169 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 4170 } 4171 } 4172 } 4173 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 4174 mutex_unlock(&adev->grbm_idx_mutex); 4175 4176 fail: 4177 amdgpu_ib_free(adev, &ib, NULL); 4178 dma_fence_put(f); 4179 4180 return r; 4181 } 4182 4183 static int gfx_v9_0_early_init(void *handle) 4184 { 4185 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4186 4187 if (adev->asic_type == CHIP_ARCTURUS) 4188 adev->gfx.num_gfx_rings = 0; 4189 else 4190 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4191 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 4192 gfx_v9_0_set_ring_funcs(adev); 4193 gfx_v9_0_set_irq_funcs(adev); 4194 gfx_v9_0_set_gds_init(adev); 4195 gfx_v9_0_set_rlc_funcs(adev); 4196 4197 return 0; 4198 } 4199 4200 static int gfx_v9_0_ecc_late_init(void *handle) 4201 { 4202 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4203 int r; 4204 4205 r = amdgpu_gfx_ras_late_init(adev); 4206 if (r) 4207 return r; 4208 4209 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4210 if (r) 4211 return r; 4212 4213 /* requires IBs so do in late init after IB pool is initialized */ 4214 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4215 if (r) 4216 return r; 4217 4218 return 0; 4219 } 4220 4221 static int gfx_v9_0_late_init(void *handle) 4222 { 4223 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4224 int r; 4225 4226 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4227 if (r) 4228 return r; 
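	/*
	 * Note: the remaining late-init steps below enable the privileged
	 * instruction interrupt and then call gfx_v9_0_ecc_late_init(),
	 * which runs the EDC GDS/GPR clean-up dispatches; those need the IB
	 * pool, which is why they run here instead of in hw_init.
	 */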
4229 4230 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4231 if (r) 4232 return r; 4233 4234 r = gfx_v9_0_ecc_late_init(handle); 4235 if (r) 4236 return r; 4237 4238 return 0; 4239 } 4240 4241 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4242 { 4243 uint32_t rlc_setting; 4244 4245 /* if RLC is not enabled, do nothing */ 4246 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4247 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4248 return false; 4249 4250 return true; 4251 } 4252 4253 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4254 { 4255 uint32_t data; 4256 unsigned i; 4257 4258 data = RLC_SAFE_MODE__CMD_MASK; 4259 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4260 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4261 4262 /* wait for RLC_SAFE_MODE */ 4263 for (i = 0; i < adev->usec_timeout; i++) { 4264 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4265 break; 4266 udelay(1); 4267 } 4268 } 4269 4270 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4271 { 4272 uint32_t data; 4273 4274 data = RLC_SAFE_MODE__CMD_MASK; 4275 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4276 } 4277 4278 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4279 bool enable) 4280 { 4281 amdgpu_gfx_rlc_enter_safe_mode(adev); 4282 4283 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4284 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4285 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4286 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4287 } else { 4288 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4289 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4290 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4291 } 4292 4293 amdgpu_gfx_rlc_exit_safe_mode(adev); 4294 } 4295 4296 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4297 bool enable) 4298 { 4299 /* TODO: double check if we need to perform under safe mode */ 4300 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4301 4302 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4303 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4304 else 4305 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4306 4307 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4308 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4309 else 4310 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4311 4312 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4313 } 4314 4315 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4316 bool enable) 4317 { 4318 uint32_t data, def; 4319 4320 amdgpu_gfx_rlc_enter_safe_mode(adev); 4321 4322 /* It is disabled by HW by default */ 4323 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4324 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4325 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4326 4327 if (adev->asic_type != CHIP_VEGA12) 4328 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4329 4330 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4331 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4332 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4333 4334 /* only for Vega10 & Raven1 */ 4335 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4336 4337 if (def != data) 4338 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4339 4340 /* MGLS is a global flag to control all MGLS in GFX */ 4341 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4342 /* 2 - RLC memory Light sleep */ 
4343 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4344 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4345 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4346 if (def != data) 4347 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4348 } 4349 /* 3 - CP memory Light sleep */ 4350 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4351 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4352 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4353 if (def != data) 4354 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4355 } 4356 } 4357 } else { 4358 /* 1 - MGCG_OVERRIDE */ 4359 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4360 4361 if (adev->asic_type != CHIP_VEGA12) 4362 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4363 4364 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4365 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4366 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4367 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4368 4369 if (def != data) 4370 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4371 4372 /* 2 - disable MGLS in RLC */ 4373 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4374 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4375 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4376 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4377 } 4378 4379 /* 3 - disable MGLS in CP */ 4380 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4381 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4382 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4383 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4384 } 4385 } 4386 4387 amdgpu_gfx_rlc_exit_safe_mode(adev); 4388 } 4389 4390 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4391 bool enable) 4392 { 4393 uint32_t data, def; 4394 4395 if (adev->asic_type == CHIP_ARCTURUS) 4396 return; 4397 4398 amdgpu_gfx_rlc_enter_safe_mode(adev); 4399 4400 /* Enable 3D CGCG/CGLS */ 4401 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4402 /* write cmd to clear cgcg/cgls ov */ 4403 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4404 /* unset CGCG override */ 4405 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4406 /* update CGCG and CGLS override bits */ 4407 if (def != data) 4408 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4409 4410 /* enable 3Dcgcg FSM(0x0000363f) */ 4411 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4412 4413 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4414 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4415 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4416 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4417 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4418 if (def != data) 4419 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4420 4421 /* set IDLE_POLL_COUNT(0x00900100) */ 4422 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4423 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4424 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4425 if (def != data) 4426 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4427 } else { 4428 /* Disable CGCG/CGLS */ 4429 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4430 /* disable cgcg, cgls should be disabled */ 4431 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4432 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4433 /* disable cgcg and cgls in FSM */ 4434 if (def != data) 4435 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4436 } 4437 4438 
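	/*
	 * For reference, the "FSM(0x0000363f)" value noted above appears to
	 * decompose, given the shifts used here, as:
	 *   0x36 << CGCG_GFX_IDLE_THRESHOLD__SHIFT  -> 0x3600
	 *   0x0f << CGLS_REP_COMPANSAT_DELAY__SHIFT -> 0x003c
	 *   CGCG_EN | CGLS_EN                       -> 0x0003
	 * i.e. an idle threshold of 0x36, a CGLS compensation delay of 0xf,
	 * and both enable bits set.
	 */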
amdgpu_gfx_rlc_exit_safe_mode(adev); 4439 } 4440 4441 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4442 bool enable) 4443 { 4444 uint32_t def, data; 4445 4446 amdgpu_gfx_rlc_enter_safe_mode(adev); 4447 4448 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4449 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4450 /* unset CGCG override */ 4451 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4452 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4453 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4454 else 4455 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4456 /* update CGCG and CGLS override bits */ 4457 if (def != data) 4458 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4459 4460 /* enable cgcg FSM(0x0000363F) */ 4461 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4462 4463 if (adev->asic_type == CHIP_ARCTURUS) 4464 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4465 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4466 else 4467 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4468 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4469 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4470 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4471 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4472 if (def != data) 4473 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4474 4475 /* set IDLE_POLL_COUNT(0x00900100) */ 4476 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4477 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4478 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4479 if (def != data) 4480 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4481 } else { 4482 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4483 /* reset CGCG/CGLS bits */ 4484 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4485 /* disable cgcg and cgls in FSM */ 4486 if (def != data) 4487 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4488 } 4489 4490 amdgpu_gfx_rlc_exit_safe_mode(adev); 4491 } 4492 4493 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4494 bool enable) 4495 { 4496 if (enable) { 4497 /* CGCG/CGLS should be enabled after MGCG/MGLS 4498 * === MGCG + MGLS === 4499 */ 4500 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4501 /* === CGCG /CGLS for GFX 3D Only === */ 4502 gfx_v9_0_update_3d_clock_gating(adev, enable); 4503 /* === CGCG + CGLS === */ 4504 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4505 } else { 4506 /* CGCG/CGLS should be disabled before MGCG/MGLS 4507 * === CGCG + CGLS === 4508 */ 4509 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4510 /* === CGCG /CGLS for GFX 3D Only === */ 4511 gfx_v9_0_update_3d_clock_gating(adev, enable); 4512 /* === MGCG + MGLS === */ 4513 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4514 } 4515 return 0; 4516 } 4517 4518 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4519 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4520 .set_safe_mode = gfx_v9_0_set_safe_mode, 4521 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4522 .init = gfx_v9_0_rlc_init, 4523 .get_csb_size = gfx_v9_0_get_csb_size, 4524 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4525 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4526 .resume = gfx_v9_0_rlc_resume, 4527 .stop = gfx_v9_0_rlc_stop, 4528 .reset = gfx_v9_0_rlc_reset, 4529 .start = gfx_v9_0_rlc_start 4530 }; 4531 4532 static int gfx_v9_0_set_powergating_state(void *handle, 4533 enum 
amd_powergating_state state) 4534 { 4535 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4536 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4537 4538 switch (adev->asic_type) { 4539 case CHIP_RAVEN: 4540 case CHIP_RENOIR: 4541 if (!enable) { 4542 amdgpu_gfx_off_ctrl(adev, false); 4543 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4544 } 4545 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4546 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4547 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4548 } else { 4549 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4550 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4551 } 4552 4553 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4554 gfx_v9_0_enable_cp_power_gating(adev, true); 4555 else 4556 gfx_v9_0_enable_cp_power_gating(adev, false); 4557 4558 /* update gfx cgpg state */ 4559 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4560 4561 /* update mgcg state */ 4562 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4563 4564 if (enable) 4565 amdgpu_gfx_off_ctrl(adev, true); 4566 break; 4567 case CHIP_VEGA12: 4568 if (!enable) { 4569 amdgpu_gfx_off_ctrl(adev, false); 4570 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4571 } else { 4572 amdgpu_gfx_off_ctrl(adev, true); 4573 } 4574 break; 4575 default: 4576 break; 4577 } 4578 4579 return 0; 4580 } 4581 4582 static int gfx_v9_0_set_clockgating_state(void *handle, 4583 enum amd_clockgating_state state) 4584 { 4585 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4586 4587 if (amdgpu_sriov_vf(adev)) 4588 return 0; 4589 4590 switch (adev->asic_type) { 4591 case CHIP_VEGA10: 4592 case CHIP_VEGA12: 4593 case CHIP_VEGA20: 4594 case CHIP_RAVEN: 4595 case CHIP_ARCTURUS: 4596 case CHIP_RENOIR: 4597 gfx_v9_0_update_gfx_clock_gating(adev, 4598 state == AMD_CG_STATE_GATE ? 
true : false); 4599 break; 4600 default: 4601 break; 4602 } 4603 return 0; 4604 } 4605 4606 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4607 { 4608 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4609 int data; 4610 4611 if (amdgpu_sriov_vf(adev)) 4612 *flags = 0; 4613 4614 /* AMD_CG_SUPPORT_GFX_MGCG */ 4615 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4616 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4617 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4618 4619 /* AMD_CG_SUPPORT_GFX_CGCG */ 4620 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4621 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4622 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4623 4624 /* AMD_CG_SUPPORT_GFX_CGLS */ 4625 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4626 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4627 4628 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4629 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4630 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4631 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4632 4633 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4634 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4635 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4636 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4637 4638 if (adev->asic_type != CHIP_ARCTURUS) { 4639 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4640 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4641 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4642 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4643 4644 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4645 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4646 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4647 } 4648 } 4649 4650 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4651 { 4652 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4653 } 4654 4655 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4656 { 4657 struct amdgpu_device *adev = ring->adev; 4658 u64 wptr; 4659 4660 /* XXX check if swapping is necessary on BE */ 4661 if (ring->use_doorbell) { 4662 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4663 } else { 4664 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4665 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4666 } 4667 4668 return wptr; 4669 } 4670 4671 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4672 { 4673 struct amdgpu_device *adev = ring->adev; 4674 4675 if (ring->use_doorbell) { 4676 /* XXX check if swapping is necessary on BE */ 4677 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4678 WDOORBELL64(ring->doorbell_index, ring->wptr); 4679 } else { 4680 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4681 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4682 } 4683 } 4684 4685 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4686 { 4687 struct amdgpu_device *adev = ring->adev; 4688 u32 ref_and_mask, reg_mem_engine; 4689 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 4690 4691 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4692 switch (ring->me) { 4693 case 1: 4694 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4695 break; 4696 case 2: 4697 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4698 break; 4699 default: 4700 return; 4701 } 4702 reg_mem_engine = 0; 4703 } else { 4704 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4705 reg_mem_engine = 1; /* pfp */ 4706 } 4707 4708 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4709 
adev->nbio.funcs->get_hdp_flush_req_offset(adev), 4710 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 4711 ref_and_mask, ref_and_mask, 0x20); 4712 } 4713 4714 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4715 struct amdgpu_job *job, 4716 struct amdgpu_ib *ib, 4717 uint32_t flags) 4718 { 4719 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4720 u32 header, control = 0; 4721 4722 if (ib->flags & AMDGPU_IB_FLAG_CE) 4723 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4724 else 4725 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4726 4727 control |= ib->length_dw | (vmid << 24); 4728 4729 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4730 control |= INDIRECT_BUFFER_PRE_ENB(1); 4731 4732 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4733 gfx_v9_0_ring_emit_de_meta(ring); 4734 } 4735 4736 amdgpu_ring_write(ring, header); 4737 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4738 amdgpu_ring_write(ring, 4739 #ifdef __BIG_ENDIAN 4740 (2 << 0) | 4741 #endif 4742 lower_32_bits(ib->gpu_addr)); 4743 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4744 amdgpu_ring_write(ring, control); 4745 } 4746 4747 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4748 struct amdgpu_job *job, 4749 struct amdgpu_ib *ib, 4750 uint32_t flags) 4751 { 4752 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4753 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4754 4755 /* Currently, there is a high possibility to get wave ID mismatch 4756 * between ME and GDS, leading to a hw deadlock, because ME generates 4757 * different wave IDs than the GDS expects. This situation happens 4758 * randomly when at least 5 compute pipes use GDS ordered append. 4759 * The wave IDs generated by ME are also wrong after suspend/resume. 4760 * Those are probably bugs somewhere else in the kernel driver. 4761 * 4762 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4763 * GDS to 0 for this ring (me/pipe). 4764 */ 4765 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4766 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4767 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4768 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4769 } 4770 4771 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4772 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4773 amdgpu_ring_write(ring, 4774 #ifdef __BIG_ENDIAN 4775 (2 << 0) | 4776 #endif 4777 lower_32_bits(ib->gpu_addr)); 4778 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4779 amdgpu_ring_write(ring, control); 4780 } 4781 4782 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4783 u64 seq, unsigned flags) 4784 { 4785 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4786 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4787 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4788 4789 /* RELEASE_MEM - flush caches, send int */ 4790 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4791 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4792 EOP_TC_NC_ACTION_EN) : 4793 (EOP_TCL1_ACTION_EN | 4794 EOP_TC_ACTION_EN | 4795 EOP_TC_WB_ACTION_EN | 4796 EOP_TC_MD_ACTION_EN)) | 4797 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4798 EVENT_INDEX(5))); 4799 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
					 2 : 0));

	/*
	 * the address should be Qword aligned if 64bit write, Dword
	 * aligned if only send 32bit data low (discard data high)
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
			      lower_32_bits(addr), upper_32_bits(addr),
			      seq, 0xffffffff, 4);
}

static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* compute doesn't have PFP */
	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
}

static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
	else
		BUG();
	return wptr;
}

static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ?
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4863 4864 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4865 4866 /* first me only has 2 entries, GFX and HP3D */ 4867 if (ring->me > 0) 4868 pipe_num -= 2; 4869 4870 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4871 tmp = RREG32(reg); 4872 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4873 WREG32(reg, tmp); 4874 } 4875 4876 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4877 struct amdgpu_ring *ring, 4878 bool acquire) 4879 { 4880 int i, pipe; 4881 bool reserve; 4882 struct amdgpu_ring *iring; 4883 4884 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4885 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4886 if (acquire) 4887 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4888 else 4889 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4890 4891 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4892 /* Clear all reservations - everyone reacquires all resources */ 4893 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4894 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4895 true); 4896 4897 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4898 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4899 true); 4900 } else { 4901 /* Lower all pipes without a current reservation */ 4902 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4903 iring = &adev->gfx.gfx_ring[i]; 4904 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4905 iring->me, 4906 iring->pipe, 4907 0); 4908 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4909 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4910 } 4911 4912 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4913 iring = &adev->gfx.compute_ring[i]; 4914 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4915 iring->me, 4916 iring->pipe, 4917 0); 4918 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4919 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4920 } 4921 } 4922 4923 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4924 } 4925 4926 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4927 struct amdgpu_ring *ring, 4928 bool acquire) 4929 { 4930 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4931 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4932 4933 mutex_lock(&adev->srbm_mutex); 4934 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4935 4936 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4937 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4938 4939 soc15_grbm_select(adev, 0, 0, 0, 0); 4940 mutex_unlock(&adev->srbm_mutex); 4941 } 4942 4943 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4944 enum drm_sched_priority priority) 4945 { 4946 struct amdgpu_device *adev = ring->adev; 4947 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4948 4949 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4950 return; 4951 4952 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4953 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4954 } 4955 4956 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4957 { 4958 struct amdgpu_device *adev = ring->adev; 4959 4960 /* XXX check if swapping is necessary on BE */ 4961 if (ring->use_doorbell) { 4962 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4963 WDOORBELL64(ring->doorbell_index, ring->wptr); 4964 } else{ 4965 BUG(); /* only DOORBELL method supported on gfx9 now */ 4966 } 4967 } 4968 4969 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4970 u64 seq, unsigned int flags) 4971 { 4972 struct amdgpu_device *adev = ring->adev; 4973 4974 /* we only allocate 32bit for each seq wb address */ 4975 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4976 4977 /* write fence seq to the "addr" */ 4978 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4979 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4980 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4981 amdgpu_ring_write(ring, lower_32_bits(addr)); 4982 amdgpu_ring_write(ring, upper_32_bits(addr)); 4983 amdgpu_ring_write(ring, lower_32_bits(seq)); 4984 4985 if (flags & AMDGPU_FENCE_FLAG_INT) { 4986 /* set register to trigger INT */ 4987 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4988 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4989 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4990 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4991 amdgpu_ring_write(ring, 0); 4992 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4993 } 4994 } 4995 4996 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4997 { 4998 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4999 amdgpu_ring_write(ring, 0); 5000 } 5001 5002 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5003 { 5004 struct v9_ce_ib_state ce_payload = {0}; 5005 uint64_t csa_addr; 5006 int cnt; 5007 5008 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5009 csa_addr = amdgpu_csa_vaddr(ring->adev); 5010 5011 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5012 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5013 WRITE_DATA_DST_SEL(8) | 5014 WR_CONFIRM) | 5015 WRITE_DATA_CACHE_POLICY(0)); 5016 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5017 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5018 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5019 } 5020 5021 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5022 { 5023 struct v9_de_ib_state de_payload = {0}; 5024 uint64_t csa_addr, gds_addr; 5025 int cnt; 5026 5027 csa_addr = amdgpu_csa_vaddr(ring->adev); 5028 gds_addr = csa_addr + 4096; 5029 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 5030 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5031 5032 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5033 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5034 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5035 WRITE_DATA_DST_SEL(8) | 5036 WR_CONFIRM) | 5037 WRITE_DATA_CACHE_POLICY(0)); 5038 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5039 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5040 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5041 } 5042 5043 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5044 { 5045 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5046 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5047 } 5048 5049 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5050 { 5051 uint32_t dw2 = 0; 5052 5053 if (amdgpu_sriov_vf(ring->adev)) 5054 gfx_v9_0_ring_emit_ce_meta(ring); 5055 5056 gfx_v9_0_ring_emit_tmz(ring, true); 5057 5058 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5059 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5060 /* set load_global_config & load_global_uconfig */ 5061 dw2 |= 0x8001; 5062 /* set load_cs_sh_regs */ 5063 dw2 |= 0x01000000; 5064 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5065 dw2 |= 0x10002; 5066 5067 /* set load_ce_ram if preamble presented */ 5068 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5069 dw2 |= 0x10000000; 5070 } else { 5071 /* still load_ce_ram if this is the first time preamble presented 5072 * although there is no context switch happens. 5073 */ 5074 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5075 dw2 |= 0x10000000; 5076 } 5077 5078 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5079 amdgpu_ring_write(ring, dw2); 5080 amdgpu_ring_write(ring, 0); 5081 } 5082 5083 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5084 { 5085 unsigned ret; 5086 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5087 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5088 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5089 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5090 ret = ring->wptr & ring->buf_mask; 5091 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5092 return ret; 5093 } 5094 5095 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5096 { 5097 unsigned cur; 5098 BUG_ON(offset > ring->buf_mask); 5099 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5100 5101 cur = (ring->wptr & ring->buf_mask) - 1; 5102 if (likely(cur > offset)) 5103 ring->ring[offset] = cur - offset; 5104 else 5105 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5106 } 5107 5108 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5109 { 5110 struct amdgpu_device *adev = ring->adev; 5111 5112 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5113 amdgpu_ring_write(ring, 0 | /* src: register*/ 5114 (5 << 8) | /* dst: memory */ 5115 (1 << 20)); /* write confirm */ 5116 amdgpu_ring_write(ring, reg); 5117 amdgpu_ring_write(ring, 0); 5118 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5119 adev->virt.reg_val_offs * 4)); 5120 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5121 adev->virt.reg_val_offs * 4)); 5122 } 5123 5124 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5125 uint32_t val) 5126 { 5127 uint32_t cmd = 0; 5128 5129 switch (ring->funcs->type) { 5130 case AMDGPU_RING_TYPE_GFX: 5131 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5132 break; 5133 case AMDGPU_RING_TYPE_KIQ: 5134 cmd = (1 << 16); /* no inc addr */ 5135 break; 5136 default: 5137 cmd = WR_CONFIRM; 5138 break; 5139 } 5140 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5141 amdgpu_ring_write(ring, cmd); 5142 amdgpu_ring_write(ring, reg); 5143 amdgpu_ring_write(ring, 0); 5144 amdgpu_ring_write(ring, val); 5145 } 5146 5147 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5148 uint32_t val, uint32_t mask) 5149 { 5150 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5151 } 5152 5153 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5154 uint32_t reg0, uint32_t reg1, 5155 uint32_t ref, uint32_t mask) 5156 { 5157 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5158 struct amdgpu_device *adev = ring->adev; 5159 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5160 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5161 5162 if (fw_version_ok) 5163 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5164 ref, mask, 0x20); 5165 else 5166 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5167 ref, mask); 5168 } 5169 5170 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5171 { 5172 struct amdgpu_device *adev = ring->adev; 5173 uint32_t value = 0; 5174 5175 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5176 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5177 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5178 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5179 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5180 } 5181 5182 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5183 enum amdgpu_interrupt_state state) 5184 { 5185 switch (state) { 5186 case AMDGPU_IRQ_STATE_DISABLE: 5187 case AMDGPU_IRQ_STATE_ENABLE: 5188 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5189 TIME_STAMP_INT_ENABLE, 5190 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5191 break; 5192 default: 5193 break; 5194 } 5195 } 5196 5197 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5198 int me, int pipe, 5199 enum amdgpu_interrupt_state state) 5200 { 5201 u32 mec_int_cntl, mec_int_cntl_reg; 5202 5203 /* 5204 * amdgpu controls only the first MEC. That's why this function only 5205 * handles the setting of interrupts for this specific MEC. All other 5206 * pipes' interrupts are set by amdkfd. 
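	 * (Hence only me == 1 is accepted below; any other ME value is
	 * rejected with a debug message.)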
5207 */ 5208 5209 if (me == 1) { 5210 switch (pipe) { 5211 case 0: 5212 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5213 break; 5214 case 1: 5215 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5216 break; 5217 case 2: 5218 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5219 break; 5220 case 3: 5221 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5222 break; 5223 default: 5224 DRM_DEBUG("invalid pipe %d\n", pipe); 5225 return; 5226 } 5227 } else { 5228 DRM_DEBUG("invalid me %d\n", me); 5229 return; 5230 } 5231 5232 switch (state) { 5233 case AMDGPU_IRQ_STATE_DISABLE: 5234 mec_int_cntl = RREG32(mec_int_cntl_reg); 5235 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5236 TIME_STAMP_INT_ENABLE, 0); 5237 WREG32(mec_int_cntl_reg, mec_int_cntl); 5238 break; 5239 case AMDGPU_IRQ_STATE_ENABLE: 5240 mec_int_cntl = RREG32(mec_int_cntl_reg); 5241 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5242 TIME_STAMP_INT_ENABLE, 1); 5243 WREG32(mec_int_cntl_reg, mec_int_cntl); 5244 break; 5245 default: 5246 break; 5247 } 5248 } 5249 5250 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5251 struct amdgpu_irq_src *source, 5252 unsigned type, 5253 enum amdgpu_interrupt_state state) 5254 { 5255 switch (state) { 5256 case AMDGPU_IRQ_STATE_DISABLE: 5257 case AMDGPU_IRQ_STATE_ENABLE: 5258 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5259 PRIV_REG_INT_ENABLE, 5260 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5261 break; 5262 default: 5263 break; 5264 } 5265 5266 return 0; 5267 } 5268 5269 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5270 struct amdgpu_irq_src *source, 5271 unsigned type, 5272 enum amdgpu_interrupt_state state) 5273 { 5274 switch (state) { 5275 case AMDGPU_IRQ_STATE_DISABLE: 5276 case AMDGPU_IRQ_STATE_ENABLE: 5277 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5278 PRIV_INSTR_INT_ENABLE, 5279 state == AMDGPU_IRQ_STATE_ENABLE ? 
			       1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 1)

#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 0)

static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 1);
		DISABLE_ECC_ON_ME_PIPE(1, 2);
		DISABLE_ECC_ON_ME_PIPE(1, 3);
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 0);
		ENABLE_ECC_ON_ME_PIPE(1, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 2);
		ENABLE_ECC_ON_ME_PIPE(1, 3);
		break;
	default:
		break;
	}

	return 0;
}


static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
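			 * That is why the handler walks every compute ring and only
			 * signals the fence whose me/pipe/queue match this IH entry.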
5388 */ 5389 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5390 amdgpu_fence_process(ring); 5391 } 5392 break; 5393 } 5394 return 0; 5395 } 5396 5397 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5398 struct amdgpu_iv_entry *entry) 5399 { 5400 u8 me_id, pipe_id, queue_id; 5401 struct amdgpu_ring *ring; 5402 int i; 5403 5404 me_id = (entry->ring_id & 0x0c) >> 2; 5405 pipe_id = (entry->ring_id & 0x03) >> 0; 5406 queue_id = (entry->ring_id & 0x70) >> 4; 5407 5408 switch (me_id) { 5409 case 0: 5410 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5411 break; 5412 case 1: 5413 case 2: 5414 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5415 ring = &adev->gfx.compute_ring[i]; 5416 if (ring->me == me_id && ring->pipe == pipe_id && 5417 ring->queue == queue_id) 5418 drm_sched_fault(&ring->sched); 5419 } 5420 break; 5421 } 5422 } 5423 5424 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5425 struct amdgpu_irq_src *source, 5426 struct amdgpu_iv_entry *entry) 5427 { 5428 DRM_ERROR("Illegal register access in command stream\n"); 5429 gfx_v9_0_fault(adev, entry); 5430 return 0; 5431 } 5432 5433 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5434 struct amdgpu_irq_src *source, 5435 struct amdgpu_iv_entry *entry) 5436 { 5437 DRM_ERROR("Illegal instruction in command stream\n"); 5438 gfx_v9_0_fault(adev, entry); 5439 return 0; 5440 } 5441 5442 5443 static const struct ras_gfx_subblock_reg ras_subblock_regs[] = { 5444 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 5445 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5446 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 5447 }, 5448 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 5449 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 5450 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 5451 }, 5452 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5453 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 5454 0, 0 5455 }, 5456 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5457 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 5458 0, 0 5459 }, 5460 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 5461 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 5462 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 5463 }, 5464 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5465 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 5466 0, 0 5467 }, 5468 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5469 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5470 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 5471 }, 5472 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 5473 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 5474 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 5475 }, 5476 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 5477 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 5478 0, 0 5479 }, 5480 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 5481 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 5482 0, 0 5483 }, 5484 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 5485 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 5486 0, 0 5487 }, 5488 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5489 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 5490 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 5491 }, 5492 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5493 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 5494 0, 0 5495 }, 5496 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 
5497 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5498 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 5499 }, 5500 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5501 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5502 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5503 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 5504 }, 5505 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5506 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5507 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 5508 0, 0 5509 }, 5510 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5511 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5512 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5513 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 5514 }, 5515 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5516 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5517 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5518 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 5519 }, 5520 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5521 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5522 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5523 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 5524 }, 5525 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5526 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5527 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5528 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 5529 }, 5530 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 5531 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 5532 0, 0 5533 }, 5534 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5535 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5536 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 5537 }, 5538 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5539 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 5540 0, 0 5541 }, 5542 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5543 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 5544 0, 0 5545 }, 5546 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5547 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 5548 0, 0 5549 }, 5550 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5551 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 5552 0, 0 5553 }, 5554 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5555 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 5556 0, 0 5557 }, 5558 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5559 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 5560 0, 0 5561 }, 5562 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5563 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5564 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 5565 }, 5566 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5567 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5568 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 5569 }, 5570 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5571 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5572 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 5573 }, 5574 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5575 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5576 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 5577 }, 5578 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5579 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5580 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 5581 }, 5582 { "TCC_IN_USE_DEC", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5583 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 5584 0, 0 5585 }, 5586 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5587 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 5588 0, 0 5589 }, 5590 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5591 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 5592 0, 0 5593 }, 5594 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5595 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 5596 0, 0 5597 }, 5598 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5599 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 5600 0, 0 5601 }, 5602 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5603 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 5604 0, 0 5605 }, 5606 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5607 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 5608 0, 0 5609 }, 5610 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5611 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 5612 0, 0 5613 }, 5614 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5615 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 5616 0, 0 5617 }, 5618 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5619 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5620 0, 0 5621 }, 5622 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5623 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 5624 0, 0 5625 }, 5626 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5627 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5628 0, 0 5629 }, 5630 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5631 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 5632 0, 0 5633 }, 5634 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 5635 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 5636 0, 0 5637 }, 5638 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5639 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5640 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 5641 }, 5642 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5643 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5644 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 5645 }, 5646 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5647 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 5648 0, 0 5649 }, 5650 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5651 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 5652 0, 0 5653 }, 5654 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5655 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 5656 0, 0 5657 }, 5658 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5659 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5660 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 5661 }, 5662 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5663 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5664 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 5665 }, 5666 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5667 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5668 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 5669 }, 5670 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5671 
SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5672 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 5673 }, 5674 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5675 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 5676 0, 0 5677 }, 5678 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5679 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5680 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 5681 }, 5682 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5683 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5684 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 5685 }, 5686 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5687 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 5688 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 5689 }, 5690 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5691 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5692 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 5693 }, 5694 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5695 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5696 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 5697 }, 5698 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5699 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5700 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 5701 }, 5702 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5703 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5704 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 5705 }, 5706 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5707 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5708 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 5709 }, 5710 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5711 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5712 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 5713 }, 5714 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5715 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5716 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 5717 }, 5718 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5719 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5720 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 5721 }, 5722 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5723 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5724 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 5725 }, 5726 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5727 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5728 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 5729 }, 5730 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5731 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5732 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 5733 }, 5734 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5735 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5736 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 5737 }, 5738 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5739 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5740 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 5741 }, 5742 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5743 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5744 SOC15_REG_FIELD(SQC_EDC_CNT2, 
DATA_BANKA_BANK_RAM_DED_COUNT) 5745 }, 5746 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5747 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5748 0, 0 5749 }, 5750 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5751 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 5752 0, 0 5753 }, 5754 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5755 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 5756 0, 0 5757 }, 5758 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5759 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 5760 0, 0 5761 }, 5762 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5763 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 5764 0, 0 5765 }, 5766 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5767 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 5768 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 5769 }, 5770 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5771 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 5772 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 5773 }, 5774 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5775 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 5776 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 5777 }, 5778 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5779 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 5780 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 5781 }, 5782 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5783 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 5784 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 5785 }, 5786 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5787 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 5788 0, 0 5789 }, 5790 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5791 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 5792 0, 0 5793 }, 5794 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5795 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 5796 0, 0 5797 }, 5798 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5799 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 5800 0, 0 5801 }, 5802 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5803 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 5804 0, 0 5805 }, 5806 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5807 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 5808 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 5809 }, 5810 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5811 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 5812 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 5813 }, 5814 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5815 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 5816 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 5817 }, 5818 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5819 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 5820 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 5821 }, 5822 { "EA_WRET_TAGMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5823 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 5824 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 5825 }, 5826 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5827 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 5828 0, 0 5829 }, 5830 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5831 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 5832 0, 0 5833 }, 5834 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5835 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 5836 0, 0 5837 }, 5838 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5839 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 5840 0, 0 5841 }, 5842 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5843 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 5844 0, 0 5845 }, 5846 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5847 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 5848 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 5849 }, 5850 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5851 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 5852 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 5853 }, 5854 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5855 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 5856 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 5857 }, 5858 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5859 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 5860 0, 0 5861 }, 5862 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5863 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 5864 0, 0 5865 }, 5866 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5867 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 5868 0, 0 5869 }, 5870 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5871 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 5872 0, 0 5873 }, 5874 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5875 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 5876 0, 0 5877 }, 5878 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5879 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 5880 0, 0 5881 } 5882 }; 5883 5884 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 5885 void *inject_if) 5886 { 5887 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 5888 int ret; 5889 struct ta_ras_trigger_error_input block_info = { 0 }; 5890 5891 if (adev->asic_type != CHIP_VEGA20) 5892 return -EINVAL; 5893 5894 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 5895 return -EINVAL; 5896 5897 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 5898 return -EPERM; 5899 5900 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 5901 info->head.type)) { 5902 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 5903 ras_gfx_subblocks[info->head.sub_block_index].name, 5904 info->head.type); 5905 return -EPERM; 5906 } 5907 5908 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 5909 info->head.type)) { 5910 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 5911 ras_gfx_subblocks[info->head.sub_block_index].name, 5912 info->head.type); 5913 return -EPERM; 5914 } 5915 5916 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 5917 block_info.sub_block_index = 5918 
ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 5919 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 5920 block_info.address = info->address; 5921 block_info.value = info->value; 5922 5923 mutex_lock(&adev->grbm_idx_mutex); 5924 ret = psp_ras_trigger_error(&adev->psp, &block_info); 5925 mutex_unlock(&adev->grbm_idx_mutex); 5926 5927 return ret; 5928 } 5929 5930 static const char *vml2_mems[] = { 5931 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 5932 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 5933 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 5934 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 5935 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 5936 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 5937 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 5938 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 5939 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 5940 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 5941 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 5942 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 5943 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 5944 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 5945 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 5946 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 5947 }; 5948 5949 static const char *vml2_walker_mems[] = { 5950 "UTC_VML2_CACHE_PDE0_MEM0", 5951 "UTC_VML2_CACHE_PDE0_MEM1", 5952 "UTC_VML2_CACHE_PDE1_MEM0", 5953 "UTC_VML2_CACHE_PDE1_MEM1", 5954 "UTC_VML2_CACHE_PDE2_MEM0", 5955 "UTC_VML2_CACHE_PDE2_MEM1", 5956 "UTC_VML2_RDIF_LOG_FIFO", 5957 }; 5958 5959 static const char *atc_l2_cache_2m_mems[] = { 5960 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 5961 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 5962 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 5963 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 5964 }; 5965 5966 static const char *atc_l2_cache_4k_mems[] = { 5967 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 5968 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 5969 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 5970 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 5971 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 5972 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 5973 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 5974 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 5975 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 5976 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 5977 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 5978 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 5979 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 5980 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 5981 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 5982 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 5983 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 5984 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 5985 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 5986 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 5987 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 5988 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 5989 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 5990 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 5991 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 5992 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 5993 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 5994 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 5995 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 5996 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 5997 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 5998 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 5999 }; 6000 6001 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6002 struct ras_err_data *err_data) 6003 { 6004 uint32_t i, data; 6005 uint32_t sec_count, ded_count; 6006 6007 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6008 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6009 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6010 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6011 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6012 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 
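	/* The INDEX = 255 / CNT = 0 writes above and below appear to
	 * broadcast-reset each block's EDC counters before they are read back
	 * one instance at a time (assumption based on the matching clear
	 * pattern used for the other EDC blocks in this file).
	 */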
6013 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6014 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6015 6016 for (i = 0; i < 16; i++) { 6017 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6018 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6019 6020 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6021 if (sec_count) { 6022 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6023 vml2_mems[i], sec_count); 6024 err_data->ce_count += sec_count; 6025 } 6026 6027 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6028 if (ded_count) { 6029 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6030 vml2_mems[i], ded_count); 6031 err_data->ue_count += ded_count; 6032 } 6033 } 6034 6035 for (i = 0; i < 7; i++) { 6036 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6037 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6038 6039 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6040 SEC_COUNT); 6041 if (sec_count) { 6042 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6043 vml2_walker_mems[i], sec_count); 6044 err_data->ce_count += sec_count; 6045 } 6046 6047 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6048 DED_COUNT); 6049 if (ded_count) { 6050 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6051 vml2_walker_mems[i], ded_count); 6052 err_data->ue_count += ded_count; 6053 } 6054 } 6055 6056 for (i = 0; i < 4; i++) { 6057 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6058 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6059 6060 sec_count = (data & 0x00006000L) >> 0xd; 6061 if (sec_count) { 6062 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6063 atc_l2_cache_2m_mems[i], sec_count); 6064 err_data->ce_count += sec_count; 6065 } 6066 } 6067 6068 for (i = 0; i < 32; i++) { 6069 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6070 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6071 6072 sec_count = (data & 0x00006000L) >> 0xd; 6073 if (sec_count) { 6074 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6075 atc_l2_cache_4k_mems[i], sec_count); 6076 err_data->ce_count += sec_count; 6077 } 6078 6079 ded_count = (data & 0x00018000L) >> 0xf; 6080 if (ded_count) { 6081 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6082 atc_l2_cache_4k_mems[i], ded_count); 6083 err_data->ue_count += ded_count; 6084 } 6085 } 6086 6087 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6088 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6089 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6090 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6091 6092 return 0; 6093 } 6094 6095 static int __get_ras_error_count(const struct soc15_reg_entry *reg, 6096 uint32_t se_id, uint32_t inst_id, uint32_t value, 6097 uint32_t *sec_count, uint32_t *ded_count) 6098 { 6099 uint32_t i; 6100 uint32_t sec_cnt, ded_cnt; 6101 6102 for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) { 6103 if(ras_subblock_regs[i].reg_offset != reg->reg_offset || 6104 ras_subblock_regs[i].seg != reg->seg || 6105 ras_subblock_regs[i].inst != reg->inst) 6106 continue; 6107 6108 sec_cnt = (value & 6109 ras_subblock_regs[i].sec_count_mask) >> 6110 ras_subblock_regs[i].sec_count_shift; 6111 if (sec_cnt) { 6112 DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", 6113 ras_subblock_regs[i].name, 6114 se_id, inst_id, 6115 sec_cnt); 6116 *sec_count += sec_cnt; 6117 } 6118 6119 ded_cnt = (value & 6120 ras_subblock_regs[i].ded_count_mask) >> 6121 ras_subblock_regs[i].ded_count_shift; 6122 if (ded_cnt) { 6123 DRM_INFO("GFX SubBlock %s, 
Instance[%d][%d], DED %d\n", 6124 ras_subblock_regs[i].name, 6125 se_id, inst_id, 6126 ded_cnt); 6127 *ded_count += ded_cnt; 6128 } 6129 } 6130 6131 return 0; 6132 } 6133 6134 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6135 void *ras_error_status) 6136 { 6137 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6138 uint32_t sec_count = 0, ded_count = 0; 6139 uint32_t i, j, k; 6140 uint32_t reg_value; 6141 6142 if (adev->asic_type != CHIP_VEGA20) 6143 return -EINVAL; 6144 6145 err_data->ue_count = 0; 6146 err_data->ce_count = 0; 6147 6148 mutex_lock(&adev->grbm_idx_mutex); 6149 6150 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 6151 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 6152 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 6153 gfx_v9_0_select_se_sh(adev, j, 0, k); 6154 reg_value = 6155 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 6156 if (reg_value) 6157 __get_ras_error_count(&sec_ded_counter_registers[i], 6158 j, k, reg_value, 6159 &sec_count, &ded_count); 6160 } 6161 } 6162 } 6163 6164 err_data->ce_count += sec_count; 6165 err_data->ue_count += ded_count; 6166 6167 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6168 mutex_unlock(&adev->grbm_idx_mutex); 6169 6170 gfx_v9_0_query_utc_edc_status(adev, err_data); 6171 6172 return 0; 6173 } 6174 6175 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6176 .name = "gfx_v9_0", 6177 .early_init = gfx_v9_0_early_init, 6178 .late_init = gfx_v9_0_late_init, 6179 .sw_init = gfx_v9_0_sw_init, 6180 .sw_fini = gfx_v9_0_sw_fini, 6181 .hw_init = gfx_v9_0_hw_init, 6182 .hw_fini = gfx_v9_0_hw_fini, 6183 .suspend = gfx_v9_0_suspend, 6184 .resume = gfx_v9_0_resume, 6185 .is_idle = gfx_v9_0_is_idle, 6186 .wait_for_idle = gfx_v9_0_wait_for_idle, 6187 .soft_reset = gfx_v9_0_soft_reset, 6188 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6189 .set_powergating_state = gfx_v9_0_set_powergating_state, 6190 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6191 }; 6192 6193 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6194 .type = AMDGPU_RING_TYPE_GFX, 6195 .align_mask = 0xff, 6196 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6197 .support_64bit_ptrs = true, 6198 .vmhub = AMDGPU_GFXHUB_0, 6199 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6200 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6201 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6202 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6203 5 + /* COND_EXEC */ 6204 7 + /* PIPELINE_SYNC */ 6205 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6206 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6207 2 + /* VM_FLUSH */ 6208 8 + /* FENCE for VM_FLUSH */ 6209 20 + /* GDS switch */ 6210 4 + /* double SWITCH_BUFFER, 6211 the first COND_EXEC jump to the place just 6212 prior to this double SWITCH_BUFFER */ 6213 5 + /* COND_EXEC */ 6214 7 + /* HDP_flush */ 6215 4 + /* VGT_flush */ 6216 14 + /* CE_META */ 6217 31 + /* DE_META */ 6218 3 + /* CNTX_CTRL */ 6219 5 + /* HDP_INVL */ 6220 8 + 8 + /* FENCE x2 */ 6221 2, /* SWITCH_BUFFER */ 6222 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6223 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6224 .emit_fence = gfx_v9_0_ring_emit_fence, 6225 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6226 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6227 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6228 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6229 .test_ring = gfx_v9_0_ring_test_ring, 6230 .test_ib = gfx_v9_0_ring_test_ib, 6231 .insert_nop = 
amdgpu_ring_insert_nop, 6232 .pad_ib = amdgpu_ring_generic_pad_ib, 6233 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6234 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6235 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6236 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6237 .emit_tmz = gfx_v9_0_ring_emit_tmz, 6238 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6239 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6240 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6241 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6242 }; 6243 6244 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6245 .type = AMDGPU_RING_TYPE_COMPUTE, 6246 .align_mask = 0xff, 6247 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6248 .support_64bit_ptrs = true, 6249 .vmhub = AMDGPU_GFXHUB_0, 6250 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6251 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6252 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6253 .emit_frame_size = 6254 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6255 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6256 5 + /* hdp invalidate */ 6257 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6258 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6259 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6260 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6261 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6262 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6263 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6264 .emit_fence = gfx_v9_0_ring_emit_fence, 6265 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6266 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6267 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6268 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6269 .test_ring = gfx_v9_0_ring_test_ring, 6270 .test_ib = gfx_v9_0_ring_test_ib, 6271 .insert_nop = amdgpu_ring_insert_nop, 6272 .pad_ib = amdgpu_ring_generic_pad_ib, 6273 .set_priority = gfx_v9_0_ring_set_priority_compute, 6274 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6275 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6276 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6277 }; 6278 6279 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6280 .type = AMDGPU_RING_TYPE_KIQ, 6281 .align_mask = 0xff, 6282 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6283 .support_64bit_ptrs = true, 6284 .vmhub = AMDGPU_GFXHUB_0, 6285 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6286 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6287 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6288 .emit_frame_size = 6289 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6290 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6291 5 + /* hdp invalidate */ 6292 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6293 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6294 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6295 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6296 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6297 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6298 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6299 .test_ring = gfx_v9_0_ring_test_ring, 6300 .insert_nop = amdgpu_ring_insert_nop, 6301 .pad_ib = amdgpu_ring_generic_pad_ib, 6302 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6303 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6304 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6305 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6306 }; 6307 6308 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6309 { 6310 int i; 6311 6312 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6313 6314 for (i = 0; i < 

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77;  /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

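/*
 * Usage sketch (assumption about the surrounding common code): the per-SH
 * disable masks written by gfx_v9_0_set_user_cu_inactive_bitmap() normally
 * originate from the amdgpu.disable_cu module parameter, parsed by
 * amdgpu_gfx_parse_disable_cu() in gfx_v9_0_get_cu_info() below, e.g.:
 *
 *	modprobe amdgpu disable_cu=0.0.4,0.0.5
 *
 * which would mask off CU 4 and CU 5 on SE0/SH0.
 */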

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
	    adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which is suitable for
			 * Vega ASICs with their 4*2 SE/SH layout.
			 * But for Arcturus, the SE/SH layout changes to 8*1.
			 * To minimize the impact, we remap into the current
			 * bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
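
/*
 * Registration sketch (an assumption based on how SOC15 IP blocks are wired
 * up elsewhere in the driver, not code from this file): the exported
 * gfx_v9_0_ip_block is expected to be added to the ASIC's IP list by the
 * SoC-level code, e.g.:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 */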