/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L

#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 101 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 113 114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 120 121 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 123 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 125 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 127 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 129 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 131 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 133 134 enum ta_ras_gfx_subblock { 135 /*CPC*/ 136 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 137 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 138 TA_RAS_BLOCK__GFX_CPC_UCODE, 139 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 140 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 141 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 142 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 143 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 144 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 145 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 146 /* CPF*/ 147 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 148 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 149 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 150 TA_RAS_BLOCK__GFX_CPF_TAG, 151 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 152 /* CPG*/ 153 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 154 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 155 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 156 TA_RAS_BLOCK__GFX_CPG_TAG, 157 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 158 /* GDS*/ 159 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 160 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 161 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 162 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 163 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 164 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 165 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 166 /* SPI*/ 167 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 168 /* SQ*/ 169 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 170 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 171 TA_RAS_BLOCK__GFX_SQ_LDS_D, 172 TA_RAS_BLOCK__GFX_SQ_LDS_I, 173 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 174 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 175 /* SQC (3 ranges)*/ 176 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 177 /* SQC range 0*/ 178 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 179 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 180 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 181 
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END = TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1 */
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END = TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2 */
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END = TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA */
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA */
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges) */
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0 */
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1 */
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END = TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2 */
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END = TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3 */
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END = TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4 */
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END = TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI */
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP */
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD */
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges) */
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0 */
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1 */
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2 */
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank */
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker */
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache */
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache */
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

/* Tell the KIQ, via a SET_RESOURCES packet, which compute queues it may schedule. */
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0, queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0); /* oac mask */
	amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, the common golden settings are not needed */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

/*
 * Ring test: write 0xDEADBEEF into a scratch register through the ring and
 * poll the register until the CP has executed the packet.
 */
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

/*
 * IB test: submit an indirect buffer that writes 0xDEADBEEF to a writeback
 * slot, wait on the fence, then check that the value landed in memory.
 */
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
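	/* common teardown: release the IB and drop the fence reference */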
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
	    (adev->gfx.mec_feature_version < 46) ||
	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
	    (adev->gfx.pfp_feature_version < 46))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (!(adev->rev_id >= 0x8 ||
		      adev->pdev->device == 0x15d8) &&
		    (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
		     !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	case CHIP_RENOIR:
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

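	/*
	 * With front-door (PSP) firmware loading, register the CP PFP/ME/CE
	 * images in the ucode table so their page-aligned sizes are counted
	 * into the total firmware footprint handled by the PSP.
	 */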
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/**
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1302 */ 1303 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1304 else 1305 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1306 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1307 if (err) 1308 goto out; 1309 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); if (err) goto out; 1310 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1311 1312 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1313 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1314 if (version_major == 2 && version_minor == 1) 1315 adev->gfx.rlc.is_rlc_v2_1 = true; 1316 1317 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1318 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1319 adev->gfx.rlc.save_and_restore_offset = 1320 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1321 adev->gfx.rlc.clear_state_descriptor_offset = 1322 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1323 adev->gfx.rlc.avail_scratch_ram_locations = 1324 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1325 adev->gfx.rlc.reg_restore_list_size = 1326 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1327 adev->gfx.rlc.reg_list_format_start = 1328 le32_to_cpu(rlc_hdr->reg_list_format_start); 1329 adev->gfx.rlc.reg_list_format_separate_start = 1330 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1331 adev->gfx.rlc.starting_offsets_start = 1332 le32_to_cpu(rlc_hdr->starting_offsets_start); 1333 adev->gfx.rlc.reg_list_format_size_bytes = 1334 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1335 adev->gfx.rlc.reg_list_size_bytes = 1336 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1337 adev->gfx.rlc.register_list_format = 1338 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1339 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1340 if (!adev->gfx.rlc.register_list_format) { 1341 err = -ENOMEM; 1342 goto out; 1343 } 1344 1345 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1346 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1347 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1348 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1349 1350 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1351 1352 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1353 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1354 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1355 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1356 1357 if (adev->gfx.rlc.is_rlc_v2_1) 1358 gfx_v9_0_init_rlc_ext_microcode(adev); 1359 1360 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1361 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1362 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1363 info->fw = adev->gfx.rlc_fw; 1364 header = (const struct common_firmware_header *)info->fw->data; 1365 adev->firmware.fw_size += 1366 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1367 1368 if (adev->gfx.rlc.is_rlc_v2_1 && 1369 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1370 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1371 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1372 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1373 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1374 info->fw = adev->gfx.rlc_fw; 1375 adev->firmware.fw_size += 1376 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1377 1378 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1379 info->ucode_id
= AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1380 info->fw = adev->gfx.rlc_fw; 1381 adev->firmware.fw_size += 1382 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1383 1384 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1385 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1386 info->fw = adev->gfx.rlc_fw; 1387 adev->firmware.fw_size += 1388 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1389 } 1390 } 1391 1392 out: 1393 if (err) { 1394 dev_err(adev->dev, 1395 "gfx9: Failed to load firmware \"%s\"\n", 1396 fw_name); 1397 release_firmware(adev->gfx.rlc_fw); 1398 adev->gfx.rlc_fw = NULL; 1399 } 1400 return err; 1401 } 1402 1403 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1404 const char *chip_name) 1405 { 1406 char fw_name[30]; 1407 int err; 1408 struct amdgpu_firmware_info *info = NULL; 1409 const struct common_firmware_header *header = NULL; 1410 const struct gfx_firmware_header_v1_0 *cp_hdr; 1411 1412 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1413 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1414 if (err) 1415 goto out; 1416 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1417 if (err) 1418 goto out; 1419 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1420 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1421 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1422 1423 1424 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1425 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1426 if (!err) { 1427 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1428 if (err) 1429 goto out; 1430 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1431 adev->gfx.mec2_fw->data; 1432 adev->gfx.mec2_fw_version = 1433 le32_to_cpu(cp_hdr->header.ucode_version); 1434 adev->gfx.mec2_feature_version = 1435 le32_to_cpu(cp_hdr->ucode_feature_version); 1436 } else { 1437 err = 0; 1438 adev->gfx.mec2_fw = NULL; 1439 } 1440 1441 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1442 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1443 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1444 info->fw = adev->gfx.mec_fw; 1445 header = (const struct common_firmware_header *)info->fw->data; 1446 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1447 adev->firmware.fw_size += 1448 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1449 1450 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1451 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1452 info->fw = adev->gfx.mec_fw; 1453 adev->firmware.fw_size += 1454 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1455 1456 if (adev->gfx.mec2_fw) { 1457 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1458 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1459 info->fw = adev->gfx.mec2_fw; 1460 header = (const struct common_firmware_header *)info->fw->data; 1461 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1462 adev->firmware.fw_size += 1463 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1464 1465 /* TODO: Determine if MEC2 JT FW loading can be removed 1466 for all GFX V9 asic and above */ 1467 if (adev->asic_type != CHIP_ARCTURUS && 1468 adev->asic_type != CHIP_RENOIR) { 1469 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1470 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1471 info->fw = 
adev->gfx.mec2_fw; 1472 adev->firmware.fw_size += 1473 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1474 PAGE_SIZE); 1475 } 1476 } 1477 } 1478 1479 out: 1480 gfx_v9_0_check_if_need_gfxoff(adev); 1481 gfx_v9_0_check_fw_write_wait(adev); 1482 if (err) { 1483 dev_err(adev->dev, 1484 "gfx9: Failed to load firmware \"%s\"\n", 1485 fw_name); 1486 release_firmware(adev->gfx.mec_fw); 1487 adev->gfx.mec_fw = NULL; 1488 release_firmware(adev->gfx.mec2_fw); 1489 adev->gfx.mec2_fw = NULL; 1490 } 1491 return err; 1492 } 1493 1494 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1495 { 1496 const char *chip_name; 1497 int r; 1498 1499 DRM_DEBUG("\n"); 1500 1501 switch (adev->asic_type) { 1502 case CHIP_VEGA10: 1503 chip_name = "vega10"; 1504 break; 1505 case CHIP_VEGA12: 1506 chip_name = "vega12"; 1507 break; 1508 case CHIP_VEGA20: 1509 chip_name = "vega20"; 1510 break; 1511 case CHIP_RAVEN: 1512 if (adev->rev_id >= 8) 1513 chip_name = "raven2"; 1514 else if (adev->pdev->device == 0x15d8) 1515 chip_name = "picasso"; 1516 else 1517 chip_name = "raven"; 1518 break; 1519 case CHIP_ARCTURUS: 1520 chip_name = "arcturus"; 1521 break; 1522 case CHIP_RENOIR: 1523 chip_name = "renoir"; 1524 break; 1525 default: 1526 BUG(); 1527 } 1528 1529 /* No CPG in Arcturus */ 1530 if (adev->asic_type != CHIP_ARCTURUS) { 1531 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1532 if (r) 1533 return r; 1534 } 1535 1536 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1537 if (r) 1538 return r; 1539 1540 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1541 if (r) 1542 return r; 1543 1544 return r; 1545 } 1546 1547 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1548 { 1549 u32 count = 0; 1550 const struct cs_section_def *sect = NULL; 1551 const struct cs_extent_def *ext = NULL; 1552 1553 /* begin clear state */ 1554 count += 2; 1555 /* context control state */ 1556 count += 3; 1557 1558 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1559 for (ext = sect->section; ext->extent != NULL; ++ext) { 1560 if (sect->id == SECT_CONTEXT) 1561 count += 2 + ext->reg_count; 1562 else 1563 return 0; 1564 } 1565 } 1566 1567 /* end clear state */ 1568 count += 2; 1569 /* clear state */ 1570 count += 2; 1571 1572 return count; 1573 } 1574 1575 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1576 volatile u32 *buffer) 1577 { 1578 u32 count = 0, i; 1579 const struct cs_section_def *sect = NULL; 1580 const struct cs_extent_def *ext = NULL; 1581 1582 if (adev->gfx.rlc.cs_data == NULL) 1583 return; 1584 if (buffer == NULL) 1585 return; 1586 1587 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1588 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1589 1590 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1591 buffer[count++] = cpu_to_le32(0x80000000); 1592 buffer[count++] = cpu_to_le32(0x80000000); 1593 1594 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1595 for (ext = sect->section; ext->extent != NULL; ++ext) { 1596 if (sect->id == SECT_CONTEXT) { 1597 buffer[count++] = 1598 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1599 buffer[count++] = cpu_to_le32(ext->reg_index - 1600 PACKET3_SET_CONTEXT_REG_START); 1601 for (i = 0; i < ext->reg_count; i++) 1602 buffer[count++] = cpu_to_le32(ext->extent[i]); 1603 } else { 1604 return; 1605 } 1606 } 1607 } 1608 1609 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1610 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1611 
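/* trailing CLEAR_STATE packet, matching the two dwords reserved for it in gfx_v9_0_get_csb_size() */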
1612 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1613 buffer[count++] = cpu_to_le32(0); 1614 } 1615 1616 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1617 { 1618 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1619 uint32_t pg_always_on_cu_num = 2; 1620 uint32_t always_on_cu_num; 1621 uint32_t i, j, k; 1622 uint32_t mask, cu_bitmap, counter; 1623 1624 if (adev->flags & AMD_IS_APU) 1625 always_on_cu_num = 4; 1626 else if (adev->asic_type == CHIP_VEGA12) 1627 always_on_cu_num = 8; 1628 else 1629 always_on_cu_num = 12; 1630 1631 mutex_lock(&adev->grbm_idx_mutex); 1632 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1633 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1634 mask = 1; 1635 cu_bitmap = 0; 1636 counter = 0; 1637 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1638 1639 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1640 if (cu_info->bitmap[i][j] & mask) { 1641 if (counter == pg_always_on_cu_num) 1642 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1643 if (counter < always_on_cu_num) 1644 cu_bitmap |= mask; 1645 else 1646 break; 1647 counter++; 1648 } 1649 mask <<= 1; 1650 } 1651 1652 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1653 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1654 } 1655 } 1656 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1657 mutex_unlock(&adev->grbm_idx_mutex); 1658 } 1659 1660 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1661 { 1662 uint32_t data; 1663 1664 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1665 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1666 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1667 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1668 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1669 1670 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1671 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1672 1673 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1674 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1675 1676 mutex_lock(&adev->grbm_idx_mutex); 1677 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1678 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1679 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1680 1681 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1682 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1683 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1684 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1685 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1686 1687 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1688 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1689 data &= 0x0000FFFF; 1690 data |= 0x00C00000; 1691 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1692 1693 /* 1694 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1695 * programmed in gfx_v9_0_init_always_on_cu_mask() 1696 */ 1697 1698 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1699 * but used for RLC_LB_CNTL configuration */ 1700 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1701 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1702 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1703 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1704 mutex_unlock(&adev->grbm_idx_mutex); 1705 1706 gfx_v9_0_init_always_on_cu_mask(adev); 1707 } 1708 1709 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1710 { 1711 uint32_t data; 1712 
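/*
 * LBPW setup used for Vega20 (see gfx_v9_0_rlc_init()): same register
 * sequence as gfx_v9_0_init_lbpw() above, but with different threshold
 * and counter values.
 */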
1713 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1714 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1715 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1716 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1717 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1718 1719 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1720 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1721 1722 /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */ 1723 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1724 1725 mutex_lock(&adev->grbm_idx_mutex); 1726 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */ 1727 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1728 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1729 1730 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1731 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1732 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1733 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1734 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1735 1736 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1737 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1738 data &= 0x0000FFFF; 1739 data |= 0x00C00000; 1740 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1741 1742 /* 1743 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1744 * programmed in gfx_v9_0_init_always_on_cu_mask() 1745 */ 1746 1747 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved 1748 * but used for RLC_LB_CNTL configuration */ 1749 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1750 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1751 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1752 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1753 mutex_unlock(&adev->grbm_idx_mutex); 1754 1755 gfx_v9_0_init_always_on_cu_mask(adev); 1756 } 1757 1758 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1759 { 1760 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ?
1 : 0); 1761 } 1762 1763 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1764 { 1765 return 5; 1766 } 1767 1768 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1769 { 1770 const struct cs_section_def *cs_data; 1771 int r; 1772 1773 adev->gfx.rlc.cs_data = gfx9_cs_data; 1774 1775 cs_data = adev->gfx.rlc.cs_data; 1776 1777 if (cs_data) { 1778 /* init clear state block */ 1779 r = amdgpu_gfx_rlc_init_csb(adev); 1780 if (r) 1781 return r; 1782 } 1783 1784 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 1785 /* TODO: double check the cp_table_size for RV */ 1786 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1787 r = amdgpu_gfx_rlc_init_cpt(adev); 1788 if (r) 1789 return r; 1790 } 1791 1792 switch (adev->asic_type) { 1793 case CHIP_RAVEN: 1794 gfx_v9_0_init_lbpw(adev); 1795 break; 1796 case CHIP_VEGA20: 1797 gfx_v9_4_init_lbpw(adev); 1798 break; 1799 default: 1800 break; 1801 } 1802 1803 return 0; 1804 } 1805 1806 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1807 { 1808 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1809 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1810 } 1811 1812 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1813 { 1814 int r; 1815 u32 *hpd; 1816 const __le32 *fw_data; 1817 unsigned fw_size; 1818 u32 *fw; 1819 size_t mec_hpd_size; 1820 1821 const struct gfx_firmware_header_v1_0 *mec_hdr; 1822 1823 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1824 1825 /* take ownership of the relevant compute queues */ 1826 amdgpu_gfx_compute_queue_acquire(adev); 1827 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1828 1829 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1830 AMDGPU_GEM_DOMAIN_VRAM, 1831 &adev->gfx.mec.hpd_eop_obj, 1832 &adev->gfx.mec.hpd_eop_gpu_addr, 1833 (void **)&hpd); 1834 if (r) { 1835 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r); 1836 gfx_v9_0_mec_fini(adev); 1837 return r; 1838 } 1839 1840 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1841 1842 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1843 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1844 1845 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1846 1847 fw_data = (const __le32 *) 1848 (adev->gfx.mec_fw->data + 1849 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1850 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1851 1852 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1853 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1854 &adev->gfx.mec.mec_fw_obj, 1855 &adev->gfx.mec.mec_fw_gpu_addr, 1856 (void **)&fw); 1857 if (r) { 1858 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1859 gfx_v9_0_mec_fini(adev); 1860 return r; 1861 } 1862 1863 memcpy(fw, fw_data, fw_size); 1864 1865 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1866 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1867 1868 return 0; 1869 } 1870 1871 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1872 { 1873 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1874 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1875 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1876 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1877 (SQ_IND_INDEX__FORCE_READ_MASK)); 1878 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1879 } 1880 1881 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1882 uint32_t wave, uint32_t thread, 1883 uint32_t regno,
uint32_t num, uint32_t *out) 1884 { 1885 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1886 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1887 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1888 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1889 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1890 (SQ_IND_INDEX__FORCE_READ_MASK) | 1891 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1892 while (num--) 1893 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1894 } 1895 1896 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1897 { 1898 /* type 1 wave data */ 1899 dst[(*no_fields)++] = 1; 1900 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1901 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1902 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1903 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1904 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1905 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1906 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1907 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1908 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1909 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1910 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1911 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1912 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1913 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1914 } 1915 1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1917 uint32_t wave, uint32_t start, 1918 uint32_t size, uint32_t *dst) 1919 { 1920 wave_read_regs( 1921 adev, simd, wave, 0, 1922 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1923 } 1924 1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1926 uint32_t wave, uint32_t thread, 1927 uint32_t start, uint32_t size, 1928 uint32_t *dst) 1929 { 1930 wave_read_regs( 1931 adev, simd, wave, thread, 1932 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1933 } 1934 1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1936 u32 me, u32 pipe, u32 q, u32 vm) 1937 { 1938 soc15_grbm_select(adev, me, pipe, q, vm); 1939 } 1940 1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1942 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1943 .select_se_sh = &gfx_v9_0_select_se_sh, 1944 .read_wave_data = &gfx_v9_0_read_wave_data, 1945 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1946 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1947 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1948 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1949 .query_ras_error_count = &gfx_v9_0_query_ras_error_count 1950 }; 1951 1952 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1953 { 1954 u32 gb_addr_config; 1955 int err; 1956 1957 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1958 1959 switch (adev->asic_type) { 1960 case CHIP_VEGA10: 1961 adev->gfx.config.max_hw_contexts = 8; 1962 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1963 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1964 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1965 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1966 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1967 break; 1968 case 
CHIP_VEGA12: 1969 adev->gfx.config.max_hw_contexts = 8; 1970 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1971 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1972 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1973 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1974 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1975 DRM_INFO("fix gfx.config for vega12\n"); 1976 break; 1977 case CHIP_VEGA20: 1978 adev->gfx.config.max_hw_contexts = 8; 1979 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1980 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1981 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1982 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1983 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1984 gb_addr_config &= ~0xf3e777ff; 1985 gb_addr_config |= 0x22014042; 1986 /* check vbios table if gpu info is not available */ 1987 err = amdgpu_atomfirmware_get_gfx_info(adev); 1988 if (err) 1989 return err; 1990 break; 1991 case CHIP_RAVEN: 1992 adev->gfx.config.max_hw_contexts = 8; 1993 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1994 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1995 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1996 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1997 if (adev->rev_id >= 8) 1998 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1999 else 2000 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2001 break; 2002 case CHIP_ARCTURUS: 2003 adev->gfx.config.max_hw_contexts = 8; 2004 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2005 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2006 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2007 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2008 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2009 gb_addr_config &= ~0xf3e777ff; 2010 gb_addr_config |= 0x22014042; 2011 break; 2012 case CHIP_RENOIR: 2013 adev->gfx.config.max_hw_contexts = 8; 2014 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2015 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2016 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2017 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2018 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2019 gb_addr_config &= ~0xf3e777ff; 2020 gb_addr_config |= 0x22010042; 2021 break; 2022 default: 2023 BUG(); 2024 break; 2025 } 2026 2027 adev->gfx.config.gb_addr_config = gb_addr_config; 2028 2029 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2030 REG_GET_FIELD( 2031 adev->gfx.config.gb_addr_config, 2032 GB_ADDR_CONFIG, 2033 NUM_PIPES); 2034 2035 adev->gfx.config.max_tile_pipes = 2036 adev->gfx.config.gb_addr_config_fields.num_pipes; 2037 2038 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2039 REG_GET_FIELD( 2040 adev->gfx.config.gb_addr_config, 2041 GB_ADDR_CONFIG, 2042 NUM_BANKS); 2043 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2044 REG_GET_FIELD( 2045 adev->gfx.config.gb_addr_config, 2046 GB_ADDR_CONFIG, 2047 MAX_COMPRESSED_FRAGS); 2048 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2049 REG_GET_FIELD( 2050 adev->gfx.config.gb_addr_config, 2051 GB_ADDR_CONFIG, 2052 NUM_RB_PER_SE); 2053 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2054 REG_GET_FIELD( 2055 adev->gfx.config.gb_addr_config, 2056 GB_ADDR_CONFIG, 2057 NUM_SHADER_ENGINES); 2058 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2059 REG_GET_FIELD( 2060 adev->gfx.config.gb_addr_config, 2061 GB_ADDR_CONFIG, 2062 PIPE_INTERLEAVE_SIZE)); 2063 2064 return 0; 2065 } 2066 2067 static int 
gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2068 int mec, int pipe, int queue) 2069 { 2070 int r; 2071 unsigned irq_type; 2072 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2073 2074 ring = &adev->gfx.compute_ring[ring_id]; 2075 2076 /* mec0 is me1 */ 2077 ring->me = mec + 1; 2078 ring->pipe = pipe; 2079 ring->queue = queue; 2080 2081 ring->ring_obj = NULL; 2082 ring->use_doorbell = true; 2083 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2084 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2085 + (ring_id * GFX9_MEC_HPD_SIZE); 2086 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2087 2088 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2089 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2090 + ring->pipe; 2091 2092 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2093 r = amdgpu_ring_init(adev, ring, 1024, 2094 &adev->gfx.eop_irq, irq_type); 2095 if (r) 2096 return r; 2097 2098 2099 return 0; 2100 } 2101 2102 static int gfx_v9_0_sw_init(void *handle) 2103 { 2104 int i, j, k, r, ring_id; 2105 struct amdgpu_ring *ring; 2106 struct amdgpu_kiq *kiq; 2107 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2108 2109 switch (adev->asic_type) { 2110 case CHIP_VEGA10: 2111 case CHIP_VEGA12: 2112 case CHIP_VEGA20: 2113 case CHIP_RAVEN: 2114 case CHIP_ARCTURUS: 2115 case CHIP_RENOIR: 2116 adev->gfx.mec.num_mec = 2; 2117 break; 2118 default: 2119 adev->gfx.mec.num_mec = 1; 2120 break; 2121 } 2122 2123 adev->gfx.mec.num_pipe_per_mec = 4; 2124 adev->gfx.mec.num_queue_per_pipe = 8; 2125 2126 /* EOP Event */ 2127 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2128 if (r) 2129 return r; 2130 2131 /* Privileged reg */ 2132 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2133 &adev->gfx.priv_reg_irq); 2134 if (r) 2135 return r; 2136 2137 /* Privileged inst */ 2138 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2139 &adev->gfx.priv_inst_irq); 2140 if (r) 2141 return r; 2142 2143 /* ECC error */ 2144 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2145 &adev->gfx.cp_ecc_error_irq); 2146 if (r) 2147 return r; 2148 2149 /* FUE error */ 2150 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2151 &adev->gfx.cp_ecc_error_irq); 2152 if (r) 2153 return r; 2154 2155 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2156 2157 gfx_v9_0_scratch_init(adev); 2158 2159 r = gfx_v9_0_init_microcode(adev); 2160 if (r) { 2161 DRM_ERROR("Failed to load gfx firmware!\n"); 2162 return r; 2163 } 2164 2165 r = adev->gfx.rlc.funcs->init(adev); 2166 if (r) { 2167 DRM_ERROR("Failed to init rlc BOs!\n"); 2168 return r; 2169 } 2170 2171 r = gfx_v9_0_mec_init(adev); 2172 if (r) { 2173 DRM_ERROR("Failed to init MEC BOs!\n"); 2174 return r; 2175 } 2176 2177 /* set up the gfx ring */ 2178 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2179 ring = &adev->gfx.gfx_ring[i]; 2180 ring->ring_obj = NULL; 2181 if (!i) 2182 sprintf(ring->name, "gfx"); 2183 else 2184 sprintf(ring->name, "gfx_%d", i); 2185 ring->use_doorbell = true; 2186 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2187 r = amdgpu_ring_init(adev, ring, 1024, 2188 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 2189 if (r) 2190 return r; 2191 } 2192 2193 /* set up the compute queues - allocate horizontally across pipes 
*/ 2194 ring_id = 0; 2195 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2196 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2197 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2198 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2199 continue; 2200 2201 r = gfx_v9_0_compute_ring_init(adev, 2202 ring_id, 2203 i, k, j); 2204 if (r) 2205 return r; 2206 2207 ring_id++; 2208 } 2209 } 2210 } 2211 2212 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2213 if (r) { 2214 DRM_ERROR("Failed to init KIQ BOs!\n"); 2215 return r; 2216 } 2217 2218 kiq = &adev->gfx.kiq; 2219 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2220 if (r) 2221 return r; 2222 2223 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2224 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2225 if (r) 2226 return r; 2227 2228 adev->gfx.ce_ram_size = 0x8000; 2229 2230 r = gfx_v9_0_gpu_early_init(adev); 2231 if (r) 2232 return r; 2233 2234 return 0; 2235 } 2236 2237 2238 static int gfx_v9_0_sw_fini(void *handle) 2239 { 2240 int i; 2241 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2242 2243 amdgpu_gfx_ras_fini(adev); 2244 2245 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2246 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2247 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2248 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2249 2250 amdgpu_gfx_mqd_sw_fini(adev); 2251 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2252 amdgpu_gfx_kiq_fini(adev); 2253 2254 gfx_v9_0_mec_fini(adev); 2255 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2256 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 2257 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2258 &adev->gfx.rlc.cp_table_gpu_addr, 2259 (void **)&adev->gfx.rlc.cp_table_ptr); 2260 } 2261 gfx_v9_0_free_microcode(adev); 2262 2263 return 0; 2264 } 2265 2266 2267 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2268 { 2269 /* TODO */ 2270 } 2271 2272 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2273 { 2274 u32 data; 2275 2276 if (instance == 0xffffffff) 2277 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2278 else 2279 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2280 2281 if (se_num == 0xffffffff) 2282 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2283 else 2284 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2285 2286 if (sh_num == 0xffffffff) 2287 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2288 else 2289 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2290 2291 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2292 } 2293 2294 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2295 { 2296 u32 data, mask; 2297 2298 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2299 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2300 2301 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2302 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2303 2304 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2305 adev->gfx.config.max_sh_per_se); 2306 2307 return (~data) & mask; 2308 } 2309 2310 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2311 { 2312 int i, j; 2313 u32 data; 2314 u32 active_rbs = 0; 2315 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2316 adev->gfx.config.max_sh_per_se; 2317 2318 
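/*
 * Select each SE/SH in turn, read its render backend disable bits and
 * pack the per-SH active-RB bitmaps into one chip-wide mask.
 */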
mutex_lock(&adev->grbm_idx_mutex); 2319 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2320 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2321 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2322 data = gfx_v9_0_get_rb_active_bitmap(adev); 2323 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2324 rb_bitmap_width_per_sh); 2325 } 2326 } 2327 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2328 mutex_unlock(&adev->grbm_idx_mutex); 2329 2330 adev->gfx.config.backend_enable_mask = active_rbs; 2331 adev->gfx.config.num_rbs = hweight32(active_rbs); 2332 } 2333 2334 #define DEFAULT_SH_MEM_BASES (0x6000) 2335 #define FIRST_COMPUTE_VMID (8) 2336 #define LAST_COMPUTE_VMID (16) 2337 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2338 { 2339 int i; 2340 uint32_t sh_mem_config; 2341 uint32_t sh_mem_bases; 2342 2343 /* 2344 * Configure apertures: 2345 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2346 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2347 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2348 */ 2349 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2350 2351 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2352 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2353 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2354 2355 mutex_lock(&adev->srbm_mutex); 2356 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2357 soc15_grbm_select(adev, 0, 0, 0, i); 2358 /* CP and shaders */ 2359 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2360 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2361 } 2362 soc15_grbm_select(adev, 0, 0, 0, 0); 2363 mutex_unlock(&adev->srbm_mutex); 2364 2365 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2366 access. These should be enabled by FW for target VMIDs. */ 2367 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2368 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2369 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2370 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2371 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2372 } 2373 } 2374 2375 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2376 { 2377 int vmid; 2378 2379 /* 2380 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2381 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2382 * the driver can enable them for graphics. VMID0 should maintain 2383 * access so that HWS firmware can save/restore entries.
2384 */ 2385 for (vmid = 1; vmid < 16; vmid++) { 2386 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2387 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2388 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2389 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2390 } 2391 } 2392 2393 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2394 { 2395 u32 tmp; 2396 int i; 2397 2398 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2399 2400 gfx_v9_0_tiling_mode_table_init(adev); 2401 2402 gfx_v9_0_setup_rb(adev); 2403 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2404 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2405 2406 /* XXX SH_MEM regs */ 2407 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2408 mutex_lock(&adev->srbm_mutex); 2409 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2410 soc15_grbm_select(adev, 0, 0, 0, i); 2411 /* CP and shaders */ 2412 if (i == 0) { 2413 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2414 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2415 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2416 !!amdgpu_noretry); 2417 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2418 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2419 } else { 2420 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2421 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2422 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2423 !!amdgpu_noretry); 2424 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2425 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2426 (adev->gmc.private_aperture_start >> 48)); 2427 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2428 (adev->gmc.shared_aperture_start >> 48)); 2429 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2430 } 2431 } 2432 soc15_grbm_select(adev, 0, 0, 0, 0); 2433 2434 mutex_unlock(&adev->srbm_mutex); 2435 2436 gfx_v9_0_init_compute_vmid(adev); 2437 gfx_v9_0_init_gds_vmid(adev); 2438 } 2439 2440 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2441 { 2442 u32 i, j, k; 2443 u32 mask; 2444 2445 mutex_lock(&adev->grbm_idx_mutex); 2446 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2447 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2448 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2449 for (k = 0; k < adev->usec_timeout; k++) { 2450 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2451 break; 2452 udelay(1); 2453 } 2454 if (k == adev->usec_timeout) { 2455 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2456 0xffffffff, 0xffffffff); 2457 mutex_unlock(&adev->grbm_idx_mutex); 2458 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2459 i, j); 2460 return; 2461 } 2462 } 2463 } 2464 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2465 mutex_unlock(&adev->grbm_idx_mutex); 2466 2467 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2468 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2469 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2470 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2471 for (k = 0; k < adev->usec_timeout; k++) { 2472 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2473 break; 2474 udelay(1); 2475 } 2476 } 2477 2478 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2479 bool enable) 2480 { 2481 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2482 2483 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2484 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
1 : 0); 2485 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2486 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2487 2488 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2489 } 2490 2491 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2492 { 2493 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2494 /* csib */ 2495 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2496 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2497 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2498 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2499 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2500 adev->gfx.rlc.clear_state_size); 2501 } 2502 2503 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2504 int indirect_offset, 2505 int list_size, 2506 int *unique_indirect_regs, 2507 int unique_indirect_reg_count, 2508 int *indirect_start_offsets, 2509 int *indirect_start_offsets_count, 2510 int max_start_offsets_count) 2511 { 2512 int idx; 2513 2514 for (; indirect_offset < list_size; indirect_offset++) { 2515 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2516 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2517 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2518 2519 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2520 indirect_offset += 2; 2521 2522 /* look for the matching indice */ 2523 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2524 if (unique_indirect_regs[idx] == 2525 register_list_format[indirect_offset] || 2526 !unique_indirect_regs[idx]) 2527 break; 2528 } 2529 2530 BUG_ON(idx >= unique_indirect_reg_count); 2531 2532 if (!unique_indirect_regs[idx]) 2533 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2534 2535 indirect_offset++; 2536 } 2537 } 2538 } 2539 2540 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2541 { 2542 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2543 int unique_indirect_reg_count = 0; 2544 2545 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2546 int indirect_start_offsets_count = 0; 2547 2548 int list_size = 0; 2549 int i = 0, j = 0; 2550 u32 tmp = 0; 2551 2552 u32 *register_list_format = 2553 kmemdup(adev->gfx.rlc.register_list_format, 2554 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2555 if (!register_list_format) 2556 return -ENOMEM; 2557 2558 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2559 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2560 gfx_v9_1_parse_ind_reg_list(register_list_format, 2561 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2562 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2563 unique_indirect_regs, 2564 unique_indirect_reg_count, 2565 indirect_start_offsets, 2566 &indirect_start_offsets_count, 2567 ARRAY_SIZE(indirect_start_offsets)); 2568 2569 /* enable auto inc in case it is disabled */ 2570 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2571 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2572 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2573 2574 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2575 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2576 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2577 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2578 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2579 
adev->gfx.rlc.register_restore[i]); 2580 2581 /* load indirect register */ 2582 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2583 adev->gfx.rlc.reg_list_format_start); 2584 2585 /* direct register portion */ 2586 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2587 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2588 register_list_format[i]); 2589 2590 /* indirect register portion */ 2591 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2592 if (register_list_format[i] == 0xFFFFFFFF) { 2593 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2594 continue; 2595 } 2596 2597 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2598 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2599 2600 for (j = 0; j < unique_indirect_reg_count; j++) { 2601 if (register_list_format[i] == unique_indirect_regs[j]) { 2602 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2603 break; 2604 } 2605 } 2606 2607 BUG_ON(j >= unique_indirect_reg_count); 2608 2609 i++; 2610 } 2611 2612 /* set save/restore list size */ 2613 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2614 list_size = list_size >> 1; 2615 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2616 adev->gfx.rlc.reg_restore_list_size); 2617 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2618 2619 /* write the starting offsets to RLC scratch ram */ 2620 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2621 adev->gfx.rlc.starting_offsets_start); 2622 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2623 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2624 indirect_start_offsets[i]); 2625 2626 /* load unique indirect regs*/ 2627 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2628 if (unique_indirect_regs[i] != 0) { 2629 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2630 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2631 unique_indirect_regs[i] & 0x3FFFF); 2632 2633 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2634 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2635 unique_indirect_regs[i] >> 20); 2636 } 2637 } 2638 2639 kfree(register_list_format); 2640 return 0; 2641 } 2642 2643 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2644 { 2645 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2646 } 2647 2648 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2649 bool enable) 2650 { 2651 uint32_t data = 0; 2652 uint32_t default_data = 0; 2653 2654 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2655 if (enable == true) { 2656 /* enable GFXIP control over CGPG */ 2657 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2658 if(default_data != data) 2659 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2660 2661 /* update status */ 2662 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2663 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2664 if(default_data != data) 2665 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2666 } else { 2667 /* restore GFXIP control over GCPG */ 2668 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2669 if(default_data != data) 2670 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2671 } 2672 } 2673 2674 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2675 { 2676 uint32_t data = 0; 2677 2678 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2679 
AMD_PG_SUPPORT_GFX_SMG | 2680 AMD_PG_SUPPORT_GFX_DMG)) { 2681 /* init IDLE_POLL_COUNT = 60 */ 2682 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2683 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2684 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2685 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2686 2687 /* init RLC PG Delay */ 2688 data = 0; 2689 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2690 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2691 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2692 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2693 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2694 2695 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2696 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2697 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2698 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2699 2700 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2701 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2702 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2703 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2704 2705 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2706 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2707 2708 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2709 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2710 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2711 2712 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2713 } 2714 } 2715 2716 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2717 bool enable) 2718 { 2719 uint32_t data = 0; 2720 uint32_t default_data = 0; 2721 2722 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2723 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2724 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2725 enable ? 1 : 0); 2726 if (default_data != data) 2727 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2728 } 2729 2730 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2731 bool enable) 2732 { 2733 uint32_t data = 0; 2734 uint32_t default_data = 0; 2735 2736 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2737 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2738 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2739 enable ? 1 : 0); 2740 if(default_data != data) 2741 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2742 } 2743 2744 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2745 bool enable) 2746 { 2747 uint32_t data = 0; 2748 uint32_t default_data = 0; 2749 2750 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2751 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2752 CP_PG_DISABLE, 2753 enable ? 0 : 1); 2754 if(default_data != data) 2755 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2756 } 2757 2758 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2759 bool enable) 2760 { 2761 uint32_t data, default_data; 2762 2763 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2764 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2765 GFX_POWER_GATING_ENABLE, 2766 enable ? 
1 : 0); 2767 if(default_data != data) 2768 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2769 } 2770 2771 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2772 bool enable) 2773 { 2774 uint32_t data, default_data; 2775 2776 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2777 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2778 GFX_PIPELINE_PG_ENABLE, 2779 enable ? 1 : 0); 2780 if(default_data != data) 2781 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2782 2783 if (!enable) 2784 /* read any GFX register to wake up GFX */ 2785 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2786 } 2787 2788 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2789 bool enable) 2790 { 2791 uint32_t data, default_data; 2792 2793 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2794 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2795 STATIC_PER_CU_PG_ENABLE, 2796 enable ? 1 : 0); 2797 if(default_data != data) 2798 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2799 } 2800 2801 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2802 bool enable) 2803 { 2804 uint32_t data, default_data; 2805 2806 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2807 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2808 DYN_PER_CU_PG_ENABLE, 2809 enable ? 1 : 0); 2810 if(default_data != data) 2811 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2812 } 2813 2814 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2815 { 2816 gfx_v9_0_init_csb(adev); 2817 2818 /* 2819 * Rlc save restore list is workable since v2_1. 2820 * And it's needed by gfxoff feature. 2821 */ 2822 if (adev->gfx.rlc.is_rlc_v2_1) { 2823 if (adev->asic_type == CHIP_VEGA12 || 2824 (adev->asic_type == CHIP_RAVEN && 2825 adev->rev_id >= 8)) 2826 gfx_v9_1_init_rlc_save_restore_list(adev); 2827 gfx_v9_0_enable_save_restore_machine(adev); 2828 } 2829 2830 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2831 AMD_PG_SUPPORT_GFX_SMG | 2832 AMD_PG_SUPPORT_GFX_DMG | 2833 AMD_PG_SUPPORT_CP | 2834 AMD_PG_SUPPORT_GDS | 2835 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2836 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2837 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2838 gfx_v9_0_init_gfx_power_gating(adev); 2839 } 2840 } 2841 2842 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2843 { 2844 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2845 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2846 gfx_v9_0_wait_for_rlc_serdes(adev); 2847 } 2848 2849 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2850 { 2851 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2852 udelay(50); 2853 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2854 udelay(50); 2855 } 2856 2857 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2858 { 2859 #ifdef AMDGPU_RLC_DEBUG_RETRY 2860 u32 rlc_ucode_ver; 2861 #endif 2862 2863 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2864 udelay(50); 2865 2866 /* carrizo do enable cp interrupt after cp inited */ 2867 if (!(adev->flags & AMD_IS_APU)) { 2868 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2869 udelay(50); 2870 } 2871 2872 #ifdef AMDGPU_RLC_DEBUG_RETRY 2873 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2874 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2875 if(rlc_ucode_ver == 0x108) { 2876 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2877 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2878 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2879 * default is 0x9C4 to create a 100us interval */ 2880 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2881 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2882 * to disable the page fault retry interrupts, default is 2883 * 0x100 (256) */ 2884 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2885 } 2886 #endif 2887 } 2888 2889 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2890 { 2891 const struct rlc_firmware_header_v2_0 *hdr; 2892 const __le32 *fw_data; 2893 unsigned i, fw_size; 2894 2895 if (!adev->gfx.rlc_fw) 2896 return -EINVAL; 2897 2898 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2899 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2900 2901 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2902 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2903 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2904 2905 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2906 RLCG_UCODE_LOADING_START_ADDRESS); 2907 for (i = 0; i < fw_size; i++) 2908 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2909 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2910 2911 return 0; 2912 } 2913 2914 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2915 { 2916 int r; 2917 2918 if (amdgpu_sriov_vf(adev)) { 2919 gfx_v9_0_init_csb(adev); 2920 return 0; 2921 } 2922 2923 adev->gfx.rlc.funcs->stop(adev); 2924 2925 /* disable CG */ 2926 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2927 2928 gfx_v9_0_init_pg(adev); 2929 2930 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2931 /* legacy rlc firmware loading */ 2932 r = gfx_v9_0_rlc_load_microcode(adev); 2933 if (r) 2934 return r; 2935 } 2936 2937 switch (adev->asic_type) { 2938 case CHIP_RAVEN: 2939 if (amdgpu_lbpw == 0) 2940 gfx_v9_0_enable_lbpw(adev, false); 2941 else 2942 gfx_v9_0_enable_lbpw(adev, true); 2943 break; 2944 case CHIP_VEGA20: 2945 if (amdgpu_lbpw > 0) 2946 gfx_v9_0_enable_lbpw(adev, true); 2947 else 2948 gfx_v9_0_enable_lbpw(adev, false); 2949 break; 2950 default: 2951 break; 2952 } 2953 2954 adev->gfx.rlc.funcs->start(adev); 2955 2956 return 0; 2957 } 2958 2959 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2960 { 2961 int i; 2962 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2963 2964 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2965 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2966 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 2967 if (!enable) { 2968 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2969 adev->gfx.gfx_ring[i].sched.ready = false; 2970 } 2971 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2972 udelay(50); 2973 } 2974 2975 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2976 { 2977 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2978 const struct gfx_firmware_header_v1_0 *ce_hdr; 2979 const struct gfx_firmware_header_v1_0 *me_hdr; 2980 const __le32 *fw_data; 2981 unsigned i, fw_size; 2982 2983 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2984 return -EINVAL; 2985 2986 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2987 adev->gfx.pfp_fw->data; 2988 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2989 adev->gfx.ce_fw->data; 2990 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2991 adev->gfx.me_fw->data; 2992 2993 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2994 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2995 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2996 2997 gfx_v9_0_cp_gfx_enable(adev, false); 2998 2999 /* PFP */ 3000 fw_data = (const __le32 *) 3001 (adev->gfx.pfp_fw->data + 3002 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3003 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3004 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3005 for (i = 0; i < fw_size; i++) 3006 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3007 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3008 3009 /* CE */ 3010 fw_data = (const __le32 *) 3011 (adev->gfx.ce_fw->data + 3012 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3013 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3014 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3015 for (i = 0; i < fw_size; i++) 3016 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3017 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3018 3019 /* ME */ 3020 fw_data = (const __le32 *) 3021 (adev->gfx.me_fw->data + 3022 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3023 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3024 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3025 for (i = 0; i < fw_size; i++) 3026 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3027 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3028 3029 return 0; 3030 } 3031 3032 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3033 { 3034 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3035 const struct cs_section_def *sect = NULL; 3036 const struct cs_extent_def *ext = NULL; 3037 int r, i, tmp; 3038 3039 /* init the CP */ 3040 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3041 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3042 3043 gfx_v9_0_cp_gfx_enable(adev, true); 3044 3045 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3046 if (r) { 3047 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3048 return r; 3049 } 3050 3051 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3052 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3053 3054 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3055 amdgpu_ring_write(ring, 0x80000000); 3056 amdgpu_ring_write(ring, 0x80000000); 3057 3058 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3059 for (ext = sect->section; ext->extent != NULL; ++ext) { 3060 if (sect->id == SECT_CONTEXT) { 3061 amdgpu_ring_write(ring, 3062 PACKET3(PACKET3_SET_CONTEXT_REG, 3063 
ext->reg_count)); 3064 amdgpu_ring_write(ring, 3065 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3066 for (i = 0; i < ext->reg_count; i++) 3067 amdgpu_ring_write(ring, ext->extent[i]); 3068 } 3069 } 3070 } 3071 3072 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3073 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3074 3075 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3076 amdgpu_ring_write(ring, 0); 3077 3078 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3079 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3080 amdgpu_ring_write(ring, 0x8000); 3081 amdgpu_ring_write(ring, 0x8000); 3082 3083 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 3084 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3085 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3086 amdgpu_ring_write(ring, tmp); 3087 amdgpu_ring_write(ring, 0); 3088 3089 amdgpu_ring_commit(ring); 3090 3091 return 0; 3092 } 3093 3094 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3095 { 3096 struct amdgpu_ring *ring; 3097 u32 tmp; 3098 u32 rb_bufsz; 3099 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3100 3101 /* Set the write pointer delay */ 3102 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3103 3104 /* set the RB to use vmid 0 */ 3105 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3106 3107 /* Set ring buffer size */ 3108 ring = &adev->gfx.gfx_ring[0]; 3109 rb_bufsz = order_base_2(ring->ring_size / 8); 3110 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3111 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3112 #ifdef __BIG_ENDIAN 3113 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3114 #endif 3115 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3116 3117 /* Initialize the ring buffer's write pointers */ 3118 ring->wptr = 0; 3119 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3120 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3121 3122 /* set the wb address whether it's enabled or not */ 3123 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3124 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3125 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3126 3127 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3128 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3129 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3130 3131 mdelay(1); 3132 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3133 3134 rb_addr = ring->gpu_addr >> 8; 3135 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3136 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3137 3138 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3139 if (ring->use_doorbell) { 3140 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3141 DOORBELL_OFFSET, ring->doorbell_index); 3142 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3143 DOORBELL_EN, 1); 3144 } else { 3145 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3146 } 3147 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3148 3149 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3150 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3151 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3152 3153 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3154 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3155 3156 3157 /* start the ring */ 3158 gfx_v9_0_cp_gfx_start(adev); 3159 ring->sched.ready = true;
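/* at this point the RB0 registers, the doorbell range and the clear-state preamble have all been programmed, so the gfx ring can start accepting submissions */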
3160 3161 return 0; 3162 } 3163 3164 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3165 { 3166 int i; 3167 3168 if (enable) { 3169 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3170 } else { 3171 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3172 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3173 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3174 adev->gfx.compute_ring[i].sched.ready = false; 3175 adev->gfx.kiq.ring.sched.ready = false; 3176 } 3177 udelay(50); 3178 } 3179 3180 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3181 { 3182 const struct gfx_firmware_header_v1_0 *mec_hdr; 3183 const __le32 *fw_data; 3184 unsigned i; 3185 u32 tmp; 3186 3187 if (!adev->gfx.mec_fw) 3188 return -EINVAL; 3189 3190 gfx_v9_0_cp_compute_enable(adev, false); 3191 3192 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3193 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3194 3195 fw_data = (const __le32 *) 3196 (adev->gfx.mec_fw->data + 3197 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3198 tmp = 0; 3199 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3200 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3201 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3202 3203 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3204 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3205 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3206 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3207 3208 /* MEC1 */ 3209 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3210 mec_hdr->jt_offset); 3211 for (i = 0; i < mec_hdr->jt_size; i++) 3212 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3213 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3214 3215 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3216 adev->gfx.mec_fw_version); 3217 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 3218 3219 return 0; 3220 } 3221 3222 /* KIQ functions */ 3223 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3224 { 3225 uint32_t tmp; 3226 struct amdgpu_device *adev = ring->adev; 3227 3228 /* tell RLC which is KIQ queue */ 3229 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3230 tmp &= 0xffffff00; 3231 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3232 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3233 tmp |= 0x80; 3234 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3235 } 3236 3237 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3238 { 3239 struct amdgpu_device *adev = ring->adev; 3240 struct v9_mqd *mqd = ring->mqd_ptr; 3241 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3242 uint32_t tmp; 3243 3244 mqd->header = 0xC0310800; 3245 mqd->compute_pipelinestat_enable = 0x00000001; 3246 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3247 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3248 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3249 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3250 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3251 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3252 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3253 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3254 mqd->compute_misc_reserved = 0x00000003; 3255 3256 mqd->dynamic_cu_mask_addr_lo = 3257 lower_32_bits(ring->mqd_gpu_addr 3258 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3259 mqd->dynamic_cu_mask_addr_hi = 3260 upper_32_bits(ring->mqd_gpu_addr 3261 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3262 3263 eop_base_addr = ring->eop_gpu_addr >> 8; 3264 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3265 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3266 3267 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3268 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3269 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3270 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3271 3272 mqd->cp_hqd_eop_control = tmp; 3273 3274 /* enable doorbell? 
*/ 3275 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3276 3277 if (ring->use_doorbell) { 3278 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3279 DOORBELL_OFFSET, ring->doorbell_index); 3280 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3281 DOORBELL_EN, 1); 3282 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3283 DOORBELL_SOURCE, 0); 3284 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3285 DOORBELL_HIT, 0); 3286 } else { 3287 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3288 DOORBELL_EN, 0); 3289 } 3290 3291 mqd->cp_hqd_pq_doorbell_control = tmp; 3292 3293 /* disable the queue if it's active */ 3294 ring->wptr = 0; 3295 mqd->cp_hqd_dequeue_request = 0; 3296 mqd->cp_hqd_pq_rptr = 0; 3297 mqd->cp_hqd_pq_wptr_lo = 0; 3298 mqd->cp_hqd_pq_wptr_hi = 0; 3299 3300 /* set the pointer to the MQD */ 3301 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3302 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3303 3304 /* set MQD vmid to 0 */ 3305 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3306 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3307 mqd->cp_mqd_control = tmp; 3308 3309 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3310 hqd_gpu_addr = ring->gpu_addr >> 8; 3311 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3312 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3313 3314 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3315 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3316 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3317 (order_base_2(ring->ring_size / 4) - 1)); 3318 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3319 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3320 #ifdef __BIG_ENDIAN 3321 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3322 #endif 3323 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3324 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3325 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3326 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3327 mqd->cp_hqd_pq_control = tmp; 3328 3329 /* set the wb address whether it's enabled or not */ 3330 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3331 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3332 mqd->cp_hqd_pq_rptr_report_addr_hi = 3333 upper_32_bits(wb_gpu_addr) & 0xffff; 3334 3335 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3336 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3337 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3338 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3339 3340 tmp = 0; 3341 /* enable the doorbell if requested */ 3342 if (ring->use_doorbell) { 3343 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3344 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3345 DOORBELL_OFFSET, ring->doorbell_index); 3346 3347 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3348 DOORBELL_EN, 1); 3349 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3350 DOORBELL_SOURCE, 0); 3351 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3352 DOORBELL_HIT, 0); 3353 } 3354 3355 mqd->cp_hqd_pq_doorbell_control = tmp; 3356 3357 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3358 ring->wptr = 0; 3359 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3360 3361 /* set the vmid for the queue */ 3362 mqd->cp_hqd_vmid = 0; 3363 3364 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3365 
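/* keep the hardware default CP_HQD_PERSISTENT_STATE bits and only override the preload size below */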
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3366 mqd->cp_hqd_persistent_state = tmp; 3367 3368 /* set MIN_IB_AVAIL_SIZE */ 3369 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3370 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3371 mqd->cp_hqd_ib_control = tmp; 3372 3373 /* the map_queues packet doesn't need to activate the queue, 3374 * so only the KIQ needs to set this field. 3375 */ 3376 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3377 mqd->cp_hqd_active = 1; 3378 3379 return 0; 3380 } 3381 3382 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3383 { 3384 struct amdgpu_device *adev = ring->adev; 3385 struct v9_mqd *mqd = ring->mqd_ptr; 3386 int j; 3387 3388 /* disable wptr polling */ 3389 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3390 3391 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3392 mqd->cp_hqd_eop_base_addr_lo); 3393 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3394 mqd->cp_hqd_eop_base_addr_hi); 3395 3396 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3397 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3398 mqd->cp_hqd_eop_control); 3399 3400 /* enable doorbell? */ 3401 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3402 mqd->cp_hqd_pq_doorbell_control); 3403 3404 /* disable the queue if it's active */ 3405 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3406 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3407 for (j = 0; j < adev->usec_timeout; j++) { 3408 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3409 break; 3410 udelay(1); 3411 } 3412 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3413 mqd->cp_hqd_dequeue_request); 3414 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3415 mqd->cp_hqd_pq_rptr); 3416 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3417 mqd->cp_hqd_pq_wptr_lo); 3418 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3419 mqd->cp_hqd_pq_wptr_hi); 3420 } 3421 3422 /* set the pointer to the MQD */ 3423 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3424 mqd->cp_mqd_base_addr_lo); 3425 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3426 mqd->cp_mqd_base_addr_hi); 3427 3428 /* set MQD vmid to 0 */ 3429 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3430 mqd->cp_mqd_control); 3431 3432 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 3433 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3434 mqd->cp_hqd_pq_base_lo); 3435 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3436 mqd->cp_hqd_pq_base_hi); 3437 3438 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3439 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3440 mqd->cp_hqd_pq_control); 3441 3442 /* set the wb address whether it's enabled or not */ 3443 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3444 mqd->cp_hqd_pq_rptr_report_addr_lo); 3445 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3446 mqd->cp_hqd_pq_rptr_report_addr_hi); 3447 3448 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3449 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3450 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3451 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3452 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3453 3454 /* enable the doorbell if requested */ 3455 if (ring->use_doorbell) { 3456 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3457 (adev->doorbell_index.kiq * 2) << 2); 3458 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3459 (adev->doorbell_index.userqueue_end * 2) << 2); 3460 } 3461 3462 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3463 mqd->cp_hqd_pq_doorbell_control); 3464 3465
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3466 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3467 mqd->cp_hqd_pq_wptr_lo); 3468 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3469 mqd->cp_hqd_pq_wptr_hi); 3470 3471 /* set the vmid for the queue */ 3472 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3473 3474 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3475 mqd->cp_hqd_persistent_state); 3476 3477 /* activate the queue */ 3478 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3479 mqd->cp_hqd_active); 3480 3481 if (ring->use_doorbell) 3482 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3483 3484 return 0; 3485 } 3486 3487 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3488 { 3489 struct amdgpu_device *adev = ring->adev; 3490 int j; 3491 3492 /* disable the queue if it's active */ 3493 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3494 3495 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3496 3497 for (j = 0; j < adev->usec_timeout; j++) { 3498 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3499 break; 3500 udelay(1); 3501 } 3502 3503 if (j == adev->usec_timeout) { 3504 DRM_DEBUG("KIQ dequeue request failed.\n"); 3505 3506 /* Manual disable if dequeue request times out */ 3507 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3508 } 3509 3510 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3511 0); 3512 } 3513 3514 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3515 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3516 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3517 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3518 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3519 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3520 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3521 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3522 3523 return 0; 3524 } 3525 3526 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3527 { 3528 struct amdgpu_device *adev = ring->adev; 3529 struct v9_mqd *mqd = ring->mqd_ptr; 3530 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3531 3532 gfx_v9_0_kiq_setting(ring); 3533 3534 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3535 /* reset MQD to a clean status */ 3536 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3537 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3538 3539 /* reset ring buffer */ 3540 ring->wptr = 0; 3541 amdgpu_ring_clear_ring(ring); 3542 3543 mutex_lock(&adev->srbm_mutex); 3544 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3545 gfx_v9_0_kiq_init_register(ring); 3546 soc15_grbm_select(adev, 0, 0, 0, 0); 3547 mutex_unlock(&adev->srbm_mutex); 3548 } else { 3549 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3550 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3551 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3552 mutex_lock(&adev->srbm_mutex); 3553 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3554 gfx_v9_0_mqd_init(ring); 3555 gfx_v9_0_kiq_init_register(ring); 3556 soc15_grbm_select(adev, 0, 0, 0, 0); 3557 mutex_unlock(&adev->srbm_mutex); 3558 3559 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3560 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3561 } 3562 3563 return 0; 3564 } 3565 3566 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3567 { 3568 struct amdgpu_device *adev = ring->adev; 3569 struct v9_mqd *mqd = ring->mqd_ptr; 3570 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3571 3572 if
(!adev->in_gpu_reset && !adev->in_suspend) { 3573 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3574 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3575 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3576 mutex_lock(&adev->srbm_mutex); 3577 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3578 gfx_v9_0_mqd_init(ring); 3579 soc15_grbm_select(adev, 0, 0, 0, 0); 3580 mutex_unlock(&adev->srbm_mutex); 3581 3582 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3583 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3584 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3585 /* reset MQD to a clean status */ 3586 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3587 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3588 3589 /* reset ring buffer */ 3590 ring->wptr = 0; 3591 amdgpu_ring_clear_ring(ring); 3592 } else { 3593 amdgpu_ring_clear_ring(ring); 3594 } 3595 3596 return 0; 3597 } 3598 3599 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3600 { 3601 struct amdgpu_ring *ring; 3602 int r; 3603 3604 ring = &adev->gfx.kiq.ring; 3605 3606 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3607 if (unlikely(r != 0)) 3608 return r; 3609 3610 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3611 if (unlikely(r != 0)) 3612 return r; 3613 3614 gfx_v9_0_kiq_init_queue(ring); 3615 amdgpu_bo_kunmap(ring->mqd_obj); 3616 ring->mqd_ptr = NULL; 3617 amdgpu_bo_unreserve(ring->mqd_obj); 3618 ring->sched.ready = true; 3619 return 0; 3620 } 3621 3622 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3623 { 3624 struct amdgpu_ring *ring = NULL; 3625 int r = 0, i; 3626 3627 gfx_v9_0_cp_compute_enable(adev, true); 3628 3629 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3630 ring = &adev->gfx.compute_ring[i]; 3631 3632 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3633 if (unlikely(r != 0)) 3634 goto done; 3635 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3636 if (!r) { 3637 r = gfx_v9_0_kcq_init_queue(ring); 3638 amdgpu_bo_kunmap(ring->mqd_obj); 3639 ring->mqd_ptr = NULL; 3640 } 3641 amdgpu_bo_unreserve(ring->mqd_obj); 3642 if (r) 3643 goto done; 3644 } 3645 3646 r = amdgpu_gfx_enable_kcq(adev); 3647 done: 3648 return r; 3649 } 3650 3651 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3652 { 3653 int r, i; 3654 struct amdgpu_ring *ring; 3655 3656 if (!(adev->flags & AMD_IS_APU)) 3657 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3658 3659 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3660 if (adev->asic_type != CHIP_ARCTURUS) { 3661 /* legacy firmware loading */ 3662 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3663 if (r) 3664 return r; 3665 } 3666 3667 r = gfx_v9_0_cp_compute_load_microcode(adev); 3668 if (r) 3669 return r; 3670 } 3671 3672 r = gfx_v9_0_kiq_resume(adev); 3673 if (r) 3674 return r; 3675 3676 if (adev->asic_type != CHIP_ARCTURUS) { 3677 r = gfx_v9_0_cp_gfx_resume(adev); 3678 if (r) 3679 return r; 3680 } 3681 3682 r = gfx_v9_0_kcq_resume(adev); 3683 if (r) 3684 return r; 3685 3686 if (adev->asic_type != CHIP_ARCTURUS) { 3687 ring = &adev->gfx.gfx_ring[0]; 3688 r = amdgpu_ring_test_helper(ring); 3689 if (r) 3690 return r; 3691 } 3692 3693 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3694 ring = &adev->gfx.compute_ring[i]; 3695 amdgpu_ring_test_helper(ring); 3696 } 3697 3698 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3699 3700 return 0; 3701 } 3702 3703 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 
3704 { 3705 u32 tmp; 3706 3707 if (adev->asic_type != CHIP_ARCTURUS) 3708 return; 3709 3710 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3711 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3712 adev->df.hash_status.hash_64k); 3713 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3714 adev->df.hash_status.hash_2m); 3715 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3716 adev->df.hash_status.hash_1g); 3717 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3718 } 3719 3720 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3721 { 3722 if (adev->asic_type != CHIP_ARCTURUS) 3723 gfx_v9_0_cp_gfx_enable(adev, enable); 3724 gfx_v9_0_cp_compute_enable(adev, enable); 3725 } 3726 3727 static int gfx_v9_0_hw_init(void *handle) 3728 { 3729 int r; 3730 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3731 3732 if (!amdgpu_sriov_vf(adev)) 3733 gfx_v9_0_init_golden_registers(adev); 3734 3735 gfx_v9_0_constants_init(adev); 3736 3737 gfx_v9_0_init_tcp_config(adev); 3738 3739 r = adev->gfx.rlc.funcs->resume(adev); 3740 if (r) 3741 return r; 3742 3743 r = gfx_v9_0_cp_resume(adev); 3744 if (r) 3745 return r; 3746 3747 return r; 3748 } 3749 3750 static int gfx_v9_0_hw_fini(void *handle) 3751 { 3752 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3753 3754 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3755 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3756 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3757 3758 /* DF freeze and kcq disable will fail */ 3759 if (!amdgpu_ras_intr_triggered()) 3760 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3761 amdgpu_gfx_disable_kcq(adev); 3762 3763 if (amdgpu_sriov_vf(adev)) { 3764 gfx_v9_0_cp_gfx_enable(adev, false); 3765 /* must disable polling for SRIOV when hw finished, otherwise 3766 * CPC engine may still keep fetching WB address which is already 3767 * invalid after sw finished and trigger DMAR reading error in 3768 * hypervisor side. 
3769 */ 3770 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3771 return 0; 3772 } 3773 3774 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3775 * otherwise KIQ is hanging when binding back 3776 */ 3777 if (!adev->in_gpu_reset && !adev->in_suspend) { 3778 mutex_lock(&adev->srbm_mutex); 3779 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3780 adev->gfx.kiq.ring.pipe, 3781 adev->gfx.kiq.ring.queue, 0); 3782 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3783 soc15_grbm_select(adev, 0, 0, 0, 0); 3784 mutex_unlock(&adev->srbm_mutex); 3785 } 3786 3787 gfx_v9_0_cp_enable(adev, false); 3788 adev->gfx.rlc.funcs->stop(adev); 3789 3790 return 0; 3791 } 3792 3793 static int gfx_v9_0_suspend(void *handle) 3794 { 3795 return gfx_v9_0_hw_fini(handle); 3796 } 3797 3798 static int gfx_v9_0_resume(void *handle) 3799 { 3800 return gfx_v9_0_hw_init(handle); 3801 } 3802 3803 static bool gfx_v9_0_is_idle(void *handle) 3804 { 3805 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3806 3807 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3808 GRBM_STATUS, GUI_ACTIVE)) 3809 return false; 3810 else 3811 return true; 3812 } 3813 3814 static int gfx_v9_0_wait_for_idle(void *handle) 3815 { 3816 unsigned i; 3817 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3818 3819 for (i = 0; i < adev->usec_timeout; i++) { 3820 if (gfx_v9_0_is_idle(handle)) 3821 return 0; 3822 udelay(1); 3823 } 3824 return -ETIMEDOUT; 3825 } 3826 3827 static int gfx_v9_0_soft_reset(void *handle) 3828 { 3829 u32 grbm_soft_reset = 0; 3830 u32 tmp; 3831 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3832 3833 /* GRBM_STATUS */ 3834 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3835 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3836 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3837 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3838 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3839 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3840 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3841 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3842 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3843 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3844 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3845 } 3846 3847 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3848 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3849 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3850 } 3851 3852 /* GRBM_STATUS2 */ 3853 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3854 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3855 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3856 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3857 3858 3859 if (grbm_soft_reset) { 3860 /* stop the rlc */ 3861 adev->gfx.rlc.funcs->stop(adev); 3862 3863 if (adev->asic_type != CHIP_ARCTURUS) 3864 /* Disable GFX parsing/prefetching */ 3865 gfx_v9_0_cp_gfx_enable(adev, false); 3866 3867 /* Disable MEC parsing/prefetching */ 3868 gfx_v9_0_cp_compute_enable(adev, false); 3869 3870 if (grbm_soft_reset) { 3871 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3872 tmp |= grbm_soft_reset; 3873 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3874 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3875 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3876 3877 udelay(50); 3878 3879 tmp &= ~grbm_soft_reset; 3880 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3881 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3882 } 3883 3884 /* Wait a little for things to settle down */ 3885 
udelay(50); 3886 } 3887 return 0; 3888 } 3889 3890 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3891 { 3892 uint64_t clock; 3893 3894 mutex_lock(&adev->gfx.gpu_clock_mutex); 3895 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 3896 uint32_t tmp, lsb, msb, i = 0; 3897 do { 3898 if (i != 0) 3899 udelay(1); 3900 tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3901 lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); 3902 msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3903 i++; 3904 } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); 3905 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); 3906 } else { 3907 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3908 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3909 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3910 } 3911 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3912 return clock; 3913 } 3914 3915 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3916 uint32_t vmid, 3917 uint32_t gds_base, uint32_t gds_size, 3918 uint32_t gws_base, uint32_t gws_size, 3919 uint32_t oa_base, uint32_t oa_size) 3920 { 3921 struct amdgpu_device *adev = ring->adev; 3922 3923 /* GDS Base */ 3924 gfx_v9_0_write_data_to_reg(ring, 0, false, 3925 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3926 gds_base); 3927 3928 /* GDS Size */ 3929 gfx_v9_0_write_data_to_reg(ring, 0, false, 3930 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3931 gds_size); 3932 3933 /* GWS */ 3934 gfx_v9_0_write_data_to_reg(ring, 0, false, 3935 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3936 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3937 3938 /* OA */ 3939 gfx_v9_0_write_data_to_reg(ring, 0, false, 3940 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3941 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3942 } 3943 3944 static const u32 vgpr_init_compute_shader[] = 3945 { 3946 0xb07c0000, 0xbe8000ff, 3947 0x000000f8, 0xbf110800, 3948 0x7e000280, 0x7e020280, 3949 0x7e040280, 0x7e060280, 3950 0x7e080280, 0x7e0a0280, 3951 0x7e0c0280, 0x7e0e0280, 3952 0x80808800, 0xbe803200, 3953 0xbf84fff5, 0xbf9c0000, 3954 0xd28c0001, 0x0001007f, 3955 0xd28d0001, 0x0002027e, 3956 0x10020288, 0xb8810904, 3957 0xb7814000, 0xd1196a01, 3958 0x00000301, 0xbe800087, 3959 0xbefc00c1, 0xd89c4000, 3960 0x00020201, 0xd89cc080, 3961 0x00040401, 0x320202ff, 3962 0x00000800, 0x80808100, 3963 0xbf84fff8, 0x7e020280, 3964 0xbf810000, 0x00000000, 3965 }; 3966 3967 static const u32 sgpr_init_compute_shader[] = 3968 { 3969 0xb07c0000, 0xbe8000ff, 3970 0x0000005f, 0xbee50080, 3971 0xbe812c65, 0xbe822c65, 3972 0xbe832c65, 0xbe842c65, 3973 0xbe852c65, 0xb77c0005, 3974 0x80808500, 0xbf84fff8, 3975 0xbe800080, 0xbf810000, 3976 }; 3977 3978 /* When below register arrays changed, please update gpr_reg_size, 3979 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 3980 to cover all gfx9 ASICs */ 3981 static const struct soc15_reg_entry vgpr_init_regs[] = { 3982 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 3983 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 3984 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 3985 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3986 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 3987 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3988 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3989 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3990 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3991 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3992 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 3993 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 3994 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 3995 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 3996 }; 3997 3998 static const struct soc15_reg_entry sgpr1_init_regs[] = { 3999 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4000 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4001 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4002 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4003 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4004 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4005 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4006 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4007 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4008 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4009 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4010 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4011 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4012 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4013 }; 4014 4015 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4016 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4017 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4018 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4019 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4020 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4021 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4022 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4023 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4024 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4025 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4026 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4027 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4028 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4029 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4030 }; 4031 4032 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 4033 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4034 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4035 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4036 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4037 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4038 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4039 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4040 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4041 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4042 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4043 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4044 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4045 { 
SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4046 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4047 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4048 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4049 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4050 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4051 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4052 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4053 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4054 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4055 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4056 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4057 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4058 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4059 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4060 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4061 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4062 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4063 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4064 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4065 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4066 { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, 4067 }; 4068 4069 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4070 { 4071 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4072 int i, r; 4073 4074 /* only support when RAS is enabled */ 4075 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4076 return 0; 4077 4078 r = amdgpu_ring_alloc(ring, 7); 4079 if (r) { 4080 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4081 ring->name, r); 4082 return r; 4083 } 4084 4085 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4086 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4087 4088 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4089 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4090 PACKET3_DMA_DATA_DST_SEL(1) | 4091 PACKET3_DMA_DATA_SRC_SEL(2) | 4092 PACKET3_DMA_DATA_ENGINE(0))); 4093 amdgpu_ring_write(ring, 0); 4094 amdgpu_ring_write(ring, 0); 4095 amdgpu_ring_write(ring, 0); 4096 amdgpu_ring_write(ring, 0); 4097 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4098 adev->gds.gds_size); 4099 4100 amdgpu_ring_commit(ring); 4101 4102 for (i = 0; i < adev->usec_timeout; i++) { 4103 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4104 break; 4105 udelay(1); 4106 } 4107 4108 if (i >= adev->usec_timeout) 4109 r = -ETIMEDOUT; 4110 4111 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4112 4113 return r; 4114 } 4115 4116 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4117 { 4118 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4119 struct amdgpu_ib ib; 4120 struct dma_fence *f = NULL; 4121 int r, i, j, k; 4122 unsigned total_size, vgpr_offset, sgpr_offset; 4123 u64 gpu_addr; 4124 4125 int compute_dim_x = adev->gfx.config.max_shader_engines * 4126 adev->gfx.config.max_cu_per_sh * 4127 adev->gfx.config.max_sh_per_se; 4128 int sgpr_work_group_size = 5; 4129 int gpr_reg_size = compute_dim_x / 16 + 6; 4130 4131 /* only support when RAS is enabled */ 4132 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4133 return 0; 4134 4135 /* bail if the compute ring is not ready */ 4136 if (!ring->sched.ready) 4137 return 0; 4138 4139 total_size = 4140 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4141 total_size += 4142 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 
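/* per dispatch block: 3 dwords for each SET_SH_REG register write (header, offset, value), 4 dwords for the COMPUTE_PGM_LO/HI write, 5 dwords for the DISPATCH_DIRECT packet and 2 dwords for the CS partial flush EVENT_WRITE, at 4 bytes per dword */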
4143 total_size += 4144 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4145 total_size = ALIGN(total_size, 256); 4146 vgpr_offset = total_size; 4147 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4148 sgpr_offset = total_size; 4149 total_size += sizeof(sgpr_init_compute_shader); 4150 4151 /* allocate an indirect buffer to put the commands in */ 4152 memset(&ib, 0, sizeof(ib)); 4153 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4154 if (r) { 4155 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4156 return r; 4157 } 4158 4159 /* load the compute shaders */ 4160 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4161 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4162 4163 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4164 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4165 4166 /* init the ib length to 0 */ 4167 ib.length_dw = 0; 4168 4169 /* VGPR */ 4170 /* write the register state for the compute dispatch */ 4171 for (i = 0; i < gpr_reg_size; i++) { 4172 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4173 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4174 - PACKET3_SET_SH_REG_START; 4175 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4176 } 4177 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4178 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4179 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4180 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4181 - PACKET3_SET_SH_REG_START; 4182 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4183 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4184 4185 /* write dispatch packet */ 4186 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4187 ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ 4188 ib.ptr[ib.length_dw++] = 1; /* y */ 4189 ib.ptr[ib.length_dw++] = 1; /* z */ 4190 ib.ptr[ib.length_dw++] = 4191 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4192 4193 /* write CS partial flush packet */ 4194 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4195 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4196 4197 /* SGPR1 */ 4198 /* write the register state for the compute dispatch */ 4199 for (i = 0; i < gpr_reg_size; i++) { 4200 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4201 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4202 - PACKET3_SET_SH_REG_START; 4203 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4204 } 4205 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4206 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4207 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4208 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4209 - PACKET3_SET_SH_REG_START; 4210 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4211 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4212 4213 /* write dispatch packet */ 4214 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4215 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4216 ib.ptr[ib.length_dw++] = 1; /* y */ 4217 ib.ptr[ib.length_dw++] = 1; /* z */ 4218 ib.ptr[ib.length_dw++] = 4219 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4220 4221 /* write CS partial flush packet */ 4222 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4223 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4224 4225 /* SGPR2 */ 4226 /* write the register state 
for the compute dispatch */ 4227 for (i = 0; i < gpr_reg_size; i++) { 4228 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4229 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4230 - PACKET3_SET_SH_REG_START; 4231 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4232 } 4233 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4234 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4235 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4236 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4237 - PACKET3_SET_SH_REG_START; 4238 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4239 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4240 4241 /* write dispatch packet */ 4242 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4243 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4244 ib.ptr[ib.length_dw++] = 1; /* y */ 4245 ib.ptr[ib.length_dw++] = 1; /* z */ 4246 ib.ptr[ib.length_dw++] = 4247 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4248 4249 /* write CS partial flush packet */ 4250 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4251 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4252 4253 /* shedule the ib on the ring */ 4254 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4255 if (r) { 4256 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4257 goto fail; 4258 } 4259 4260 /* wait for the GPU to finish processing the IB */ 4261 r = dma_fence_wait(f, false); 4262 if (r) { 4263 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4264 goto fail; 4265 } 4266 4267 /* read back registers to clear the counters */ 4268 mutex_lock(&adev->grbm_idx_mutex); 4269 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 4270 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 4271 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 4272 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 4273 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 4274 } 4275 } 4276 } 4277 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 4278 mutex_unlock(&adev->grbm_idx_mutex); 4279 4280 fail: 4281 amdgpu_ib_free(adev, &ib, NULL); 4282 dma_fence_put(f); 4283 4284 return r; 4285 } 4286 4287 static int gfx_v9_0_early_init(void *handle) 4288 { 4289 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4290 4291 if (adev->asic_type == CHIP_ARCTURUS) 4292 adev->gfx.num_gfx_rings = 0; 4293 else 4294 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4295 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 4296 gfx_v9_0_set_kiq_pm4_funcs(adev); 4297 gfx_v9_0_set_ring_funcs(adev); 4298 gfx_v9_0_set_irq_funcs(adev); 4299 gfx_v9_0_set_gds_init(adev); 4300 gfx_v9_0_set_rlc_funcs(adev); 4301 4302 return 0; 4303 } 4304 4305 static int gfx_v9_0_ecc_late_init(void *handle) 4306 { 4307 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4308 int r; 4309 4310 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4311 if (r) 4312 return r; 4313 4314 /* requires IBs so do in late init after IB pool is initialized */ 4315 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4316 if (r) 4317 return r; 4318 4319 r = amdgpu_gfx_ras_late_init(adev); 4320 if (r) 4321 return r; 4322 4323 return 0; 4324 } 4325 4326 static int gfx_v9_0_late_init(void *handle) 4327 { 4328 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4329 int r; 4330 4331 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4332 if (r) 4333 return r; 4334 4335 r = amdgpu_irq_get(adev, 
&adev->gfx.priv_inst_irq, 0); 4336 if (r) 4337 return r; 4338 4339 r = gfx_v9_0_ecc_late_init(handle); 4340 if (r) 4341 return r; 4342 4343 return 0; 4344 } 4345 4346 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4347 { 4348 uint32_t rlc_setting; 4349 4350 /* if RLC is not enabled, do nothing */ 4351 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4352 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4353 return false; 4354 4355 return true; 4356 } 4357 4358 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4359 { 4360 uint32_t data; 4361 unsigned i; 4362 4363 data = RLC_SAFE_MODE__CMD_MASK; 4364 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4365 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4366 4367 /* wait for RLC_SAFE_MODE */ 4368 for (i = 0; i < adev->usec_timeout; i++) { 4369 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4370 break; 4371 udelay(1); 4372 } 4373 } 4374 4375 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4376 { 4377 uint32_t data; 4378 4379 data = RLC_SAFE_MODE__CMD_MASK; 4380 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4381 } 4382 4383 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4384 bool enable) 4385 { 4386 amdgpu_gfx_rlc_enter_safe_mode(adev); 4387 4388 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4389 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4390 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4391 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4392 } else { 4393 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4394 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4395 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4396 } 4397 4398 amdgpu_gfx_rlc_exit_safe_mode(adev); 4399 } 4400 4401 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4402 bool enable) 4403 { 4404 /* TODO: double check if we need to perform under safe mode */ 4405 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4406 4407 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4408 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4409 else 4410 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4411 4412 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4413 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4414 else 4415 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4416 4417 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4418 } 4419 4420 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4421 bool enable) 4422 { 4423 uint32_t data, def; 4424 4425 amdgpu_gfx_rlc_enter_safe_mode(adev); 4426 4427 /* It is disabled by HW by default */ 4428 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4429 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4430 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4431 4432 if (adev->asic_type != CHIP_VEGA12) 4433 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4434 4435 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4436 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4437 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4438 4439 /* only for Vega10 & Raven1 */ 4440 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4441 4442 if (def != data) 4443 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4444 4445 /* MGLS is a global flag to control all MGLS in GFX */ 4446 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4447 /* 2 - RLC memory Light sleep */ 4448 if (adev->cg_flags & 
AMD_CG_SUPPORT_GFX_RLC_LS) { 4449 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4450 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4451 if (def != data) 4452 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4453 } 4454 /* 3 - CP memory Light sleep */ 4455 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4456 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4457 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4458 if (def != data) 4459 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4460 } 4461 } 4462 } else { 4463 /* 1 - MGCG_OVERRIDE */ 4464 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4465 4466 if (adev->asic_type != CHIP_VEGA12) 4467 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4468 4469 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4470 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4471 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4472 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4473 4474 if (def != data) 4475 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4476 4477 /* 2 - disable MGLS in RLC */ 4478 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4479 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4480 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4481 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4482 } 4483 4484 /* 3 - disable MGLS in CP */ 4485 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4486 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4487 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4488 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4489 } 4490 } 4491 4492 amdgpu_gfx_rlc_exit_safe_mode(adev); 4493 } 4494 4495 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4496 bool enable) 4497 { 4498 uint32_t data, def; 4499 4500 if (adev->asic_type == CHIP_ARCTURUS) 4501 return; 4502 4503 amdgpu_gfx_rlc_enter_safe_mode(adev); 4504 4505 /* Enable 3D CGCG/CGLS */ 4506 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4507 /* write cmd to clear cgcg/cgls ov */ 4508 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4509 /* unset CGCG override */ 4510 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4511 /* update CGCG and CGLS override bits */ 4512 if (def != data) 4513 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4514 4515 /* enable 3Dcgcg FSM(0x0000363f) */ 4516 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4517 4518 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4519 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4520 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4521 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4522 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4523 if (def != data) 4524 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4525 4526 /* set IDLE_POLL_COUNT(0x00900100) */ 4527 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4528 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4529 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4530 if (def != data) 4531 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4532 } else { 4533 /* Disable CGCG/CGLS */ 4534 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4535 /* disable cgcg, cgls should be disabled */ 4536 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4537 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4538 /* disable cgcg and cgls in FSM */ 4539 if (def != data) 4540 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4541 } 4542 4543 amdgpu_gfx_rlc_exit_safe_mode(adev); 4544 } 4545 4546 
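/* coarse grain clock gating: clear the RLC CGCG/CGLS overrides, program the idle-threshold based CGCG/CGLS FSM in RLC_CGCG_CGLS_CTRL and set the CP wptr poll idle count; the disable path only clears the FSM enable bits */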
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4547 bool enable) 4548 { 4549 uint32_t def, data; 4550 4551 amdgpu_gfx_rlc_enter_safe_mode(adev); 4552 4553 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4554 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4555 /* unset CGCG override */ 4556 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4557 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4558 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4559 else 4560 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4561 /* update CGCG and CGLS override bits */ 4562 if (def != data) 4563 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4564 4565 /* enable cgcg FSM(0x0000363F) */ 4566 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4567 4568 if (adev->asic_type == CHIP_ARCTURUS) 4569 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4570 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4571 else 4572 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4573 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4574 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4575 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4576 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4577 if (def != data) 4578 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4579 4580 /* set IDLE_POLL_COUNT(0x00900100) */ 4581 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4582 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4583 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4584 if (def != data) 4585 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4586 } else { 4587 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4588 /* reset CGCG/CGLS bits */ 4589 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4590 /* disable cgcg and cgls in FSM */ 4591 if (def != data) 4592 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4593 } 4594 4595 amdgpu_gfx_rlc_exit_safe_mode(adev); 4596 } 4597 4598 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4599 bool enable) 4600 { 4601 if (enable) { 4602 /* CGCG/CGLS should be enabled after MGCG/MGLS 4603 * === MGCG + MGLS === 4604 */ 4605 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4606 /* === CGCG /CGLS for GFX 3D Only === */ 4607 gfx_v9_0_update_3d_clock_gating(adev, enable); 4608 /* === CGCG + CGLS === */ 4609 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4610 } else { 4611 /* CGCG/CGLS should be disabled before MGCG/MGLS 4612 * === CGCG + CGLS === 4613 */ 4614 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4615 /* === CGCG /CGLS for GFX 3D Only === */ 4616 gfx_v9_0_update_3d_clock_gating(adev, enable); 4617 /* === MGCG + MGLS === */ 4618 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4619 } 4620 return 0; 4621 } 4622 4623 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4624 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4625 .set_safe_mode = gfx_v9_0_set_safe_mode, 4626 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4627 .init = gfx_v9_0_rlc_init, 4628 .get_csb_size = gfx_v9_0_get_csb_size, 4629 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4630 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4631 .resume = gfx_v9_0_rlc_resume, 4632 .stop = gfx_v9_0_rlc_stop, 4633 .reset = gfx_v9_0_rlc_reset, 4634 .start = gfx_v9_0_rlc_start 4635 }; 4636 4637 static int gfx_v9_0_set_powergating_state(void *handle, 4638 enum amd_powergating_state state) 4639 { 4640 struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; 4641 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4642 4643 switch (adev->asic_type) { 4644 case CHIP_RAVEN: 4645 case CHIP_RENOIR: 4646 if (!enable) { 4647 amdgpu_gfx_off_ctrl(adev, false); 4648 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4649 } 4650 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4651 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4652 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4653 } else { 4654 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4655 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4656 } 4657 4658 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4659 gfx_v9_0_enable_cp_power_gating(adev, true); 4660 else 4661 gfx_v9_0_enable_cp_power_gating(adev, false); 4662 4663 /* update gfx cgpg state */ 4664 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4665 4666 /* update mgcg state */ 4667 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4668 4669 if (enable) 4670 amdgpu_gfx_off_ctrl(adev, true); 4671 break; 4672 case CHIP_VEGA12: 4673 if (!enable) { 4674 amdgpu_gfx_off_ctrl(adev, false); 4675 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4676 } else { 4677 amdgpu_gfx_off_ctrl(adev, true); 4678 } 4679 break; 4680 default: 4681 break; 4682 } 4683 4684 return 0; 4685 } 4686 4687 static int gfx_v9_0_set_clockgating_state(void *handle, 4688 enum amd_clockgating_state state) 4689 { 4690 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4691 4692 if (amdgpu_sriov_vf(adev)) 4693 return 0; 4694 4695 switch (adev->asic_type) { 4696 case CHIP_VEGA10: 4697 case CHIP_VEGA12: 4698 case CHIP_VEGA20: 4699 case CHIP_RAVEN: 4700 case CHIP_ARCTURUS: 4701 case CHIP_RENOIR: 4702 gfx_v9_0_update_gfx_clock_gating(adev, 4703 state == AMD_CG_STATE_GATE ? 
true : false); 4704 break; 4705 default: 4706 break; 4707 } 4708 return 0; 4709 } 4710 4711 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4712 { 4713 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4714 int data; 4715 4716 if (amdgpu_sriov_vf(adev)) 4717 *flags = 0; 4718 4719 /* AMD_CG_SUPPORT_GFX_MGCG */ 4720 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 4721 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4722 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4723 4724 /* AMD_CG_SUPPORT_GFX_CGCG */ 4725 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 4726 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4727 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4728 4729 /* AMD_CG_SUPPORT_GFX_CGLS */ 4730 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4731 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4732 4733 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4734 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 4735 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4736 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4737 4738 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4739 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 4740 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4741 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4742 4743 if (adev->asic_type != CHIP_ARCTURUS) { 4744 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4745 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 4746 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4747 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4748 4749 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4750 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4751 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4752 } 4753 } 4754 4755 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4756 { 4757 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4758 } 4759 4760 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4761 { 4762 struct amdgpu_device *adev = ring->adev; 4763 u64 wptr; 4764 4765 /* XXX check if swapping is necessary on BE */ 4766 if (ring->use_doorbell) { 4767 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4768 } else { 4769 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4770 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4771 } 4772 4773 return wptr; 4774 } 4775 4776 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4777 { 4778 struct amdgpu_device *adev = ring->adev; 4779 4780 if (ring->use_doorbell) { 4781 /* XXX check if swapping is necessary on BE */ 4782 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4783 WDOORBELL64(ring->doorbell_index, ring->wptr); 4784 } else { 4785 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4786 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4787 } 4788 } 4789 4790 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4791 { 4792 struct amdgpu_device *adev = ring->adev; 4793 u32 ref_and_mask, reg_mem_engine; 4794 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 4795 4796 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4797 switch (ring->me) { 4798 case 1: 4799 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4800 break; 4801 case 2: 4802 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4803 break; 4804 default: 4805 return; 4806 } 4807 reg_mem_engine = 0; 4808 } else { 4809 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4810 reg_mem_engine = 1; /* pfp */ 4811 } 4812 4813 
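	/*
	 * The call below emits a single WAIT_REG_MEM packet that writes
	 * ref_and_mask to the NBIO HDP flush request register and then
	 * polls the HDP flush done register (poll interval 0x20) until
	 * the same bit reads back, so outstanding HDP writes are flushed
	 * before subsequent packets on this ring execute.
	 */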
gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4814 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 4815 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 4816 ref_and_mask, ref_and_mask, 0x20); 4817 } 4818 4819 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4820 struct amdgpu_job *job, 4821 struct amdgpu_ib *ib, 4822 uint32_t flags) 4823 { 4824 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4825 u32 header, control = 0; 4826 4827 if (ib->flags & AMDGPU_IB_FLAG_CE) 4828 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4829 else 4830 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4831 4832 control |= ib->length_dw | (vmid << 24); 4833 4834 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4835 control |= INDIRECT_BUFFER_PRE_ENB(1); 4836 4837 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4838 gfx_v9_0_ring_emit_de_meta(ring); 4839 } 4840 4841 amdgpu_ring_write(ring, header); 4842 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4843 amdgpu_ring_write(ring, 4844 #ifdef __BIG_ENDIAN 4845 (2 << 0) | 4846 #endif 4847 lower_32_bits(ib->gpu_addr)); 4848 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4849 amdgpu_ring_write(ring, control); 4850 } 4851 4852 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4853 struct amdgpu_job *job, 4854 struct amdgpu_ib *ib, 4855 uint32_t flags) 4856 { 4857 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4858 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4859 4860 /* Currently, there is a high possibility to get wave ID mismatch 4861 * between ME and GDS, leading to a hw deadlock, because ME generates 4862 * different wave IDs than the GDS expects. This situation happens 4863 * randomly when at least 5 compute pipes use GDS ordered append. 4864 * The wave IDs generated by ME are also wrong after suspend/resume. 4865 * Those are probably bugs somewhere else in the kernel driver. 4866 * 4867 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4868 * GDS to 0 for this ring (me/pipe). 4869 */ 4870 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4871 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4872 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4873 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4874 } 4875 4876 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4877 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4878 amdgpu_ring_write(ring, 4879 #ifdef __BIG_ENDIAN 4880 (2 << 0) | 4881 #endif 4882 lower_32_bits(ib->gpu_addr)); 4883 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4884 amdgpu_ring_write(ring, control); 4885 } 4886 4887 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4888 u64 seq, unsigned flags) 4889 { 4890 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4891 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4892 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4893 4894 /* RELEASE_MEM - flush caches, send int */ 4895 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4896 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4897 EOP_TC_NC_ACTION_EN) : 4898 (EOP_TCL1_ACTION_EN | 4899 EOP_TC_ACTION_EN | 4900 EOP_TC_WB_ACTION_EN | 4901 EOP_TC_MD_ACTION_EN)) | 4902 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4903 EVENT_INDEX(5))); 4904 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4905 4906 /* 4907 * the address should be Qword aligned if 64bit write, Dword 4908 * aligned if only send 32bit data low (discard data high) 4909 */ 4910 if (write64bit) 4911 BUG_ON(addr & 0x7); 4912 else 4913 BUG_ON(addr & 0x3); 4914 amdgpu_ring_write(ring, lower_32_bits(addr)); 4915 amdgpu_ring_write(ring, upper_32_bits(addr)); 4916 amdgpu_ring_write(ring, lower_32_bits(seq)); 4917 amdgpu_ring_write(ring, upper_32_bits(seq)); 4918 amdgpu_ring_write(ring, 0); 4919 } 4920 4921 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4922 { 4923 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4924 uint32_t seq = ring->fence_drv.sync_seq; 4925 uint64_t addr = ring->fence_drv.gpu_addr; 4926 4927 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4928 lower_32_bits(addr), upper_32_bits(addr), 4929 seq, 0xffffffff, 4); 4930 } 4931 4932 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4933 unsigned vmid, uint64_t pd_addr) 4934 { 4935 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4936 4937 /* compute doesn't have PFP */ 4938 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4939 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4940 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4941 amdgpu_ring_write(ring, 0x0); 4942 } 4943 } 4944 4945 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4946 { 4947 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4948 } 4949 4950 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4951 { 4952 u64 wptr; 4953 4954 /* XXX check if swapping is necessary on BE */ 4955 if (ring->use_doorbell) 4956 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4957 else 4958 BUG(); 4959 return wptr; 4960 } 4961 4962 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4963 bool acquire) 4964 { 4965 struct amdgpu_device *adev = ring->adev; 4966 int pipe_num, tmp, reg; 4967 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4968 4969 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4970 4971 /* first me only has 2 entries, GFX and HP3D */ 4972 if (ring->me > 0) 4973 pipe_num -= 2; 4974 4975 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4976 tmp = RREG32(reg); 4977 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4978 WREG32(reg, tmp); 4979 } 4980 4981 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4982 struct amdgpu_ring *ring, 4983 bool acquire) 4984 { 4985 int i, pipe; 4986 bool reserve; 4987 struct amdgpu_ring *iring; 4988 4989 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4990 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4991 if (acquire) 4992 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4993 else 4994 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4995 4996 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4997 /* Clear all reservations - everyone reacquires all resources */ 4998 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4999 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 5000 true); 5001 5002 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 5003 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 5004 true); 5005 } else { 5006 /* Lower all pipes without a current reservation */ 5007 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 5008 iring = &adev->gfx.gfx_ring[i]; 5009 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5010 iring->me, 5011 iring->pipe, 5012 0); 5013 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5014 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5015 } 5016 5017 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 5018 iring = &adev->gfx.compute_ring[i]; 5019 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5020 iring->me, 5021 iring->pipe, 5022 0); 5023 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5024 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5025 } 5026 } 5027 5028 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 5029 } 5030 5031 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 5032 struct amdgpu_ring *ring, 5033 bool acquire) 5034 { 5035 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 5036 uint32_t queue_priority = acquire ? 
0xf : 0x0; 5037 5038 mutex_lock(&adev->srbm_mutex); 5039 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5040 5041 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 5042 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 5043 5044 soc15_grbm_select(adev, 0, 0, 0, 0); 5045 mutex_unlock(&adev->srbm_mutex); 5046 } 5047 5048 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 5049 enum drm_sched_priority priority) 5050 { 5051 struct amdgpu_device *adev = ring->adev; 5052 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 5053 5054 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 5055 return; 5056 5057 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 5058 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 5059 } 5060 5061 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5062 { 5063 struct amdgpu_device *adev = ring->adev; 5064 5065 /* XXX check if swapping is necessary on BE */ 5066 if (ring->use_doorbell) { 5067 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5068 WDOORBELL64(ring->doorbell_index, ring->wptr); 5069 } else{ 5070 BUG(); /* only DOORBELL method supported on gfx9 now */ 5071 } 5072 } 5073 5074 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5075 u64 seq, unsigned int flags) 5076 { 5077 struct amdgpu_device *adev = ring->adev; 5078 5079 /* we only allocate 32bit for each seq wb address */ 5080 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5081 5082 /* write fence seq to the "addr" */ 5083 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5084 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5085 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5086 amdgpu_ring_write(ring, lower_32_bits(addr)); 5087 amdgpu_ring_write(ring, upper_32_bits(addr)); 5088 amdgpu_ring_write(ring, lower_32_bits(seq)); 5089 5090 if (flags & AMDGPU_FENCE_FLAG_INT) { 5091 /* set register to trigger INT */ 5092 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5093 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5094 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5095 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5096 amdgpu_ring_write(ring, 0); 5097 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5098 } 5099 } 5100 5101 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5102 { 5103 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5104 amdgpu_ring_write(ring, 0); 5105 } 5106 5107 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5108 { 5109 struct v9_ce_ib_state ce_payload = {0}; 5110 uint64_t csa_addr; 5111 int cnt; 5112 5113 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5114 csa_addr = amdgpu_csa_vaddr(ring->adev); 5115 5116 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5117 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5118 WRITE_DATA_DST_SEL(8) | 5119 WR_CONFIRM) | 5120 WRITE_DATA_CACHE_POLICY(0)); 5121 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5122 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5123 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5124 } 5125 5126 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5127 { 5128 struct v9_de_ib_state de_payload = {0}; 5129 uint64_t csa_addr, gds_addr; 5130 int cnt; 5131 5132 csa_addr = amdgpu_csa_vaddr(ring->adev); 5133 gds_addr = csa_addr + 4096; 5134 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 5135 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5136 5137 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5138 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5139 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5140 WRITE_DATA_DST_SEL(8) | 5141 WR_CONFIRM) | 5142 WRITE_DATA_CACHE_POLICY(0)); 5143 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5144 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5145 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5146 } 5147 5148 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5149 { 5150 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5151 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5152 } 5153 5154 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5155 { 5156 uint32_t dw2 = 0; 5157 5158 if (amdgpu_sriov_vf(ring->adev)) 5159 gfx_v9_0_ring_emit_ce_meta(ring); 5160 5161 gfx_v9_0_ring_emit_tmz(ring, true); 5162 5163 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5164 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5165 /* set load_global_config & load_global_uconfig */ 5166 dw2 |= 0x8001; 5167 /* set load_cs_sh_regs */ 5168 dw2 |= 0x01000000; 5169 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5170 dw2 |= 0x10002; 5171 5172 /* set load_ce_ram if preamble presented */ 5173 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5174 dw2 |= 0x10000000; 5175 } else { 5176 /* still load_ce_ram if this is the first time preamble presented 5177 * although there is no context switch happens. 5178 */ 5179 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5180 dw2 |= 0x10000000; 5181 } 5182 5183 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5184 amdgpu_ring_write(ring, dw2); 5185 amdgpu_ring_write(ring, 0); 5186 } 5187 5188 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5189 { 5190 unsigned ret; 5191 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5192 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5193 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5194 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5195 ret = ring->wptr & ring->buf_mask; 5196 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5197 return ret; 5198 } 5199 5200 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5201 { 5202 unsigned cur; 5203 BUG_ON(offset > ring->buf_mask); 5204 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5205 5206 cur = (ring->wptr & ring->buf_mask) - 1; 5207 if (likely(cur > offset)) 5208 ring->ring[offset] = cur - offset; 5209 else 5210 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5211 } 5212 5213 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5214 { 5215 struct amdgpu_device *adev = ring->adev; 5216 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 5217 5218 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5219 amdgpu_ring_write(ring, 0 | /* src: register*/ 5220 (5 << 8) | /* dst: memory */ 5221 (1 << 20)); /* write confirm */ 5222 amdgpu_ring_write(ring, reg); 5223 amdgpu_ring_write(ring, 0); 5224 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5225 kiq->reg_val_offs * 4)); 5226 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5227 kiq->reg_val_offs * 4)); 5228 } 5229 
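/*
 * gfx_v9_0_ring_emit_wreg - emit a register write on behalf of the CP.
 *
 * A WRITE_DATA packet is used to store @val into the register at
 * offset @reg.  The control word depends on the ring type: gfx rings
 * add an engine select and write confirmation, the KIQ uses the
 * single-address (no auto-increment) mode, and all other rings just
 * request write confirmation.
 */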
5230 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5231 uint32_t val) 5232 { 5233 uint32_t cmd = 0; 5234 5235 switch (ring->funcs->type) { 5236 case AMDGPU_RING_TYPE_GFX: 5237 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5238 break; 5239 case AMDGPU_RING_TYPE_KIQ: 5240 cmd = (1 << 16); /* no inc addr */ 5241 break; 5242 default: 5243 cmd = WR_CONFIRM; 5244 break; 5245 } 5246 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5247 amdgpu_ring_write(ring, cmd); 5248 amdgpu_ring_write(ring, reg); 5249 amdgpu_ring_write(ring, 0); 5250 amdgpu_ring_write(ring, val); 5251 } 5252 5253 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5254 uint32_t val, uint32_t mask) 5255 { 5256 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5257 } 5258 5259 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5260 uint32_t reg0, uint32_t reg1, 5261 uint32_t ref, uint32_t mask) 5262 { 5263 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5264 struct amdgpu_device *adev = ring->adev; 5265 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5266 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5267 5268 if (fw_version_ok) 5269 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5270 ref, mask, 0x20); 5271 else 5272 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5273 ref, mask); 5274 } 5275 5276 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5277 { 5278 struct amdgpu_device *adev = ring->adev; 5279 uint32_t value = 0; 5280 5281 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5282 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5283 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5284 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5285 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5286 } 5287 5288 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5289 enum amdgpu_interrupt_state state) 5290 { 5291 switch (state) { 5292 case AMDGPU_IRQ_STATE_DISABLE: 5293 case AMDGPU_IRQ_STATE_ENABLE: 5294 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5295 TIME_STAMP_INT_ENABLE, 5296 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5297 break; 5298 default: 5299 break; 5300 } 5301 } 5302 5303 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5304 int me, int pipe, 5305 enum amdgpu_interrupt_state state) 5306 { 5307 u32 mec_int_cntl, mec_int_cntl_reg; 5308 5309 /* 5310 * amdgpu controls only the first MEC. That's why this function only 5311 * handles the setting of interrupts for this specific MEC. All other 5312 * pipes' interrupts are set by amdkfd. 
5313 */ 5314 5315 if (me == 1) { 5316 switch (pipe) { 5317 case 0: 5318 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5319 break; 5320 case 1: 5321 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5322 break; 5323 case 2: 5324 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5325 break; 5326 case 3: 5327 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5328 break; 5329 default: 5330 DRM_DEBUG("invalid pipe %d\n", pipe); 5331 return; 5332 } 5333 } else { 5334 DRM_DEBUG("invalid me %d\n", me); 5335 return; 5336 } 5337 5338 switch (state) { 5339 case AMDGPU_IRQ_STATE_DISABLE: 5340 mec_int_cntl = RREG32(mec_int_cntl_reg); 5341 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5342 TIME_STAMP_INT_ENABLE, 0); 5343 WREG32(mec_int_cntl_reg, mec_int_cntl); 5344 break; 5345 case AMDGPU_IRQ_STATE_ENABLE: 5346 mec_int_cntl = RREG32(mec_int_cntl_reg); 5347 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5348 TIME_STAMP_INT_ENABLE, 1); 5349 WREG32(mec_int_cntl_reg, mec_int_cntl); 5350 break; 5351 default: 5352 break; 5353 } 5354 } 5355 5356 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5357 struct amdgpu_irq_src *source, 5358 unsigned type, 5359 enum amdgpu_interrupt_state state) 5360 { 5361 switch (state) { 5362 case AMDGPU_IRQ_STATE_DISABLE: 5363 case AMDGPU_IRQ_STATE_ENABLE: 5364 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5365 PRIV_REG_INT_ENABLE, 5366 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5367 break; 5368 default: 5369 break; 5370 } 5371 5372 return 0; 5373 } 5374 5375 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5376 struct amdgpu_irq_src *source, 5377 unsigned type, 5378 enum amdgpu_interrupt_state state) 5379 { 5380 switch (state) { 5381 case AMDGPU_IRQ_STATE_DISABLE: 5382 case AMDGPU_IRQ_STATE_ENABLE: 5383 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5384 PRIV_INSTR_INT_ENABLE, 5385 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5386 default: 5387 break; 5388 } 5389 5390 return 0; 5391 } 5392 5393 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5394 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5395 CP_ECC_ERROR_INT_ENABLE, 1) 5396 5397 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5398 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5399 CP_ECC_ERROR_INT_ENABLE, 0) 5400 5401 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5402 struct amdgpu_irq_src *source, 5403 unsigned type, 5404 enum amdgpu_interrupt_state state) 5405 { 5406 switch (state) { 5407 case AMDGPU_IRQ_STATE_DISABLE: 5408 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5409 CP_ECC_ERROR_INT_ENABLE, 0); 5410 DISABLE_ECC_ON_ME_PIPE(1, 0); 5411 DISABLE_ECC_ON_ME_PIPE(1, 1); 5412 DISABLE_ECC_ON_ME_PIPE(1, 2); 5413 DISABLE_ECC_ON_ME_PIPE(1, 3); 5414 break; 5415 5416 case AMDGPU_IRQ_STATE_ENABLE: 5417 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5418 CP_ECC_ERROR_INT_ENABLE, 1); 5419 ENABLE_ECC_ON_ME_PIPE(1, 0); 5420 ENABLE_ECC_ON_ME_PIPE(1, 1); 5421 ENABLE_ECC_ON_ME_PIPE(1, 2); 5422 ENABLE_ECC_ON_ME_PIPE(1, 3); 5423 break; 5424 default: 5425 break; 5426 } 5427 5428 return 0; 5429 } 5430 5431 5432 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5433 struct amdgpu_irq_src *src, 5434 unsigned type, 5435 enum amdgpu_interrupt_state state) 5436 { 5437 switch (type) { 5438 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5439 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5440 break; 5441 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5442 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5443 break; 5444 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5445 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5446 break; 5447 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5448 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5449 break; 5450 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5451 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5452 break; 5453 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5454 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5455 break; 5456 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5457 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5458 break; 5459 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5460 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5461 break; 5462 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5463 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5464 break; 5465 default: 5466 break; 5467 } 5468 return 0; 5469 } 5470 5471 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5472 struct amdgpu_irq_src *source, 5473 struct amdgpu_iv_entry *entry) 5474 { 5475 int i; 5476 u8 me_id, pipe_id, queue_id; 5477 struct amdgpu_ring *ring; 5478 5479 DRM_DEBUG("IH: CP EOP\n"); 5480 me_id = (entry->ring_id & 0x0c) >> 2; 5481 pipe_id = (entry->ring_id & 0x03) >> 0; 5482 queue_id = (entry->ring_id & 0x70) >> 4; 5483 5484 switch (me_id) { 5485 case 0: 5486 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5487 break; 5488 case 1: 5489 case 2: 5490 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5491 ring = &adev->gfx.compute_ring[i]; 5492 /* Per-queue interrupt is supported for MEC starting from VI. 5493 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
5494 */ 5495 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5496 amdgpu_fence_process(ring); 5497 } 5498 break; 5499 } 5500 return 0; 5501 } 5502 5503 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5504 struct amdgpu_iv_entry *entry) 5505 { 5506 u8 me_id, pipe_id, queue_id; 5507 struct amdgpu_ring *ring; 5508 int i; 5509 5510 me_id = (entry->ring_id & 0x0c) >> 2; 5511 pipe_id = (entry->ring_id & 0x03) >> 0; 5512 queue_id = (entry->ring_id & 0x70) >> 4; 5513 5514 switch (me_id) { 5515 case 0: 5516 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5517 break; 5518 case 1: 5519 case 2: 5520 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5521 ring = &adev->gfx.compute_ring[i]; 5522 if (ring->me == me_id && ring->pipe == pipe_id && 5523 ring->queue == queue_id) 5524 drm_sched_fault(&ring->sched); 5525 } 5526 break; 5527 } 5528 } 5529 5530 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5531 struct amdgpu_irq_src *source, 5532 struct amdgpu_iv_entry *entry) 5533 { 5534 DRM_ERROR("Illegal register access in command stream\n"); 5535 gfx_v9_0_fault(adev, entry); 5536 return 0; 5537 } 5538 5539 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5540 struct amdgpu_irq_src *source, 5541 struct amdgpu_iv_entry *entry) 5542 { 5543 DRM_ERROR("Illegal instruction in command stream\n"); 5544 gfx_v9_0_fault(adev, entry); 5545 return 0; 5546 } 5547 5548 5549 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = { 5550 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 5551 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5552 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 5553 }, 5554 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 5555 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 5556 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 5557 }, 5558 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5559 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 5560 0, 0 5561 }, 5562 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5563 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 5564 0, 0 5565 }, 5566 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 5567 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 5568 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 5569 }, 5570 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5571 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 5572 0, 0 5573 }, 5574 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5575 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5576 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 5577 }, 5578 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 5579 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 5580 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 5581 }, 5582 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 5583 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 5584 0, 0 5585 }, 5586 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 5587 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 5588 0, 0 5589 }, 5590 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 5591 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 5592 0, 0 5593 }, 5594 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5595 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 5596 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 5597 }, 5598 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5599 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 5600 0, 0 5601 }, 5602 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 
5603 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5604 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 5605 }, 5606 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5607 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5608 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5609 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 5610 }, 5611 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5612 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5613 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 5614 0, 0 5615 }, 5616 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5617 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5618 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5619 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 5620 }, 5621 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5622 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5623 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5624 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 5625 }, 5626 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5627 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5628 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5629 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 5630 }, 5631 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5632 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5633 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5634 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 5635 }, 5636 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 5637 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 5638 0, 0 5639 }, 5640 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5641 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5642 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 5643 }, 5644 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5645 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 5646 0, 0 5647 }, 5648 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5649 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 5650 0, 0 5651 }, 5652 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5653 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 5654 0, 0 5655 }, 5656 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5657 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 5658 0, 0 5659 }, 5660 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5661 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 5662 0, 0 5663 }, 5664 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5665 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 5666 0, 0 5667 }, 5668 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5669 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5670 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 5671 }, 5672 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5673 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5674 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 5675 }, 5676 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5677 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5678 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 5679 }, 5680 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5681 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5682 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 5683 }, 5684 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5685 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5686 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 5687 }, 5688 { "TCC_IN_USE_DEC", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5689 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 5690 0, 0 5691 }, 5692 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5693 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 5694 0, 0 5695 }, 5696 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5697 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 5698 0, 0 5699 }, 5700 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5701 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 5702 0, 0 5703 }, 5704 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5705 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 5706 0, 0 5707 }, 5708 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5709 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 5710 0, 0 5711 }, 5712 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5713 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 5714 0, 0 5715 }, 5716 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5717 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 5718 0, 0 5719 }, 5720 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5721 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 5722 0, 0 5723 }, 5724 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5725 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5726 0, 0 5727 }, 5728 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5729 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 5730 0, 0 5731 }, 5732 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5733 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5734 0, 0 5735 }, 5736 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5737 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 5738 0, 0 5739 }, 5740 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 5741 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 5742 0, 0 5743 }, 5744 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5745 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5746 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 5747 }, 5748 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5749 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5750 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 5751 }, 5752 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5753 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 5754 0, 0 5755 }, 5756 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5757 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 5758 0, 0 5759 }, 5760 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5761 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 5762 0, 0 5763 }, 5764 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5765 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5766 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 5767 }, 5768 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5769 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5770 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 5771 }, 5772 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5773 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5774 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 5775 }, 5776 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5777 
SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5778 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 5779 }, 5780 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5781 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 5782 0, 0 5783 }, 5784 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5785 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5786 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 5787 }, 5788 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5789 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5790 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 5791 }, 5792 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5793 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 5794 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 5795 }, 5796 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5797 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5798 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 5799 }, 5800 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5801 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5802 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 5803 }, 5804 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5805 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5806 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 5807 }, 5808 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5809 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5810 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 5811 }, 5812 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5813 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5814 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 5815 }, 5816 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5817 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5818 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 5819 }, 5820 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5821 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5822 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 5823 }, 5824 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5825 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5826 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 5827 }, 5828 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5829 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5830 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 5831 }, 5832 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5833 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5834 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 5835 }, 5836 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5837 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5838 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 5839 }, 5840 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5841 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5842 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 5843 }, 5844 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5845 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5846 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 5847 }, 5848 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5849 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5850 SOC15_REG_FIELD(SQC_EDC_CNT2, 
DATA_BANKA_BANK_RAM_DED_COUNT) 5851 }, 5852 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5853 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5854 0, 0 5855 }, 5856 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5857 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 5858 0, 0 5859 }, 5860 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5861 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 5862 0, 0 5863 }, 5864 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5865 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 5866 0, 0 5867 }, 5868 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5869 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 5870 0, 0 5871 }, 5872 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5873 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 5874 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 5875 }, 5876 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5877 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 5878 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 5879 }, 5880 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5881 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 5882 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 5883 }, 5884 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5885 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 5886 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 5887 }, 5888 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5889 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 5890 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 5891 }, 5892 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5893 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 5894 0, 0 5895 }, 5896 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5897 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 5898 0, 0 5899 }, 5900 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5901 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 5902 0, 0 5903 }, 5904 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5905 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 5906 0, 0 5907 }, 5908 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5909 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 5910 0, 0 5911 }, 5912 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5913 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 5914 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 5915 }, 5916 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5917 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 5918 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 5919 }, 5920 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5921 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 5922 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 5923 }, 5924 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5925 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 5926 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 5927 }, 5928 { "EA_WRET_TAGMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5929 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 5930 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 5931 }, 5932 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5933 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 5934 0, 0 5935 }, 5936 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5937 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 5938 0, 0 5939 }, 5940 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5941 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 5942 0, 0 5943 }, 5944 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5945 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 5946 0, 0 5947 }, 5948 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5949 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 5950 0, 0 5951 }, 5952 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5953 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 5954 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 5955 }, 5956 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5957 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 5958 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 5959 }, 5960 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5961 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 5962 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 5963 }, 5964 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5965 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 5966 0, 0 5967 }, 5968 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5969 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 5970 0, 0 5971 }, 5972 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5973 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 5974 0, 0 5975 }, 5976 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5977 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 5978 0, 0 5979 }, 5980 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5981 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 5982 0, 0 5983 }, 5984 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5985 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 5986 0, 0 5987 } 5988 }; 5989 5990 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 5991 void *inject_if) 5992 { 5993 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 5994 int ret; 5995 struct ta_ras_trigger_error_input block_info = { 0 }; 5996 5997 if (adev->asic_type != CHIP_VEGA20) 5998 return -EINVAL; 5999 6000 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6001 return -EINVAL; 6002 6003 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6004 return -EPERM; 6005 6006 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6007 info->head.type)) { 6008 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6009 ras_gfx_subblocks[info->head.sub_block_index].name, 6010 info->head.type); 6011 return -EPERM; 6012 } 6013 6014 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6015 info->head.type)) { 6016 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6017 ras_gfx_subblocks[info->head.sub_block_index].name, 6018 info->head.type); 6019 return -EPERM; 6020 } 6021 6022 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6023 block_info.sub_block_index = 6024 
ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6025 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6026 block_info.address = info->address; 6027 block_info.value = info->value; 6028 6029 mutex_lock(&adev->grbm_idx_mutex); 6030 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6031 mutex_unlock(&adev->grbm_idx_mutex); 6032 6033 return ret; 6034 } 6035 6036 static const char *vml2_mems[] = { 6037 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6038 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6039 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6040 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6041 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6042 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6043 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6044 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6045 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6046 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6047 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6048 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6049 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6050 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6051 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6052 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6053 }; 6054 6055 static const char *vml2_walker_mems[] = { 6056 "UTC_VML2_CACHE_PDE0_MEM0", 6057 "UTC_VML2_CACHE_PDE0_MEM1", 6058 "UTC_VML2_CACHE_PDE1_MEM0", 6059 "UTC_VML2_CACHE_PDE1_MEM1", 6060 "UTC_VML2_CACHE_PDE2_MEM0", 6061 "UTC_VML2_CACHE_PDE2_MEM1", 6062 "UTC_VML2_RDIF_LOG_FIFO", 6063 }; 6064 6065 static const char *atc_l2_cache_2m_mems[] = { 6066 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6067 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6068 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6069 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6070 }; 6071 6072 static const char *atc_l2_cache_4k_mems[] = { 6073 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6074 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6075 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6076 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6077 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6078 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6079 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6080 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6081 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6082 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6083 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6084 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6085 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6086 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6087 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6088 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6089 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6090 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6091 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6092 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6093 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6094 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6095 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6096 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6097 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6098 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6099 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6100 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6101 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6102 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6103 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6104 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6105 }; 6106 6107 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6108 struct ras_err_data *err_data) 6109 { 6110 uint32_t i, data; 6111 uint32_t sec_count, ded_count; 6112 6113 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6114 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6115 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6116 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6117 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6118 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 
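	/*
	 * The remaining ATC L2 4K-cache index/count pair is cleared just
	 * below; the loops that follow then select each VML2, VML2
	 * walker and ATC L2 memory instance in turn via its *_INDEX
	 * register and accumulate the reported SEC (correctable) and
	 * DED (uncorrectable) counts into err_data.
	 */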
6119 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6120 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6121 6122 for (i = 0; i < 16; i++) { 6123 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6124 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6125 6126 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6127 if (sec_count) { 6128 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6129 vml2_mems[i], sec_count); 6130 err_data->ce_count += sec_count; 6131 } 6132 6133 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6134 if (ded_count) { 6135 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6136 vml2_mems[i], ded_count); 6137 err_data->ue_count += ded_count; 6138 } 6139 } 6140 6141 for (i = 0; i < 7; i++) { 6142 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6143 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6144 6145 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6146 SEC_COUNT); 6147 if (sec_count) { 6148 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6149 vml2_walker_mems[i], sec_count); 6150 err_data->ce_count += sec_count; 6151 } 6152 6153 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6154 DED_COUNT); 6155 if (ded_count) { 6156 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6157 vml2_walker_mems[i], ded_count); 6158 err_data->ue_count += ded_count; 6159 } 6160 } 6161 6162 for (i = 0; i < 4; i++) { 6163 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6164 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6165 6166 sec_count = (data & 0x00006000L) >> 0xd; 6167 if (sec_count) { 6168 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6169 atc_l2_cache_2m_mems[i], sec_count); 6170 err_data->ce_count += sec_count; 6171 } 6172 } 6173 6174 for (i = 0; i < 32; i++) { 6175 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6176 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6177 6178 sec_count = (data & 0x00006000L) >> 0xd; 6179 if (sec_count) { 6180 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6181 atc_l2_cache_4k_mems[i], sec_count); 6182 err_data->ce_count += sec_count; 6183 } 6184 6185 ded_count = (data & 0x00018000L) >> 0xf; 6186 if (ded_count) { 6187 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6188 atc_l2_cache_4k_mems[i], ded_count); 6189 err_data->ue_count += ded_count; 6190 } 6191 } 6192 6193 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6194 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6195 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6196 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6197 6198 return 0; 6199 } 6200 6201 static int __get_ras_error_count(const struct soc15_reg_entry *reg, 6202 uint32_t se_id, uint32_t inst_id, uint32_t value, 6203 uint32_t *sec_count, uint32_t *ded_count) 6204 { 6205 uint32_t i; 6206 uint32_t sec_cnt, ded_cnt; 6207 6208 for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) { 6209 if(gc_ras_fields_vg20[i].reg_offset != reg->reg_offset || 6210 gc_ras_fields_vg20[i].seg != reg->seg || 6211 gc_ras_fields_vg20[i].inst != reg->inst) 6212 continue; 6213 6214 sec_cnt = (value & 6215 gc_ras_fields_vg20[i].sec_count_mask) >> 6216 gc_ras_fields_vg20[i].sec_count_shift; 6217 if (sec_cnt) { 6218 DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", 6219 gc_ras_fields_vg20[i].name, 6220 se_id, inst_id, 6221 sec_cnt); 6222 *sec_count += sec_cnt; 6223 } 6224 6225 ded_cnt = (value & 6226 gc_ras_fields_vg20[i].ded_count_mask) >> 6227 gc_ras_fields_vg20[i].ded_count_shift; 6228 if (ded_cnt) { 6229 DRM_INFO("GFX SubBlock 
%s, Instance[%d][%d], DED %d\n", 6230 gc_ras_fields_vg20[i].name, 6231 se_id, inst_id, 6232 ded_cnt); 6233 *ded_count += ded_cnt; 6234 } 6235 } 6236 6237 return 0; 6238 } 6239 6240 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6241 void *ras_error_status) 6242 { 6243 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6244 uint32_t sec_count = 0, ded_count = 0; 6245 uint32_t i, j, k; 6246 uint32_t reg_value; 6247 6248 if (adev->asic_type != CHIP_VEGA20) 6249 return -EINVAL; 6250 6251 err_data->ue_count = 0; 6252 err_data->ce_count = 0; 6253 6254 mutex_lock(&adev->grbm_idx_mutex); 6255 6256 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 6257 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 6258 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 6259 gfx_v9_0_select_se_sh(adev, j, 0, k); 6260 reg_value = 6261 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 6262 if (reg_value) 6263 __get_ras_error_count(&sec_ded_counter_registers[i], 6264 j, k, reg_value, 6265 &sec_count, &ded_count); 6266 } 6267 } 6268 } 6269 6270 err_data->ce_count += sec_count; 6271 err_data->ue_count += ded_count; 6272 6273 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6274 mutex_unlock(&adev->grbm_idx_mutex); 6275 6276 gfx_v9_0_query_utc_edc_status(adev, err_data); 6277 6278 return 0; 6279 } 6280 6281 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6282 .name = "gfx_v9_0", 6283 .early_init = gfx_v9_0_early_init, 6284 .late_init = gfx_v9_0_late_init, 6285 .sw_init = gfx_v9_0_sw_init, 6286 .sw_fini = gfx_v9_0_sw_fini, 6287 .hw_init = gfx_v9_0_hw_init, 6288 .hw_fini = gfx_v9_0_hw_fini, 6289 .suspend = gfx_v9_0_suspend, 6290 .resume = gfx_v9_0_resume, 6291 .is_idle = gfx_v9_0_is_idle, 6292 .wait_for_idle = gfx_v9_0_wait_for_idle, 6293 .soft_reset = gfx_v9_0_soft_reset, 6294 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6295 .set_powergating_state = gfx_v9_0_set_powergating_state, 6296 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6297 }; 6298 6299 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6300 .type = AMDGPU_RING_TYPE_GFX, 6301 .align_mask = 0xff, 6302 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6303 .support_64bit_ptrs = true, 6304 .vmhub = AMDGPU_GFXHUB_0, 6305 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6306 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6307 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6308 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6309 5 + /* COND_EXEC */ 6310 7 + /* PIPELINE_SYNC */ 6311 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6312 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6313 2 + /* VM_FLUSH */ 6314 8 + /* FENCE for VM_FLUSH */ 6315 20 + /* GDS switch */ 6316 4 + /* double SWITCH_BUFFER, 6317 the first COND_EXEC jump to the place just 6318 prior to this double SWITCH_BUFFER */ 6319 5 + /* COND_EXEC */ 6320 7 + /* HDP_flush */ 6321 4 + /* VGT_flush */ 6322 14 + /* CE_META */ 6323 31 + /* DE_META */ 6324 3 + /* CNTX_CTRL */ 6325 5 + /* HDP_INVL */ 6326 8 + 8 + /* FENCE x2 */ 6327 2, /* SWITCH_BUFFER */ 6328 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6329 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6330 .emit_fence = gfx_v9_0_ring_emit_fence, 6331 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6332 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6333 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6334 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6335 .test_ring = gfx_v9_0_ring_test_ring, 6336 .test_ib = gfx_v9_0_ring_test_ib, 6337 
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which suits the Vega ASICs with their
			 * 4*2 SE/SH layout.
			 * But for Arcturus the SE/SH layout changed to 8*1.
			 * To mostly reduce the impact, we make it compatible
			 * with the current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 * e.g. i = 5, j = 0: bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
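
/*
 * gfx_v9_0_ip_block is the descriptor that the SoC-level setup code consumes
 * when it assembles the list of IP blocks for a device.  A minimal sketch of
 * the registration call, as typically issued from soc15.c:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * amdgpu_device_ip_block_add() appends the version descriptor to
 * adev->ip_blocks; from then on the amd_ip_funcs callbacks wired up above
 * (sw_init, hw_init, suspend, resume, ...) are driven by the common amdgpu
 * IP-block state machine.
 */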