/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 101 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 113 114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 120 121 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 123 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 125 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 127 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 129 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 131 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 133 134 enum ta_ras_gfx_subblock { 135 /*CPC*/ 136 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 137 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 138 TA_RAS_BLOCK__GFX_CPC_UCODE, 139 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 140 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 141 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 142 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 143 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 144 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 145 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 146 /* CPF*/ 147 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 148 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 149 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 150 TA_RAS_BLOCK__GFX_CPF_TAG, 151 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 152 /* CPG*/ 153 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 154 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 155 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 156 TA_RAS_BLOCK__GFX_CPG_TAG, 157 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 158 /* GDS*/ 159 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 160 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 161 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 162 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 163 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 164 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 165 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 166 /* SPI*/ 167 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 168 /* SQ*/ 169 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 170 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 171 TA_RAS_BLOCK__GFX_SQ_LDS_D, 172 TA_RAS_BLOCK__GFX_SQ_LDS_I, 173 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 174 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 175 /* SQC (3 ranges)*/ 176 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 177 /* SQC range 0*/ 178 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 179 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 180 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 181 
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
		#subblock,                                                     \
		TA_RAS_BLOCK__##subblock,                                      \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
	}

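/*
 * Annotation (added for readability): the a..d arguments are packed into
 * hw_supported_error_type and e..h into sw_supported_error_type for each
 * sub-block in the table below.  The bit positions appear to correspond to
 * the AMDGPU_RAS_ERROR__* error-type flags from amdgpu_ras.h.
 */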
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

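/*
 * Annotation (added for readability): the tables below hold "golden" register
 * settings.  Each SOC15_REG_GOLDEN_VALUE entry names a GC register, an AND
 * mask selecting which bits to update and the value to OR in;
 * soc15_program_register_sequence() applies them during ASIC init, with a
 * generation-wide table plus per-chip overrides.
 */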
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

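/*
 * Annotation (added for readability): the two arrays below hold the offsets
 * of the RLC SRM index-control ADDR/DATA register pairs relative to the
 * first pair; they are used later in this file when the RLC save/restore
 * list is programmed through indexed register writes.
 */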
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

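/*
 * Annotation (added for readability): apply the golden settings for the
 * detected ASIC - the generation-wide table first, then the chip-specific
 * overrides.  Renoir intentionally skips the 9.x common table, and Arcturus
 * only programs its own dedicated table.
 */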
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

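/*
 * Annotation (added for readability): the two tests below write a magic value
 * either through the ring (into a scratch register) or through an indirect
 * buffer (into a write-back slot) and then poll for it, confirming the CP is
 * processing commands.
 */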
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

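/*
 * Annotation (added for readability): on Raven, GFXOFF is masked out of
 * pp_feature when the RLC firmware is too old to support it (Raven2 and
 * Picasso are exempt from the check); when GFXOFF stays enabled, the matching
 * CP/RLC powergating flags are set.
 */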
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			&&((adev->gfx.rlc_fw_version != 106 &&
			     adev->gfx.rlc_fw_version < 531) ||
			    (adev->gfx.rlc_fw_version == 53815) ||
			    (adev->gfx.rlc_feature_version < 1) ||
			    !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

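	/*
	 * Annotation (added for readability): when firmware is loaded
	 * front-door through PSP, each CP image is registered in
	 * adev->firmware.ucode[] so PSP can stage it, and fw_size accumulates
	 * the page-aligned total.
	 */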
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/**
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
					      const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
						adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
						le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
						le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

			/* TODO: Determine if MEC2 JT FW loading can be removed
				 for all GFX V9 asic and above */
			if (adev->asic_type != CHIP_ARCTURUS) {
				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
				info->fw = adev->gfx.mec2_fw;
				adev->firmware.fw_size +=
					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
					      PAGE_SIZE);
+= 1332 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1333 PAGE_SIZE); 1334 } 1335 } 1336 } 1337 1338 out: 1339 gfx_v9_0_check_if_need_gfxoff(adev); 1340 gfx_v9_0_check_fw_write_wait(adev); 1341 if (err) { 1342 dev_err(adev->dev, 1343 "gfx9: Failed to load firmware \"%s\"\n", 1344 fw_name); 1345 release_firmware(adev->gfx.mec_fw); 1346 adev->gfx.mec_fw = NULL; 1347 release_firmware(adev->gfx.mec2_fw); 1348 adev->gfx.mec2_fw = NULL; 1349 } 1350 return err; 1351 } 1352 1353 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1354 { 1355 const char *chip_name; 1356 int r; 1357 1358 DRM_DEBUG("\n"); 1359 1360 switch (adev->asic_type) { 1361 case CHIP_VEGA10: 1362 chip_name = "vega10"; 1363 break; 1364 case CHIP_VEGA12: 1365 chip_name = "vega12"; 1366 break; 1367 case CHIP_VEGA20: 1368 chip_name = "vega20"; 1369 break; 1370 case CHIP_RAVEN: 1371 if (adev->rev_id >= 8) 1372 chip_name = "raven2"; 1373 else if (adev->pdev->device == 0x15d8) 1374 chip_name = "picasso"; 1375 else 1376 chip_name = "raven"; 1377 break; 1378 case CHIP_ARCTURUS: 1379 chip_name = "arcturus"; 1380 break; 1381 case CHIP_RENOIR: 1382 chip_name = "renoir"; 1383 break; 1384 default: 1385 BUG(); 1386 } 1387 1388 /* No CPG in Arcturus */ 1389 if (adev->asic_type != CHIP_ARCTURUS) { 1390 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1391 if (r) 1392 return r; 1393 } 1394 1395 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1396 if (r) 1397 return r; 1398 1399 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1400 if (r) 1401 return r; 1402 1403 return r; 1404 } 1405 1406 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1407 { 1408 u32 count = 0; 1409 const struct cs_section_def *sect = NULL; 1410 const struct cs_extent_def *ext = NULL; 1411 1412 /* begin clear state */ 1413 count += 2; 1414 /* context control state */ 1415 count += 3; 1416 1417 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1418 for (ext = sect->section; ext->extent != NULL; ++ext) { 1419 if (sect->id == SECT_CONTEXT) 1420 count += 2 + ext->reg_count; 1421 else 1422 return 0; 1423 } 1424 } 1425 1426 /* end clear state */ 1427 count += 2; 1428 /* clear state */ 1429 count += 2; 1430 1431 return count; 1432 } 1433 1434 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1435 volatile u32 *buffer) 1436 { 1437 u32 count = 0, i; 1438 const struct cs_section_def *sect = NULL; 1439 const struct cs_extent_def *ext = NULL; 1440 1441 if (adev->gfx.rlc.cs_data == NULL) 1442 return; 1443 if (buffer == NULL) 1444 return; 1445 1446 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1447 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1448 1449 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1450 buffer[count++] = cpu_to_le32(0x80000000); 1451 buffer[count++] = cpu_to_le32(0x80000000); 1452 1453 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1454 for (ext = sect->section; ext->extent != NULL; ++ext) { 1455 if (sect->id == SECT_CONTEXT) { 1456 buffer[count++] = 1457 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1458 buffer[count++] = cpu_to_le32(ext->reg_index - 1459 PACKET3_SET_CONTEXT_REG_START); 1460 for (i = 0; i < ext->reg_count; i++) 1461 buffer[count++] = cpu_to_le32(ext->extent[i]); 1462 } else { 1463 return; 1464 } 1465 } 1466 } 1467 1468 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1469 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1470 1471 buffer[count++] = 
cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1472 buffer[count++] = cpu_to_le32(0); 1473 } 1474 1475 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1476 { 1477 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1478 uint32_t pg_always_on_cu_num = 2; 1479 uint32_t always_on_cu_num; 1480 uint32_t i, j, k; 1481 uint32_t mask, cu_bitmap, counter; 1482 1483 if (adev->flags & AMD_IS_APU) 1484 always_on_cu_num = 4; 1485 else if (adev->asic_type == CHIP_VEGA12) 1486 always_on_cu_num = 8; 1487 else 1488 always_on_cu_num = 12; 1489 1490 mutex_lock(&adev->grbm_idx_mutex); 1491 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1492 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1493 mask = 1; 1494 cu_bitmap = 0; 1495 counter = 0; 1496 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1497 1498 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1499 if (cu_info->bitmap[i][j] & mask) { 1500 if (counter == pg_always_on_cu_num) 1501 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1502 if (counter < always_on_cu_num) 1503 cu_bitmap |= mask; 1504 else 1505 break; 1506 counter++; 1507 } 1508 mask <<= 1; 1509 } 1510 1511 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1512 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1513 } 1514 } 1515 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1516 mutex_unlock(&adev->grbm_idx_mutex); 1517 } 1518 1519 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1520 { 1521 uint32_t data; 1522 1523 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1524 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1525 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1526 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1527 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1528 1529 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1530 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1531 1532 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1533 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1534 1535 mutex_lock(&adev->grbm_idx_mutex); 1536 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1537 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1538 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1539 1540 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1541 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1542 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1543 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1544 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1545 1546 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1547 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1548 data &= 0x0000FFFF; 1549 data |= 0x00C00000; 1550 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1551 1552 /* 1553 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1554 * programmed in gfx_v9_0_init_always_on_cu_mask() 1555 */ 1556 1557 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1558 * but used for RLC_LB_CNTL configuration */ 1559 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1560 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1561 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1562 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1563 mutex_unlock(&adev->grbm_idx_mutex); 1564 1565 gfx_v9_0_init_always_on_cu_mask(adev); 1566 } 1567 1568 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1569 { 1570 uint32_t data; 1571 1572 /* set 
mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1573 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1574 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1575 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1576 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1577 1578 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1579 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1580 1581 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1582 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1583 1584 mutex_lock(&adev->grbm_idx_mutex); 1585 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1586 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1587 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1588 1589 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1590 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1591 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1592 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1593 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1594 1595 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1596 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1597 data &= 0x0000FFFF; 1598 data |= 0x00C00000; 1599 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1600 1601 /* 1602 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1603 * programmed in gfx_v9_0_init_always_on_cu_mask() 1604 */ 1605 1606 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1607 * but used for RLC_LB_CNTL configuration */ 1608 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1609 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1610 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1611 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1612 mutex_unlock(&adev->grbm_idx_mutex); 1613 1614 gfx_v9_0_init_always_on_cu_mask(adev); 1615 } 1616 1617 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1618 { 1619 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1620 } 1621 1622 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1623 { 1624 return 5; 1625 } 1626 1627 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1628 { 1629 const struct cs_section_def *cs_data; 1630 int r; 1631 1632 adev->gfx.rlc.cs_data = gfx9_cs_data; 1633 1634 cs_data = adev->gfx.rlc.cs_data; 1635 1636 if (cs_data) { 1637 /* init clear state block */ 1638 r = amdgpu_gfx_rlc_init_csb(adev); 1639 if (r) 1640 return r; 1641 } 1642 1643 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 1644 /* TODO: double check the cp_table_size for RV */ 1645 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1646 r = amdgpu_gfx_rlc_init_cpt(adev); 1647 if (r) 1648 return r; 1649 } 1650 1651 switch (adev->asic_type) { 1652 case CHIP_RAVEN: 1653 gfx_v9_0_init_lbpw(adev); 1654 break; 1655 case CHIP_VEGA20: 1656 gfx_v9_4_init_lbpw(adev); 1657 break; 1658 default: 1659 break; 1660 } 1661 1662 return 0; 1663 } 1664 1665 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) 1666 { 1667 int r; 1668 1669 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1670 if (unlikely(r != 0)) 1671 return r; 1672 1673 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, 1674 AMDGPU_GEM_DOMAIN_VRAM); 1675 if (!r) 1676 adev->gfx.rlc.clear_state_gpu_addr = 1677 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); 1678 1679 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1680 1681 return r; 1682 } 1683 1684 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) 1685 { 1686 int r; 1687 1688 if (!adev->gfx.rlc.clear_state_obj) 1689 return; 1690 1691 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1692 if (likely(r == 0)) { 1693 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1694 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1695 } 1696 } 1697 1698 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1699 { 1700 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1701 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1702 } 1703 1704 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1705 { 1706 int r; 1707 u32 *hpd; 1708 const __le32 *fw_data; 1709 unsigned fw_size; 1710 u32 *fw; 1711 size_t mec_hpd_size; 1712 1713 const struct gfx_firmware_header_v1_0 *mec_hdr; 1714 1715 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1716 1717 /* take ownership of the relevant compute queues */ 1718 amdgpu_gfx_compute_queue_acquire(adev); 1719 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1720 1721 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1722 AMDGPU_GEM_DOMAIN_VRAM, 1723 &adev->gfx.mec.hpd_eop_obj, 1724 &adev->gfx.mec.hpd_eop_gpu_addr, 1725 (void **)&hpd); 1726 if (r) { 1727 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1728 gfx_v9_0_mec_fini(adev); 1729 return r; 1730 } 1731 1732 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1733 1734 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1735 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1736 1737 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1738 1739 fw_data = (const __le32 *) 1740 (adev->gfx.mec_fw->data + 1741 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1742 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1743 1744 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1745 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1746 &adev->gfx.mec.mec_fw_obj, 1747 &adev->gfx.mec.mec_fw_gpu_addr, 
1748 (void **)&fw); 1749 if (r) { 1750 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1751 gfx_v9_0_mec_fini(adev); 1752 return r; 1753 } 1754 1755 memcpy(fw, fw_data, fw_size); 1756 1757 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1758 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1759 1760 return 0; 1761 } 1762 1763 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1764 { 1765 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1766 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1767 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1768 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1769 (SQ_IND_INDEX__FORCE_READ_MASK)); 1770 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1771 } 1772 1773 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1774 uint32_t wave, uint32_t thread, 1775 uint32_t regno, uint32_t num, uint32_t *out) 1776 { 1777 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1778 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1779 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1780 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1781 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1782 (SQ_IND_INDEX__FORCE_READ_MASK) | 1783 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1784 while (num--) 1785 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1786 } 1787 1788 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1789 { 1790 /* type 1 wave data */ 1791 dst[(*no_fields)++] = 1; 1792 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1793 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1794 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1795 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1796 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1797 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1798 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1799 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1800 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1801 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1802 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1803 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1804 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1805 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1806 } 1807 1808 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1809 uint32_t wave, uint32_t start, 1810 uint32_t size, uint32_t *dst) 1811 { 1812 wave_read_regs( 1813 adev, simd, wave, 0, 1814 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1815 } 1816 1817 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1818 uint32_t wave, uint32_t thread, 1819 uint32_t start, uint32_t size, 1820 uint32_t *dst) 1821 { 1822 wave_read_regs( 1823 adev, simd, wave, thread, 1824 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1825 } 1826 1827 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1828 u32 me, u32 pipe, u32 q, u32 vm) 1829 { 1830 soc15_grbm_select(adev, me, pipe, q, vm); 1831 } 1832 1833 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1834 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1835 .select_se_sh = &gfx_v9_0_select_se_sh, 1836 .read_wave_data = 
&gfx_v9_0_read_wave_data, 1837 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1838 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1839 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1840 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1841 .query_ras_error_count = &gfx_v9_0_query_ras_error_count 1842 }; 1843 1844 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1845 { 1846 u32 gb_addr_config; 1847 int err; 1848 1849 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1850 1851 switch (adev->asic_type) { 1852 case CHIP_VEGA10: 1853 adev->gfx.config.max_hw_contexts = 8; 1854 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1855 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1856 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1857 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1858 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1859 break; 1860 case CHIP_VEGA12: 1861 adev->gfx.config.max_hw_contexts = 8; 1862 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1863 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1864 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1865 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1866 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1867 DRM_INFO("fix gfx.config for vega12\n"); 1868 break; 1869 case CHIP_VEGA20: 1870 adev->gfx.config.max_hw_contexts = 8; 1871 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1872 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1873 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1874 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1875 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1876 gb_addr_config &= ~0xf3e777ff; 1877 gb_addr_config |= 0x22014042; 1878 /* check vbios table if gpu info is not available */ 1879 err = amdgpu_atomfirmware_get_gfx_info(adev); 1880 if (err) 1881 return err; 1882 break; 1883 case CHIP_RAVEN: 1884 adev->gfx.config.max_hw_contexts = 8; 1885 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1886 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1887 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1888 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1889 if (adev->rev_id >= 8) 1890 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1891 else 1892 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1893 break; 1894 case CHIP_ARCTURUS: 1895 adev->gfx.config.max_hw_contexts = 8; 1896 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1897 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1898 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1899 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1900 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1901 gb_addr_config &= ~0xf3e777ff; 1902 gb_addr_config |= 0x22014042; 1903 break; 1904 case CHIP_RENOIR: 1905 adev->gfx.config.max_hw_contexts = 8; 1906 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1907 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1908 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1909 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1910 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1911 gb_addr_config &= ~0xf3e777ff; 1912 gb_addr_config |= 0x22010042; 1913 break; 1914 default: 1915 BUG(); 1916 break; 1917 } 1918 1919 adev->gfx.config.gb_addr_config = gb_addr_config; 1920 1921 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1922 REG_GET_FIELD( 1923 adev->gfx.config.gb_addr_config, 1924 GB_ADDR_CONFIG, 1925 NUM_PIPES); 1926 1927 adev->gfx.config.max_tile_pipes = 1928 adev->gfx.config.gb_addr_config_fields.num_pipes; 1929 1930 
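/* The GB_ADDR_CONFIG fields read below are log2-encoded, hence the 1 << shifts
 * when expanding banks, compressed fragments, RBs per SE and shader engines into
 * counts; the pipe interleave size additionally carries a 256-byte base,
 * i.e. 1 << (8 + field). */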
adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1931 REG_GET_FIELD( 1932 adev->gfx.config.gb_addr_config, 1933 GB_ADDR_CONFIG, 1934 NUM_BANKS); 1935 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1936 REG_GET_FIELD( 1937 adev->gfx.config.gb_addr_config, 1938 GB_ADDR_CONFIG, 1939 MAX_COMPRESSED_FRAGS); 1940 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1941 REG_GET_FIELD( 1942 adev->gfx.config.gb_addr_config, 1943 GB_ADDR_CONFIG, 1944 NUM_RB_PER_SE); 1945 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1946 REG_GET_FIELD( 1947 adev->gfx.config.gb_addr_config, 1948 GB_ADDR_CONFIG, 1949 NUM_SHADER_ENGINES); 1950 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1951 REG_GET_FIELD( 1952 adev->gfx.config.gb_addr_config, 1953 GB_ADDR_CONFIG, 1954 PIPE_INTERLEAVE_SIZE)); 1955 1956 return 0; 1957 } 1958 1959 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1960 struct amdgpu_ngg_buf *ngg_buf, 1961 int size_se, 1962 int default_size_se) 1963 { 1964 int r; 1965 1966 if (size_se < 0) { 1967 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1968 return -EINVAL; 1969 } 1970 size_se = size_se ? size_se : default_size_se; 1971 1972 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1973 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1974 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1975 &ngg_buf->bo, 1976 &ngg_buf->gpu_addr, 1977 NULL); 1978 if (r) { 1979 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1980 return r; 1981 } 1982 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1983 1984 return r; 1985 } 1986 1987 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1988 { 1989 int i; 1990 1991 for (i = 0; i < NGG_BUF_MAX; i++) 1992 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1993 &adev->gfx.ngg.buf[i].gpu_addr, 1994 NULL); 1995 1996 memset(&adev->gfx.ngg.buf[0], 0, 1997 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1998 1999 adev->gfx.ngg.init = false; 2000 2001 return 0; 2002 } 2003 2004 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 2005 { 2006 int r; 2007 2008 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 2009 return 0; 2010 2011 /* GDS reserve memory: 64 bytes alignment */ 2012 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 2013 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 2014 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 2015 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 2016 2017 /* Primitive Buffer */ 2018 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 2019 amdgpu_prim_buf_per_se, 2020 64 * 1024); 2021 if (r) { 2022 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 2023 goto err; 2024 } 2025 2026 /* Position Buffer */ 2027 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 2028 amdgpu_pos_buf_per_se, 2029 256 * 1024); 2030 if (r) { 2031 dev_err(adev->dev, "Failed to create Position Buffer\n"); 2032 goto err; 2033 } 2034 2035 /* Control Sideband */ 2036 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 2037 amdgpu_cntl_sb_buf_per_se, 2038 256); 2039 if (r) { 2040 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 2041 goto err; 2042 } 2043 2044 /* Parameter Cache, not created by default */ 2045 if (amdgpu_param_buf_per_se <= 0) 2046 goto out; 2047 2048 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 2049 amdgpu_param_buf_per_se, 2050 512 * 1024); 2051 if (r) { 2052 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 
2053 goto err; 2054 } 2055 2056 out: 2057 adev->gfx.ngg.init = true; 2058 return 0; 2059 err: 2060 gfx_v9_0_ngg_fini(adev); 2061 return r; 2062 } 2063 2064 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 2065 { 2066 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2067 int r; 2068 u32 data, base; 2069 2070 if (!amdgpu_ngg) 2071 return 0; 2072 2073 /* Program buffer size */ 2074 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 2075 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 2076 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 2077 adev->gfx.ngg.buf[NGG_POS].size >> 8); 2078 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 2079 2080 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 2081 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 2082 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 2083 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 2084 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 2085 2086 /* Program buffer base address */ 2087 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2088 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 2089 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 2090 2091 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2092 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 2093 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 2094 2095 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2096 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 2097 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 2098 2099 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2100 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 2101 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 2102 2103 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2104 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 2105 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 2106 2107 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2108 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 2109 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 2110 2111 /* Clear GDS reserved memory */ 2112 r = amdgpu_ring_alloc(ring, 17); 2113 if (r) { 2114 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 2115 ring->name, r); 2116 return r; 2117 } 2118 2119 gfx_v9_0_write_data_to_reg(ring, 0, false, 2120 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 2121 (adev->gds.gds_size + 2122 adev->gfx.ngg.gds_reserve_size)); 2123 2124 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 2125 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 2126 PACKET3_DMA_DATA_DST_SEL(1) | 2127 PACKET3_DMA_DATA_SRC_SEL(2))); 2128 amdgpu_ring_write(ring, 0); 2129 amdgpu_ring_write(ring, 0); 2130 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 2131 amdgpu_ring_write(ring, 0); 2132 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 2133 adev->gfx.ngg.gds_reserve_size); 2134 2135 gfx_v9_0_write_data_to_reg(ring, 0, false, 2136 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 2137 2138 amdgpu_ring_commit(ring); 2139 2140 return 0; 2141 } 2142 2143 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2144 int mec, int pipe, int queue) 2145 { 2146 int r; 2147 unsigned irq_type; 2148 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2149 2150 ring = &adev->gfx.compute_ring[ring_id]; 2151 2152 /* mec0 is me1 */ 2153 ring->me = mec + 1; 2154 ring->pipe = pipe; 2155 ring->queue = queue; 2156 2157 ring->ring_obj = NULL; 2158 ring->use_doorbell = true; 2159 
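/* Note (assumption based on the SOC15 doorbell layout): adev->doorbell_index
 * entries are allocated as 64-bit doorbell slots, so the << 1 below converts
 * the slot number into the 32-bit dword index programmed for the ring. */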
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2160 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2161 + (ring_id * GFX9_MEC_HPD_SIZE); 2162 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2163 2164 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2165 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2166 + ring->pipe; 2167 2168 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2169 r = amdgpu_ring_init(adev, ring, 1024, 2170 &adev->gfx.eop_irq, irq_type); 2171 if (r) 2172 return r; 2173 2174 2175 return 0; 2176 } 2177 2178 static int gfx_v9_0_sw_init(void *handle) 2179 { 2180 int i, j, k, r, ring_id; 2181 struct amdgpu_ring *ring; 2182 struct amdgpu_kiq *kiq; 2183 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2184 2185 switch (adev->asic_type) { 2186 case CHIP_VEGA10: 2187 case CHIP_VEGA12: 2188 case CHIP_VEGA20: 2189 case CHIP_RAVEN: 2190 case CHIP_ARCTURUS: 2191 case CHIP_RENOIR: 2192 adev->gfx.mec.num_mec = 2; 2193 break; 2194 default: 2195 adev->gfx.mec.num_mec = 1; 2196 break; 2197 } 2198 2199 adev->gfx.mec.num_pipe_per_mec = 4; 2200 adev->gfx.mec.num_queue_per_pipe = 8; 2201 2202 /* EOP Event */ 2203 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2204 if (r) 2205 return r; 2206 2207 /* Privileged reg */ 2208 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2209 &adev->gfx.priv_reg_irq); 2210 if (r) 2211 return r; 2212 2213 /* Privileged inst */ 2214 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2215 &adev->gfx.priv_inst_irq); 2216 if (r) 2217 return r; 2218 2219 /* ECC error */ 2220 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2221 &adev->gfx.cp_ecc_error_irq); 2222 if (r) 2223 return r; 2224 2225 /* FUE error */ 2226 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2227 &adev->gfx.cp_ecc_error_irq); 2228 if (r) 2229 return r; 2230 2231 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2232 2233 gfx_v9_0_scratch_init(adev); 2234 2235 r = gfx_v9_0_init_microcode(adev); 2236 if (r) { 2237 DRM_ERROR("Failed to load gfx firmware!\n"); 2238 return r; 2239 } 2240 2241 r = adev->gfx.rlc.funcs->init(adev); 2242 if (r) { 2243 DRM_ERROR("Failed to init rlc BOs!\n"); 2244 return r; 2245 } 2246 2247 r = gfx_v9_0_mec_init(adev); 2248 if (r) { 2249 DRM_ERROR("Failed to init MEC BOs!\n"); 2250 return r; 2251 } 2252 2253 /* set up the gfx ring */ 2254 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2255 ring = &adev->gfx.gfx_ring[i]; 2256 ring->ring_obj = NULL; 2257 if (!i) 2258 sprintf(ring->name, "gfx"); 2259 else 2260 sprintf(ring->name, "gfx_%d", i); 2261 ring->use_doorbell = true; 2262 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2263 r = amdgpu_ring_init(adev, ring, 1024, 2264 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 2265 if (r) 2266 return r; 2267 } 2268 2269 /* set up the compute queues - allocate horizontally across pipes */ 2270 ring_id = 0; 2271 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2272 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2273 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2274 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2275 continue; 2276 2277 r = gfx_v9_0_compute_ring_init(adev, 2278 ring_id, 2279 i, k, j); 2280 if (r) 2281 return r; 2282 2283 ring_id++; 2284 } 2285 } 2286 } 2287 2288 r = 
amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2289 if (r) { 2290 DRM_ERROR("Failed to init KIQ BOs!\n"); 2291 return r; 2292 } 2293 2294 kiq = &adev->gfx.kiq; 2295 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2296 if (r) 2297 return r; 2298 2299 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2300 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2301 if (r) 2302 return r; 2303 2304 adev->gfx.ce_ram_size = 0x8000; 2305 2306 r = gfx_v9_0_gpu_early_init(adev); 2307 if (r) 2308 return r; 2309 2310 r = gfx_v9_0_ngg_init(adev); 2311 if (r) 2312 return r; 2313 2314 return 0; 2315 } 2316 2317 2318 static int gfx_v9_0_sw_fini(void *handle) 2319 { 2320 int i; 2321 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2322 2323 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 2324 adev->gfx.ras_if) { 2325 struct ras_common_if *ras_if = adev->gfx.ras_if; 2326 struct ras_ih_if ih_info = { 2327 .head = *ras_if, 2328 }; 2329 2330 amdgpu_ras_debugfs_remove(adev, ras_if); 2331 amdgpu_ras_sysfs_remove(adev, ras_if); 2332 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 2333 amdgpu_ras_feature_enable(adev, ras_if, 0); 2334 kfree(ras_if); 2335 } 2336 2337 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2338 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2339 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2340 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2341 2342 amdgpu_gfx_mqd_sw_fini(adev); 2343 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2344 amdgpu_gfx_kiq_fini(adev); 2345 2346 gfx_v9_0_mec_fini(adev); 2347 gfx_v9_0_ngg_fini(adev); 2348 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2349 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 2350 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2351 &adev->gfx.rlc.cp_table_gpu_addr, 2352 (void **)&adev->gfx.rlc.cp_table_ptr); 2353 } 2354 gfx_v9_0_free_microcode(adev); 2355 2356 return 0; 2357 } 2358 2359 2360 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2361 { 2362 /* TODO */ 2363 } 2364 2365 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2366 { 2367 u32 data; 2368 2369 if (instance == 0xffffffff) 2370 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2371 else 2372 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2373 2374 if (se_num == 0xffffffff) 2375 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2376 else 2377 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2378 2379 if (sh_num == 0xffffffff) 2380 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2381 else 2382 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2383 2384 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2385 } 2386 2387 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2388 { 2389 u32 data, mask; 2390 2391 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2392 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2393 2394 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2395 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2396 2397 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2398 adev->gfx.config.max_sh_per_se); 2399 2400 return (~data) & mask; 2401 } 2402 2403 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2404 { 2405 int i, j; 2406 u32 data; 2407 u32 active_rbs = 0; 2408 u32 rb_bitmap_width_per_sh =
adev->gfx.config.max_backends_per_se / 2409 adev->gfx.config.max_sh_per_se; 2410 2411 mutex_lock(&adev->grbm_idx_mutex); 2412 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2413 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2414 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2415 data = gfx_v9_0_get_rb_active_bitmap(adev); 2416 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2417 rb_bitmap_width_per_sh); 2418 } 2419 } 2420 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2421 mutex_unlock(&adev->grbm_idx_mutex); 2422 2423 adev->gfx.config.backend_enable_mask = active_rbs; 2424 adev->gfx.config.num_rbs = hweight32(active_rbs); 2425 } 2426 2427 #define DEFAULT_SH_MEM_BASES (0x6000) 2428 #define FIRST_COMPUTE_VMID (8) 2429 #define LAST_COMPUTE_VMID (16) 2430 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2431 { 2432 int i; 2433 uint32_t sh_mem_config; 2434 uint32_t sh_mem_bases; 2435 2436 /* 2437 * Configure apertures: 2438 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2439 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2440 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2441 */ 2442 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2443 2444 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2445 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2446 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2447 2448 mutex_lock(&adev->srbm_mutex); 2449 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2450 soc15_grbm_select(adev, 0, 0, 0, i); 2451 /* CP and shaders */ 2452 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2453 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2454 } 2455 soc15_grbm_select(adev, 0, 0, 0, 0); 2456 mutex_unlock(&adev->srbm_mutex); 2457 2458 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2459 acccess. These should be enabled by FW for target VMIDs. */ 2460 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2461 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2462 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2463 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2464 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2465 } 2466 } 2467 2468 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2469 { 2470 int vmid; 2471 2472 /* 2473 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2474 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2475 * the driver can enable them for graphics. VMID0 should maintain 2476 * access so that HWS firmware can save/restore entries. 
2477 */ 2478 for (vmid = 1; vmid < 16; vmid++) { 2479 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2480 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2481 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2482 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2483 } 2484 } 2485 2486 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2487 { 2488 u32 tmp; 2489 int i; 2490 2491 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2492 2493 gfx_v9_0_tiling_mode_table_init(adev); 2494 2495 gfx_v9_0_setup_rb(adev); 2496 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2497 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2498 2499 /* XXX SH_MEM regs */ 2500 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2501 mutex_lock(&adev->srbm_mutex); 2502 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2503 soc15_grbm_select(adev, 0, 0, 0, i); 2504 /* CP and shaders */ 2505 if (i == 0) { 2506 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2507 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2508 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2509 !!amdgpu_noretry); 2510 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2511 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2512 } else { 2513 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2514 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2515 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2516 !!amdgpu_noretry); 2517 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2518 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2519 (adev->gmc.private_aperture_start >> 48)); 2520 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2521 (adev->gmc.shared_aperture_start >> 48)); 2522 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2523 } 2524 } 2525 soc15_grbm_select(adev, 0, 0, 0, 0); 2526 2527 mutex_unlock(&adev->srbm_mutex); 2528 2529 gfx_v9_0_init_compute_vmid(adev); 2530 gfx_v9_0_init_gds_vmid(adev); 2531 } 2532 2533 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2534 { 2535 u32 i, j, k; 2536 u32 mask; 2537 2538 mutex_lock(&adev->grbm_idx_mutex); 2539 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2540 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2541 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2542 for (k = 0; k < adev->usec_timeout; k++) { 2543 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2544 break; 2545 udelay(1); 2546 } 2547 if (k == adev->usec_timeout) { 2548 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2549 0xffffffff, 0xffffffff); 2550 mutex_unlock(&adev->grbm_idx_mutex); 2551 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2552 i, j); 2553 return; 2554 } 2555 } 2556 } 2557 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2558 mutex_unlock(&adev->grbm_idx_mutex); 2559 2560 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2561 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2562 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2563 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2564 for (k = 0; k < adev->usec_timeout; k++) { 2565 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2566 break; 2567 udelay(1); 2568 } 2569 } 2570 2571 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2572 bool enable) 2573 { 2574 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2575 2576 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2577 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
1 : 0); 2578 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2579 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2580 2581 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2582 } 2583 2584 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2585 { 2586 /* csib */ 2587 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2588 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2589 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2590 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2591 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2592 adev->gfx.rlc.clear_state_size); 2593 } 2594 2595 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2596 int indirect_offset, 2597 int list_size, 2598 int *unique_indirect_regs, 2599 int unique_indirect_reg_count, 2600 int *indirect_start_offsets, 2601 int *indirect_start_offsets_count, 2602 int max_start_offsets_count) 2603 { 2604 int idx; 2605 2606 for (; indirect_offset < list_size; indirect_offset++) { 2607 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2608 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2609 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2610 2611 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2612 indirect_offset += 2; 2613 2614 /* look for the matching index */ 2615 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2616 if (unique_indirect_regs[idx] == 2617 register_list_format[indirect_offset] || 2618 !unique_indirect_regs[idx]) 2619 break; 2620 } 2621 2622 BUG_ON(idx >= unique_indirect_reg_count); 2623 2624 if (!unique_indirect_regs[idx]) 2625 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2626 2627 indirect_offset++; 2628 } 2629 } 2630 } 2631 2632 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2633 { 2634 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2635 int unique_indirect_reg_count = 0; 2636 2637 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2638 int indirect_start_offsets_count = 0; 2639 2640 int list_size = 0; 2641 int i = 0, j = 0; 2642 u32 tmp = 0; 2643 2644 u32 *register_list_format = 2645 kmemdup(adev->gfx.rlc.register_list_format, 2646 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2647 if (!register_list_format) 2648 return -ENOMEM; 2649 2650 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2651 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2652 gfx_v9_1_parse_ind_reg_list(register_list_format, 2653 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2654 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2655 unique_indirect_regs, 2656 unique_indirect_reg_count, 2657 indirect_start_offsets, 2658 &indirect_start_offsets_count, 2659 ARRAY_SIZE(indirect_start_offsets)); 2660 2661 /* enable auto inc in case it is disabled */ 2662 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2663 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2664 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2665 2666 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2667 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2668 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2669 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2670 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2671 adev->gfx.rlc.register_restore[i]); 2672 2673 /* load indirect register */ 2674
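/* As gfx_v9_1_parse_ind_reg_list() above implies, the format list starts with a
 * direct-register portion followed by indirect blocks terminated by 0xFFFFFFFF;
 * when the list is programmed below, each indirect register offset is replaced
 * by its index into unique_indirect_regs[]. */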
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2675 adev->gfx.rlc.reg_list_format_start); 2676 2677 /* direct register portion */ 2678 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2679 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2680 register_list_format[i]); 2681 2682 /* indirect register portion */ 2683 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2684 if (register_list_format[i] == 0xFFFFFFFF) { 2685 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2686 continue; 2687 } 2688 2689 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2690 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2691 2692 for (j = 0; j < unique_indirect_reg_count; j++) { 2693 if (register_list_format[i] == unique_indirect_regs[j]) { 2694 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2695 break; 2696 } 2697 } 2698 2699 BUG_ON(j >= unique_indirect_reg_count); 2700 2701 i++; 2702 } 2703 2704 /* set save/restore list size */ 2705 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2706 list_size = list_size >> 1; 2707 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2708 adev->gfx.rlc.reg_restore_list_size); 2709 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2710 2711 /* write the starting offsets to RLC scratch ram */ 2712 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2713 adev->gfx.rlc.starting_offsets_start); 2714 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2715 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2716 indirect_start_offsets[i]); 2717 2718 /* load unique indirect regs*/ 2719 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2720 if (unique_indirect_regs[i] != 0) { 2721 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2722 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2723 unique_indirect_regs[i] & 0x3FFFF); 2724 2725 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2726 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2727 unique_indirect_regs[i] >> 20); 2728 } 2729 } 2730 2731 kfree(register_list_format); 2732 return 0; 2733 } 2734 2735 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2736 { 2737 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2738 } 2739 2740 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2741 bool enable) 2742 { 2743 uint32_t data = 0; 2744 uint32_t default_data = 0; 2745 2746 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2747 if (enable == true) { 2748 /* enable GFXIP control over CGPG */ 2749 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2750 if(default_data != data) 2751 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2752 2753 /* update status */ 2754 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2755 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2756 if(default_data != data) 2757 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2758 } else { 2759 /* restore GFXIP control over GCPG */ 2760 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2761 if(default_data != data) 2762 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2763 } 2764 } 2765 2766 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2767 { 2768 uint32_t data = 0; 2769 2770 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2771 AMD_PG_SUPPORT_GFX_SMG | 2772 AMD_PG_SUPPORT_GFX_DMG)) { 2773 /* init IDLE_POLL_COUNT = 60 
*/ 2774 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2775 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2776 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2777 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2778 2779 /* init RLC PG Delay */ 2780 data = 0; 2781 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2782 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2783 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2784 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2785 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2786 2787 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2788 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2789 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2790 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2791 2792 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2793 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2794 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2795 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2796 2797 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2798 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2799 2800 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2801 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2802 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2803 2804 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2805 } 2806 } 2807 2808 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2809 bool enable) 2810 { 2811 uint32_t data = 0; 2812 uint32_t default_data = 0; 2813 2814 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2815 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2816 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2817 enable ? 1 : 0); 2818 if (default_data != data) 2819 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2820 } 2821 2822 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2823 bool enable) 2824 { 2825 uint32_t data = 0; 2826 uint32_t default_data = 0; 2827 2828 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2829 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2830 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2831 enable ? 1 : 0); 2832 if(default_data != data) 2833 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2834 } 2835 2836 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2837 bool enable) 2838 { 2839 uint32_t data = 0; 2840 uint32_t default_data = 0; 2841 2842 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2843 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2844 CP_PG_DISABLE, 2845 enable ? 0 : 1); 2846 if(default_data != data) 2847 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2848 } 2849 2850 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2851 bool enable) 2852 { 2853 uint32_t data, default_data; 2854 2855 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2856 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2857 GFX_POWER_GATING_ENABLE, 2858 enable ? 
1 : 0); 2859 if(default_data != data) 2860 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2861 } 2862 2863 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2864 bool enable) 2865 { 2866 uint32_t data, default_data; 2867 2868 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2869 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2870 GFX_PIPELINE_PG_ENABLE, 2871 enable ? 1 : 0); 2872 if(default_data != data) 2873 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2874 2875 if (!enable) 2876 /* read any GFX register to wake up GFX */ 2877 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2878 } 2879 2880 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2881 bool enable) 2882 { 2883 uint32_t data, default_data; 2884 2885 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2886 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2887 STATIC_PER_CU_PG_ENABLE, 2888 enable ? 1 : 0); 2889 if(default_data != data) 2890 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2891 } 2892 2893 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2894 bool enable) 2895 { 2896 uint32_t data, default_data; 2897 2898 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2899 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2900 DYN_PER_CU_PG_ENABLE, 2901 enable ? 1 : 0); 2902 if(default_data != data) 2903 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2904 } 2905 2906 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2907 { 2908 gfx_v9_0_init_csb(adev); 2909 2910 /* 2911 * The RLC save/restore list is only available with RLC firmware v2.1 2912 * and later, and it is required by the gfxoff feature. 2913 */ 2914 if (adev->gfx.rlc.is_rlc_v2_1) { 2915 gfx_v9_1_init_rlc_save_restore_list(adev); 2916 gfx_v9_0_enable_save_restore_machine(adev); 2917 } 2918 2919 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2920 AMD_PG_SUPPORT_GFX_SMG | 2921 AMD_PG_SUPPORT_GFX_DMG | 2922 AMD_PG_SUPPORT_CP | 2923 AMD_PG_SUPPORT_GDS | 2924 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2925 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2926 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2927 gfx_v9_0_init_gfx_power_gating(adev); 2928 } 2929 } 2930 2931 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2932 { 2933 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2934 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2935 gfx_v9_0_wait_for_rlc_serdes(adev); 2936 } 2937 2938 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2939 { 2940 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2941 udelay(50); 2942 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2943 udelay(50); 2944 } 2945 2946 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2947 { 2948 #ifdef AMDGPU_RLC_DEBUG_RETRY 2949 u32 rlc_ucode_ver; 2950 #endif 2951 2952 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2953 udelay(50); 2954 2955 /* APUs (such as carrizo) enable the CP interrupt only after the CP has been initialized */ 2956 if (!(adev->flags & AMD_IS_APU)) { 2957 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2958 udelay(50); 2959 } 2960 2961 #ifdef AMDGPU_RLC_DEBUG_RETRY 2962 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2963 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2964 if(rlc_ucode_ver == 0x108) { 2965 DRM_INFO("Using rlc debug ucode.
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2966 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2967 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2968 * default is 0x9C4 to create a 100us interval */ 2969 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2970 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2971 * to disable the page fault retry interrupts, default is 2972 * 0x100 (256) */ 2973 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2974 } 2975 #endif 2976 } 2977 2978 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2979 { 2980 const struct rlc_firmware_header_v2_0 *hdr; 2981 const __le32 *fw_data; 2982 unsigned i, fw_size; 2983 2984 if (!adev->gfx.rlc_fw) 2985 return -EINVAL; 2986 2987 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2988 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2989 2990 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2991 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2992 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2993 2994 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2995 RLCG_UCODE_LOADING_START_ADDRESS); 2996 for (i = 0; i < fw_size; i++) 2997 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2998 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2999 3000 return 0; 3001 } 3002 3003 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3004 { 3005 int r; 3006 3007 if (amdgpu_sriov_vf(adev)) { 3008 gfx_v9_0_init_csb(adev); 3009 return 0; 3010 } 3011 3012 adev->gfx.rlc.funcs->stop(adev); 3013 3014 /* disable CG */ 3015 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3016 3017 gfx_v9_0_init_pg(adev); 3018 3019 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3020 /* legacy rlc firmware loading */ 3021 r = gfx_v9_0_rlc_load_microcode(adev); 3022 if (r) 3023 return r; 3024 } 3025 3026 switch (adev->asic_type) { 3027 case CHIP_RAVEN: 3028 if (amdgpu_lbpw == 0) 3029 gfx_v9_0_enable_lbpw(adev, false); 3030 else 3031 gfx_v9_0_enable_lbpw(adev, true); 3032 break; 3033 case CHIP_VEGA20: 3034 if (amdgpu_lbpw > 0) 3035 gfx_v9_0_enable_lbpw(adev, true); 3036 else 3037 gfx_v9_0_enable_lbpw(adev, false); 3038 break; 3039 default: 3040 break; 3041 } 3042 3043 adev->gfx.rlc.funcs->start(adev); 3044 3045 return 0; 3046 } 3047 3048 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3049 { 3050 int i; 3051 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3052 3053 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3054 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3055 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 3056 if (!enable) { 3057 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3058 adev->gfx.gfx_ring[i].sched.ready = false; 3059 } 3060 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3061 udelay(50); 3062 } 3063 3064 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3065 { 3066 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3067 const struct gfx_firmware_header_v1_0 *ce_hdr; 3068 const struct gfx_firmware_header_v1_0 *me_hdr; 3069 const __le32 *fw_data; 3070 unsigned i, fw_size; 3071 3072 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3073 return -EINVAL; 3074 3075 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3076 adev->gfx.pfp_fw->data; 3077 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3078 adev->gfx.ce_fw->data; 3079 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3080 adev->gfx.me_fw->data; 3081 3082 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3083 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3084 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3085 3086 gfx_v9_0_cp_gfx_enable(adev, false); 3087 3088 /* PFP */ 3089 fw_data = (const __le32 *) 3090 (adev->gfx.pfp_fw->data + 3091 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3092 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3093 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3094 for (i = 0; i < fw_size; i++) 3095 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3096 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3097 3098 /* CE */ 3099 fw_data = (const __le32 *) 3100 (adev->gfx.ce_fw->data + 3101 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3102 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3103 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3104 for (i = 0; i < fw_size; i++) 3105 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3106 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3107 3108 /* ME */ 3109 fw_data = (const __le32 *) 3110 (adev->gfx.me_fw->data + 3111 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3112 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3113 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3114 for (i = 0; i < fw_size; i++) 3115 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3116 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3117 3118 return 0; 3119 } 3120 3121 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3122 { 3123 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3124 const struct cs_section_def *sect = NULL; 3125 const struct cs_extent_def *ext = NULL; 3126 int r, i, tmp; 3127 3128 /* init the CP */ 3129 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3130 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3131 3132 gfx_v9_0_cp_gfx_enable(adev, true); 3133 3134 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3135 if (r) { 3136 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3137 return r; 3138 } 3139 3140 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3141 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3142 3143 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3144 amdgpu_ring_write(ring, 0x80000000); 3145 amdgpu_ring_write(ring, 0x80000000); 3146 3147 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3148 for (ext = sect->section; ext->extent != NULL; ++ext) { 3149 if (sect->id == SECT_CONTEXT) { 3150 amdgpu_ring_write(ring, 3151 PACKET3(PACKET3_SET_CONTEXT_REG, 3152 
ext->reg_count)); 3153 amdgpu_ring_write(ring, 3154 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3155 for (i = 0; i < ext->reg_count; i++) 3156 amdgpu_ring_write(ring, ext->extent[i]); 3157 } 3158 } 3159 } 3160 3161 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3162 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3163 3164 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3165 amdgpu_ring_write(ring, 0); 3166 3167 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3168 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3169 amdgpu_ring_write(ring, 0x8000); 3170 amdgpu_ring_write(ring, 0x8000); 3171 3172 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3173 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3174 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3175 amdgpu_ring_write(ring, tmp); 3176 amdgpu_ring_write(ring, 0); 3177 3178 amdgpu_ring_commit(ring); 3179 3180 return 0; 3181 } 3182 3183 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3184 { 3185 struct amdgpu_ring *ring; 3186 u32 tmp; 3187 u32 rb_bufsz; 3188 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3189 3190 /* Set the write pointer delay */ 3191 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3192 3193 /* set the RB to use vmid 0 */ 3194 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3195 3196 /* Set ring buffer size */ 3197 ring = &adev->gfx.gfx_ring[0]; 3198 rb_bufsz = order_base_2(ring->ring_size / 8); 3199 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3200 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3201 #ifdef __BIG_ENDIAN 3202 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3203 #endif 3204 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3205 3206 /* Initialize the ring buffer's write pointers */ 3207 ring->wptr = 0; 3208 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3209 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3210 3211 /* set the wb address whether it's enabled or not */ 3212 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3213 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3214 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3215 3216 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3217 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3218 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3219 3220 mdelay(1); 3221 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3222 3223 rb_addr = ring->gpu_addr >> 8; 3224 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3225 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3226 3227 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3228 if (ring->use_doorbell) { 3229 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3230 DOORBELL_OFFSET, ring->doorbell_index); 3231 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3232 DOORBELL_EN, 1); 3233 } else { 3234 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3235 } 3236 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3237 3238 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3239 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3240 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3241 3242 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3243 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3244 3245 3246 /* start the ring */ 3247 gfx_v9_0_cp_gfx_start(adev); 3248 ring->sched.ready = true;
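/*
 * Worked example for the CP_RB0_CNTL encoding above (ring size illustrative):
 * with a 64 KiB gfx ring, ring_size / 8 = 8192, so RB_BUFSZ =
 * order_base_2(8192) = 13 and RB_BLKSZ = 13 - 2 = 11.  The base written to
 * CP_RB0_BASE is the GPU address shifted right by 8, i.e. the ring base is
 * programmed in 256-byte units.
 */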
3249 3250 return 0; 3251 } 3252 3253 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3254 { 3255 int i; 3256 3257 if (enable) { 3258 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3259 } else { 3260 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3261 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3262 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3263 adev->gfx.compute_ring[i].sched.ready = false; 3264 adev->gfx.kiq.ring.sched.ready = false; 3265 } 3266 udelay(50); 3267 } 3268 3269 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3270 { 3271 const struct gfx_firmware_header_v1_0 *mec_hdr; 3272 const __le32 *fw_data; 3273 unsigned i; 3274 u32 tmp; 3275 3276 if (!adev->gfx.mec_fw) 3277 return -EINVAL; 3278 3279 gfx_v9_0_cp_compute_enable(adev, false); 3280 3281 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3282 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3283 3284 fw_data = (const __le32 *) 3285 (adev->gfx.mec_fw->data + 3286 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3287 tmp = 0; 3288 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3289 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3290 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3291 3292 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3293 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3294 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3295 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3296 3297 /* MEC1 */ 3298 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3299 mec_hdr->jt_offset); 3300 for (i = 0; i < mec_hdr->jt_size; i++) 3301 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3302 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3303 3304 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3305 adev->gfx.mec_fw_version); 3306 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3307 3308 return 0; 3309 } 3310 3311 /* KIQ functions */ 3312 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3313 { 3314 uint32_t tmp; 3315 struct amdgpu_device *adev = ring->adev; 3316 3317 /* tell RLC which is KIQ queue */ 3318 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3319 tmp &= 0xffffff00; 3320 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3321 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3322 tmp |= 0x80; 3323 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3324 } 3325 3326 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 3327 { 3328 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3329 uint64_t queue_mask = 0; 3330 int r, i; 3331 3332 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 3333 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 3334 continue; 3335 3336 /* This situation may be hit in the future if a new HW 3337 * generation exposes more than 64 queues. 
If so, the 3338 * definition of queue_mask needs updating */ 3339 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 3340 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 3341 break; 3342 } 3343 3344 queue_mask |= (1ull << i); 3345 } 3346 3347 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 3348 if (r) { 3349 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3350 return r; 3351 } 3352 3353 /* set resources */ 3354 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 3355 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 3356 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 3357 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 3358 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 3359 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 3360 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 3361 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 3362 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 3363 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3364 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3365 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 3366 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3367 3368 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 3369 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 3370 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3371 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 3372 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 3373 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 3374 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 3375 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 3376 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 3377 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 3378 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 3379 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 3380 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 3381 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 3382 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 3383 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 3384 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 3385 } 3386 3387 r = amdgpu_ring_test_helper(kiq_ring); 3388 if (r) 3389 DRM_ERROR("KCQ enable failed\n"); 3390 3391 return r; 3392 } 3393 3394 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3395 { 3396 struct amdgpu_device *adev = ring->adev; 3397 struct v9_mqd *mqd = ring->mqd_ptr; 3398 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3399 uint32_t tmp; 3400 3401 mqd->header = 0xC0310800; 3402 mqd->compute_pipelinestat_enable = 0x00000001; 3403 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3404 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3405 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3406 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3407 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3408 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3409 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3410 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3411 mqd->compute_misc_reserved = 0x00000003; 3412 3413 mqd->dynamic_cu_mask_addr_lo = 3414 lower_32_bits(ring->mqd_gpu_addr 3415 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3416 mqd->dynamic_cu_mask_addr_hi = 3417 upper_32_bits(ring->mqd_gpu_addr 3418 + offsetof(struct v9_mqd_allocation, 
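/*
 * Dword accounting for gfx_v9_0_kiq_kcq_enable() above: SET_RESOURCES is
 * 8 dwords (header plus 7 payload) and each MAP_QUEUES is 7 dwords (header
 * plus 6 payload), which matches the (7 * num_compute_rings) + 8 reserved via
 * amdgpu_ring_alloc().  queue_mask sets one bit per enabled queue in
 * mec.queue_bitmap; for example (illustrative) if only bits 0..7 are set
 * there, queue_mask ends up as 0xff.
 */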
dynamic_cu_mask)); 3419 3420 eop_base_addr = ring->eop_gpu_addr >> 8; 3421 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3422 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3423 3424 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3425 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3426 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3427 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3428 3429 mqd->cp_hqd_eop_control = tmp; 3430 3431 /* enable doorbell? */ 3432 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3433 3434 if (ring->use_doorbell) { 3435 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3436 DOORBELL_OFFSET, ring->doorbell_index); 3437 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3438 DOORBELL_EN, 1); 3439 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3440 DOORBELL_SOURCE, 0); 3441 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3442 DOORBELL_HIT, 0); 3443 } else { 3444 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3445 DOORBELL_EN, 0); 3446 } 3447 3448 mqd->cp_hqd_pq_doorbell_control = tmp; 3449 3450 /* disable the queue if it's active */ 3451 ring->wptr = 0; 3452 mqd->cp_hqd_dequeue_request = 0; 3453 mqd->cp_hqd_pq_rptr = 0; 3454 mqd->cp_hqd_pq_wptr_lo = 0; 3455 mqd->cp_hqd_pq_wptr_hi = 0; 3456 3457 /* set the pointer to the MQD */ 3458 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3459 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3460 3461 /* set MQD vmid to 0 */ 3462 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3463 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3464 mqd->cp_mqd_control = tmp; 3465 3466 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3467 hqd_gpu_addr = ring->gpu_addr >> 8; 3468 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3469 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3470 3471 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3472 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3473 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3474 (order_base_2(ring->ring_size / 4) - 1)); 3475 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3476 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3477 #ifdef __BIG_ENDIAN 3478 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3479 #endif 3480 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3481 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3482 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3483 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3484 mqd->cp_hqd_pq_control = tmp; 3485 3486 /* set the wb address whether it's enabled or not */ 3487 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3488 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3489 mqd->cp_hqd_pq_rptr_report_addr_hi = 3490 upper_32_bits(wb_gpu_addr) & 0xffff; 3491 3492 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3493 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3494 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3495 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3496 3497 tmp = 0; 3498 /* enable the doorbell if requested */ 3499 if (ring->use_doorbell) { 3500 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3501 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3502 DOORBELL_OFFSET, ring->doorbell_index); 3503 3504 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3505 DOORBELL_EN, 1); 3506 tmp = 
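/*
 * Worked example for the log2-style size fields above: GFX9_MEC_HPD_SIZE is
 * 4096 bytes = 1024 dwords, order_base_2(1024) = 10, so EOP_SIZE is written
 * as 9 and the register's 2^(EOP_SIZE+1) formula yields the same 1024 dwords.
 * CP_HQD_PQ_CONTROL.QUEUE_SIZE is encoded the same way from ring_size / 4;
 * an illustrative 256 KiB compute ring gives order_base_2(65536) - 1 = 15.
 */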
REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3507 DOORBELL_SOURCE, 0); 3508 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3509 DOORBELL_HIT, 0); 3510 } 3511 3512 mqd->cp_hqd_pq_doorbell_control = tmp; 3513 3514 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3515 ring->wptr = 0; 3516 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3517 3518 /* set the vmid for the queue */ 3519 mqd->cp_hqd_vmid = 0; 3520 3521 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3522 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3523 mqd->cp_hqd_persistent_state = tmp; 3524 3525 /* set MIN_IB_AVAIL_SIZE */ 3526 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3527 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3528 mqd->cp_hqd_ib_control = tmp; 3529 3530 /* activate the queue */ 3531 mqd->cp_hqd_active = 1; 3532 3533 return 0; 3534 } 3535 3536 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3537 { 3538 struct amdgpu_device *adev = ring->adev; 3539 struct v9_mqd *mqd = ring->mqd_ptr; 3540 int j; 3541 3542 /* disable wptr polling */ 3543 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3544 3545 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3546 mqd->cp_hqd_eop_base_addr_lo); 3547 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3548 mqd->cp_hqd_eop_base_addr_hi); 3549 3550 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3551 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3552 mqd->cp_hqd_eop_control); 3553 3554 /* enable doorbell? */ 3555 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3556 mqd->cp_hqd_pq_doorbell_control); 3557 3558 /* disable the queue if it's active */ 3559 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3560 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3561 for (j = 0; j < adev->usec_timeout; j++) { 3562 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3563 break; 3564 udelay(1); 3565 } 3566 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3567 mqd->cp_hqd_dequeue_request); 3568 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3569 mqd->cp_hqd_pq_rptr); 3570 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3571 mqd->cp_hqd_pq_wptr_lo); 3572 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3573 mqd->cp_hqd_pq_wptr_hi); 3574 } 3575 3576 /* set the pointer to the MQD */ 3577 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3578 mqd->cp_mqd_base_addr_lo); 3579 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3580 mqd->cp_mqd_base_addr_hi); 3581 3582 /* set MQD vmid to 0 */ 3583 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3584 mqd->cp_mqd_control); 3585 3586 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3587 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3588 mqd->cp_hqd_pq_base_lo); 3589 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3590 mqd->cp_hqd_pq_base_hi); 3591 3592 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3593 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3594 mqd->cp_hqd_pq_control); 3595 3596 /* set the wb address whether it's enabled or not */ 3597 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3598 mqd->cp_hqd_pq_rptr_report_addr_lo); 3599 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3600 mqd->cp_hqd_pq_rptr_report_addr_hi); 3601 3602 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3603 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3604 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3605 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3606 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3607 3608 /* enable the 
doorbell if requested */ 3609 if (ring->use_doorbell) { 3610 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3611 (adev->doorbell_index.kiq * 2) << 2); 3612 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3613 (adev->doorbell_index.userqueue_end * 2) << 2); 3614 } 3615 3616 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3617 mqd->cp_hqd_pq_doorbell_control); 3618 3619 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3620 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3621 mqd->cp_hqd_pq_wptr_lo); 3622 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3623 mqd->cp_hqd_pq_wptr_hi); 3624 3625 /* set the vmid for the queue */ 3626 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3627 3628 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3629 mqd->cp_hqd_persistent_state); 3630 3631 /* activate the queue */ 3632 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3633 mqd->cp_hqd_active); 3634 3635 if (ring->use_doorbell) 3636 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3637 3638 return 0; 3639 } 3640 3641 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3642 { 3643 struct amdgpu_device *adev = ring->adev; 3644 int j; 3645 3646 /* disable the queue if it's active */ 3647 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3648 3649 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3650 3651 for (j = 0; j < adev->usec_timeout; j++) { 3652 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3653 break; 3654 udelay(1); 3655 } 3656 3657 if (j == adev->usec_timeout) { 3658 DRM_DEBUG("KIQ dequeue request failed.\n"); 3659 3660 /* Manual disable if dequeue request times out */ 3661 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3662 } 3663 3664 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3665 0); 3666 } 3667 3668 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3669 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3670 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3671 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3672 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3673 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3675 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3676 3677 return 0; 3678 } 3679 3680 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3681 { 3682 struct amdgpu_device *adev = ring->adev; 3683 struct v9_mqd *mqd = ring->mqd_ptr; 3684 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3685 3686 gfx_v9_0_kiq_setting(ring); 3687 3688 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3689 /* reset MQD to a clean status */ 3690 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3691 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3692 3693 /* reset ring buffer */ 3694 ring->wptr = 0; 3695 amdgpu_ring_clear_ring(ring); 3696 3697 mutex_lock(&adev->srbm_mutex); 3698 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3699 gfx_v9_0_kiq_init_register(ring); 3700 soc15_grbm_select(adev, 0, 0, 0, 0); 3701 mutex_unlock(&adev->srbm_mutex); 3702 } else { 3703 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3704 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3705 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3706 mutex_lock(&adev->srbm_mutex); 3707 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3708 gfx_v9_0_mqd_init(ring); 3709 gfx_v9_0_kiq_init_register(ring); 3710 soc15_grbm_select(adev, 0, 0, 0, 0); 3711 mutex_unlock(&adev->srbm_mutex); 3712 3713 if
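/*
 * The MEC doorbell aperture programmed in gfx_v9_0_kiq_init_register() above
 * is derived as (doorbell slot index * 2) << 2, i.e. multiplied by 8.  Reading
 * this as "two dwords per 64-bit doorbell, four bytes per dword" -- so the
 * range registers take byte offsets -- is an assumption inferred from the
 * arithmetic; it is not spelled out in this file.
 */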
(adev->gfx.mec.mqd_backup[mqd_idx]) 3714 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3715 } 3716 3717 return 0; 3718 } 3719 3720 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3721 { 3722 struct amdgpu_device *adev = ring->adev; 3723 struct v9_mqd *mqd = ring->mqd_ptr; 3724 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3725 3726 if (!adev->in_gpu_reset && !adev->in_suspend) { 3727 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3728 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3729 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3730 mutex_lock(&adev->srbm_mutex); 3731 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3732 gfx_v9_0_mqd_init(ring); 3733 soc15_grbm_select(adev, 0, 0, 0, 0); 3734 mutex_unlock(&adev->srbm_mutex); 3735 3736 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3737 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3738 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3739 /* reset MQD to a clean status */ 3740 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3741 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3742 3743 /* reset ring buffer */ 3744 ring->wptr = 0; 3745 amdgpu_ring_clear_ring(ring); 3746 } else { 3747 amdgpu_ring_clear_ring(ring); 3748 } 3749 3750 return 0; 3751 } 3752 3753 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3754 { 3755 struct amdgpu_ring *ring; 3756 int r; 3757 3758 ring = &adev->gfx.kiq.ring; 3759 3760 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3761 if (unlikely(r != 0)) 3762 return r; 3763 3764 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3765 if (unlikely(r != 0)) 3766 return r; 3767 3768 gfx_v9_0_kiq_init_queue(ring); 3769 amdgpu_bo_kunmap(ring->mqd_obj); 3770 ring->mqd_ptr = NULL; 3771 amdgpu_bo_unreserve(ring->mqd_obj); 3772 ring->sched.ready = true; 3773 return 0; 3774 } 3775 3776 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3777 { 3778 struct amdgpu_ring *ring = NULL; 3779 int r = 0, i; 3780 3781 gfx_v9_0_cp_compute_enable(adev, true); 3782 3783 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3784 ring = &adev->gfx.compute_ring[i]; 3785 3786 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3787 if (unlikely(r != 0)) 3788 goto done; 3789 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3790 if (!r) { 3791 r = gfx_v9_0_kcq_init_queue(ring); 3792 amdgpu_bo_kunmap(ring->mqd_obj); 3793 ring->mqd_ptr = NULL; 3794 } 3795 amdgpu_bo_unreserve(ring->mqd_obj); 3796 if (r) 3797 goto done; 3798 } 3799 3800 r = gfx_v9_0_kiq_kcq_enable(adev); 3801 done: 3802 return r; 3803 } 3804 3805 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3806 { 3807 int r, i; 3808 struct amdgpu_ring *ring; 3809 3810 if (!(adev->flags & AMD_IS_APU)) 3811 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3812 3813 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3814 if (adev->asic_type != CHIP_ARCTURUS) { 3815 /* legacy firmware loading */ 3816 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3817 if (r) 3818 return r; 3819 } 3820 3821 r = gfx_v9_0_cp_compute_load_microcode(adev); 3822 if (r) 3823 return r; 3824 } 3825 3826 r = gfx_v9_0_kiq_resume(adev); 3827 if (r) 3828 return r; 3829 3830 if (adev->asic_type != CHIP_ARCTURUS) { 3831 r = gfx_v9_0_cp_gfx_resume(adev); 3832 if (r) 3833 return r; 3834 } 3835 3836 r = gfx_v9_0_kcq_resume(adev); 3837 if (r) 3838 return r; 3839 3840 if (adev->asic_type != CHIP_ARCTURUS) { 3841 ring = 
&adev->gfx.gfx_ring[0]; 3842 r = amdgpu_ring_test_helper(ring); 3843 if (r) 3844 return r; 3845 } 3846 3847 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3848 ring = &adev->gfx.compute_ring[i]; 3849 amdgpu_ring_test_helper(ring); 3850 } 3851 3852 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3853 3854 return 0; 3855 } 3856 3857 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3858 { 3859 if (adev->asic_type != CHIP_ARCTURUS) 3860 gfx_v9_0_cp_gfx_enable(adev, enable); 3861 gfx_v9_0_cp_compute_enable(adev, enable); 3862 } 3863 3864 static int gfx_v9_0_hw_init(void *handle) 3865 { 3866 int r; 3867 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3868 3869 if (!amdgpu_sriov_vf(adev)) 3870 gfx_v9_0_init_golden_registers(adev); 3871 3872 gfx_v9_0_constants_init(adev); 3873 3874 r = gfx_v9_0_csb_vram_pin(adev); 3875 if (r) 3876 return r; 3877 3878 r = adev->gfx.rlc.funcs->resume(adev); 3879 if (r) 3880 return r; 3881 3882 r = gfx_v9_0_cp_resume(adev); 3883 if (r) 3884 return r; 3885 3886 if (adev->asic_type != CHIP_ARCTURUS) { 3887 r = gfx_v9_0_ngg_en(adev); 3888 if (r) 3889 return r; 3890 } 3891 3892 return r; 3893 } 3894 3895 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3896 { 3897 int r, i; 3898 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3899 3900 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3901 if (r) 3902 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3903 3904 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3905 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3906 3907 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3908 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3909 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3910 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3911 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3912 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3913 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3914 amdgpu_ring_write(kiq_ring, 0); 3915 amdgpu_ring_write(kiq_ring, 0); 3916 amdgpu_ring_write(kiq_ring, 0); 3917 } 3918 r = amdgpu_ring_test_helper(kiq_ring); 3919 if (r) 3920 DRM_ERROR("KCQ disable failed\n"); 3921 3922 return r; 3923 } 3924 3925 static int gfx_v9_0_hw_fini(void *handle) 3926 { 3927 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3928 3929 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3930 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3931 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3932 3933 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3934 gfx_v9_0_kcq_disable(adev); 3935 3936 if (amdgpu_sriov_vf(adev)) { 3937 gfx_v9_0_cp_gfx_enable(adev, false); 3938 /* must disable polling for SRIOV when hw finished, otherwise 3939 * CPC engine may still keep fetching WB address which is already 3940 * invalid after sw finished and trigger DMAR reading error in 3941 * hypervisor side. 
3942 */ 3943 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3944 return 0; 3945 } 3946 3947 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3948 * otherwise KIQ is hanging when binding back 3949 */ 3950 if (!adev->in_gpu_reset && !adev->in_suspend) { 3951 mutex_lock(&adev->srbm_mutex); 3952 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3953 adev->gfx.kiq.ring.pipe, 3954 adev->gfx.kiq.ring.queue, 0); 3955 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3956 soc15_grbm_select(adev, 0, 0, 0, 0); 3957 mutex_unlock(&adev->srbm_mutex); 3958 } 3959 3960 gfx_v9_0_cp_enable(adev, false); 3961 adev->gfx.rlc.funcs->stop(adev); 3962 3963 gfx_v9_0_csb_vram_unpin(adev); 3964 3965 return 0; 3966 } 3967 3968 static int gfx_v9_0_suspend(void *handle) 3969 { 3970 return gfx_v9_0_hw_fini(handle); 3971 } 3972 3973 static int gfx_v9_0_resume(void *handle) 3974 { 3975 return gfx_v9_0_hw_init(handle); 3976 } 3977 3978 static bool gfx_v9_0_is_idle(void *handle) 3979 { 3980 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3981 3982 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3983 GRBM_STATUS, GUI_ACTIVE)) 3984 return false; 3985 else 3986 return true; 3987 } 3988 3989 static int gfx_v9_0_wait_for_idle(void *handle) 3990 { 3991 unsigned i; 3992 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3993 3994 for (i = 0; i < adev->usec_timeout; i++) { 3995 if (gfx_v9_0_is_idle(handle)) 3996 return 0; 3997 udelay(1); 3998 } 3999 return -ETIMEDOUT; 4000 } 4001 4002 static int gfx_v9_0_soft_reset(void *handle) 4003 { 4004 u32 grbm_soft_reset = 0; 4005 u32 tmp; 4006 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4007 4008 /* GRBM_STATUS */ 4009 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4010 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4011 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4012 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4013 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4014 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4015 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4016 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4017 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4018 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4019 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4020 } 4021 4022 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4023 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4024 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4025 } 4026 4027 /* GRBM_STATUS2 */ 4028 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4029 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4030 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4031 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4032 4033 4034 if (grbm_soft_reset) { 4035 /* stop the rlc */ 4036 adev->gfx.rlc.funcs->stop(adev); 4037 4038 if (adev->asic_type != CHIP_ARCTURUS) 4039 /* Disable GFX parsing/prefetching */ 4040 gfx_v9_0_cp_gfx_enable(adev, false); 4041 4042 /* Disable MEC parsing/prefetching */ 4043 gfx_v9_0_cp_compute_enable(adev, false); 4044 4045 if (grbm_soft_reset) { 4046 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4047 tmp |= grbm_soft_reset; 4048 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4049 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4050 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4051 4052 udelay(50); 4053 4054 tmp &= ~grbm_soft_reset; 4055 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4056 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4057 } 4058 4059 /* Wait a 
little for things to settle down */ 4060 udelay(50); 4061 } 4062 return 0; 4063 } 4064 4065 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4066 { 4067 uint64_t clock; 4068 4069 mutex_lock(&adev->gfx.gpu_clock_mutex); 4070 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4071 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4072 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4073 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4074 return clock; 4075 } 4076 4077 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4078 uint32_t vmid, 4079 uint32_t gds_base, uint32_t gds_size, 4080 uint32_t gws_base, uint32_t gws_size, 4081 uint32_t oa_base, uint32_t oa_size) 4082 { 4083 struct amdgpu_device *adev = ring->adev; 4084 4085 /* GDS Base */ 4086 gfx_v9_0_write_data_to_reg(ring, 0, false, 4087 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4088 gds_base); 4089 4090 /* GDS Size */ 4091 gfx_v9_0_write_data_to_reg(ring, 0, false, 4092 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4093 gds_size); 4094 4095 /* GWS */ 4096 gfx_v9_0_write_data_to_reg(ring, 0, false, 4097 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4098 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4099 4100 /* OA */ 4101 gfx_v9_0_write_data_to_reg(ring, 0, false, 4102 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4103 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4104 } 4105 4106 static const u32 vgpr_init_compute_shader[] = 4107 { 4108 0xb07c0000, 0xbe8000ff, 4109 0x000000f8, 0xbf110800, 4110 0x7e000280, 0x7e020280, 4111 0x7e040280, 0x7e060280, 4112 0x7e080280, 0x7e0a0280, 4113 0x7e0c0280, 0x7e0e0280, 4114 0x80808800, 0xbe803200, 4115 0xbf84fff5, 0xbf9c0000, 4116 0xd28c0001, 0x0001007f, 4117 0xd28d0001, 0x0002027e, 4118 0x10020288, 0xb8810904, 4119 0xb7814000, 0xd1196a01, 4120 0x00000301, 0xbe800087, 4121 0xbefc00c1, 0xd89c4000, 4122 0x00020201, 0xd89cc080, 4123 0x00040401, 0x320202ff, 4124 0x00000800, 0x80808100, 4125 0xbf84fff8, 0x7e020280, 4126 0xbf810000, 0x00000000, 4127 }; 4128 4129 static const u32 sgpr_init_compute_shader[] = 4130 { 4131 0xb07c0000, 0xbe8000ff, 4132 0x0000005f, 0xbee50080, 4133 0xbe812c65, 0xbe822c65, 4134 0xbe832c65, 0xbe842c65, 4135 0xbe852c65, 0xb77c0005, 4136 0x80808500, 0xbf84fff8, 4137 0xbe800080, 0xbf810000, 4138 }; 4139 4140 static const struct soc15_reg_entry vgpr_init_regs[] = { 4141 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4142 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4143 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4144 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4145 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4146 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4147 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4148 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4149 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 4150 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4151 }; 4152 4153 static const struct soc15_reg_entry sgpr_init_regs[] = { 4154 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4155 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4156 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4157 { 
SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4158 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4159 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4160 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4161 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4162 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 4163 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4164 }; 4165 4166 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 4167 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4168 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4169 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4170 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4171 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4172 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4173 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4174 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4175 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4176 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4177 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4178 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4179 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4180 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4181 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4182 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4183 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4184 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4185 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4186 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4187 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4188 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4189 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4190 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4191 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4192 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4193 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4194 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4195 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4196 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4197 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4198 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4199 }; 4200 4201 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4202 { 4203 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4204 int i, r; 4205 4206 r = amdgpu_ring_alloc(ring, 7); 4207 if (r) { 4208 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4209 ring->name, r); 4210 return r; 4211 } 4212 4213 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4214 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4215 4216 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4217 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4218 PACKET3_DMA_DATA_DST_SEL(1) | 4219 PACKET3_DMA_DATA_SRC_SEL(2) | 4220 PACKET3_DMA_DATA_ENGINE(0))); 4221 amdgpu_ring_write(ring, 0); 4222 amdgpu_ring_write(ring, 0); 4223 amdgpu_ring_write(ring, 0); 4224 amdgpu_ring_write(ring, 0); 4225 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4226 adev->gds.gds_size); 4227 4228 amdgpu_ring_commit(ring); 4229 4230 for (i = 0; i < adev->usec_timeout; i++) { 4231 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4232 break; 
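/*
 * Sizing for the GDS clear above: DMA_DATA with a count of 5 is 7 dwords
 * (header plus 6 payload), matching the amdgpu_ring_alloc(ring, 7).  The last
 * payload dword carries PACKET3_DMA_DATA_CMD_RAW_WAIT plus gds.gds_size, and
 * completion is detected by this busy-wait loop: the ring's read pointer must
 * catch up with the committed write pointer within usec_timeout microseconds.
 */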
4233 udelay(1); 4234 } 4235 4236 if (i >= adev->usec_timeout) 4237 r = -ETIMEDOUT; 4238 4239 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4240 4241 return r; 4242 } 4243 4244 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4245 { 4246 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4247 struct amdgpu_ib ib; 4248 struct dma_fence *f = NULL; 4249 int r, i, j, k; 4250 unsigned total_size, vgpr_offset, sgpr_offset; 4251 u64 gpu_addr; 4252 4253 /* only support when RAS is enabled */ 4254 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4255 return 0; 4256 4257 /* bail if the compute ring is not ready */ 4258 if (!ring->sched.ready) 4259 return 0; 4260 4261 total_size = 4262 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4263 total_size += 4264 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4265 total_size = ALIGN(total_size, 256); 4266 vgpr_offset = total_size; 4267 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4268 sgpr_offset = total_size; 4269 total_size += sizeof(sgpr_init_compute_shader); 4270 4271 /* allocate an indirect buffer to put the commands in */ 4272 memset(&ib, 0, sizeof(ib)); 4273 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4274 if (r) { 4275 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4276 return r; 4277 } 4278 4279 /* load the compute shaders */ 4280 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4281 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4282 4283 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4284 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4285 4286 /* init the ib length to 0 */ 4287 ib.length_dw = 0; 4288 4289 /* VGPR */ 4290 /* write the register state for the compute dispatch */ 4291 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 4292 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4293 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4294 - PACKET3_SET_SH_REG_START; 4295 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4296 } 4297 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4298 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4299 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4300 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4301 - PACKET3_SET_SH_REG_START; 4302 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4303 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4304 4305 /* write dispatch packet */ 4306 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4307 ib.ptr[ib.length_dw++] = 128; /* x */ 4308 ib.ptr[ib.length_dw++] = 1; /* y */ 4309 ib.ptr[ib.length_dw++] = 1; /* z */ 4310 ib.ptr[ib.length_dw++] = 4311 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4312 4313 /* write CS partial flush packet */ 4314 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4315 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4316 4317 /* SGPR */ 4318 /* write the register state for the compute dispatch */ 4319 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 4320 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4321 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 4322 - PACKET3_SET_SH_REG_START; 4323 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 4324 } 4325 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4326 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4327 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4328 
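/*
 * total_size in gfx_v9_0_do_edc_gpr_workarounds() is plain PM4 arithmetic:
 * each entry of vgpr_init_regs/sgpr_init_regs costs 3 dwords (SET_SH_REG
 * header, offset, value), the COMPUTE_PGM_LO/HI write costs 4, the
 * DISPATCH_DIRECT costs 5 and the EVENT_WRITE flush costs 2, all times 4 to
 * get bytes.  The two shader blobs are then appended at 256-byte aligned
 * offsets so that (ib.gpu_addr + offset) >> 8 can be written to
 * mmCOMPUTE_PGM_LO/HI as shown here.
 */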
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4329 - PACKET3_SET_SH_REG_START; 4330 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4331 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4332 4333 /* write dispatch packet */ 4334 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4335 ib.ptr[ib.length_dw++] = 128; /* x */ 4336 ib.ptr[ib.length_dw++] = 1; /* y */ 4337 ib.ptr[ib.length_dw++] = 1; /* z */ 4338 ib.ptr[ib.length_dw++] = 4339 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4340 4341 /* write CS partial flush packet */ 4342 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4343 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4344 4345 /* schedule the ib on the ring */ 4346 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4347 if (r) { 4348 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4349 goto fail; 4350 } 4351 4352 /* wait for the GPU to finish processing the IB */ 4353 r = dma_fence_wait(f, false); 4354 if (r) { 4355 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4356 goto fail; 4357 } 4358 4359 /* read back registers to clear the counters */ 4360 mutex_lock(&adev->grbm_idx_mutex); 4361 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 4362 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 4363 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 4364 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 4365 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 4366 } 4367 } 4368 } 4369 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 4370 mutex_unlock(&adev->grbm_idx_mutex); 4371 4372 fail: 4373 amdgpu_ib_free(adev, &ib, NULL); 4374 dma_fence_put(f); 4375 4376 return r; 4377 } 4378 4379 static int gfx_v9_0_early_init(void *handle) 4380 { 4381 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4382 4383 if (adev->asic_type == CHIP_ARCTURUS) 4384 adev->gfx.num_gfx_rings = 0; 4385 else 4386 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4387 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 4388 gfx_v9_0_set_ring_funcs(adev); 4389 gfx_v9_0_set_irq_funcs(adev); 4390 gfx_v9_0_set_gds_init(adev); 4391 gfx_v9_0_set_rlc_funcs(adev); 4392 4393 return 0; 4394 } 4395 4396 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 4397 struct ras_err_data *err_data, 4398 struct amdgpu_iv_entry *entry); 4399 4400 static int gfx_v9_0_ecc_late_init(void *handle) 4401 { 4402 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4403 struct ras_common_if **ras_if = &adev->gfx.ras_if; 4404 struct ras_ih_if ih_info = { 4405 .cb = gfx_v9_0_process_ras_data_cb, 4406 }; 4407 struct ras_fs_if fs_info = { 4408 .sysfs_name = "gfx_err_count", 4409 .debugfs_name = "gfx_err_inject", 4410 }; 4411 struct ras_common_if ras_block = { 4412 .block = AMDGPU_RAS_BLOCK__GFX, 4413 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 4414 .sub_block_index = 0, 4415 .name = "gfx", 4416 }; 4417 int r; 4418 4419 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 4420 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 4421 return 0; 4422 } 4423 4424 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4425 if (r) 4426 return r; 4427 4428 /* requires IBs so do in late init after IB pool is initialized */ 4429 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4430 if (r) 4431 return r; 4432 4433 /* handle resume path. */ 4434 if (*ras_if) { 4435 /* resend ras TA enable cmd during resume. 4436 * prepare to handle failure.
4437 */ 4438 ih_info.head = **ras_if; 4439 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4440 if (r) { 4441 if (r == -EAGAIN) { 4442 /* request a gpu reset. will run again. */ 4443 amdgpu_ras_request_reset_on_boot(adev, 4444 AMDGPU_RAS_BLOCK__GFX); 4445 return 0; 4446 } 4447 /* fail to enable ras, cleanup all. */ 4448 goto irq; 4449 } 4450 /* enable successfully. continue. */ 4451 goto resume; 4452 } 4453 4454 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 4455 if (!*ras_if) 4456 return -ENOMEM; 4457 4458 **ras_if = ras_block; 4459 4460 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4461 if (r) { 4462 if (r == -EAGAIN) { 4463 amdgpu_ras_request_reset_on_boot(adev, 4464 AMDGPU_RAS_BLOCK__GFX); 4465 r = 0; 4466 } 4467 goto feature; 4468 } 4469 4470 ih_info.head = **ras_if; 4471 fs_info.head = **ras_if; 4472 4473 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 4474 if (r) 4475 goto interrupt; 4476 4477 amdgpu_ras_debugfs_create(adev, &fs_info); 4478 4479 r = amdgpu_ras_sysfs_create(adev, &fs_info); 4480 if (r) 4481 goto sysfs; 4482 resume: 4483 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 4484 if (r) 4485 goto irq; 4486 4487 return 0; 4488 irq: 4489 amdgpu_ras_sysfs_remove(adev, *ras_if); 4490 sysfs: 4491 amdgpu_ras_debugfs_remove(adev, *ras_if); 4492 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 4493 interrupt: 4494 amdgpu_ras_feature_enable(adev, *ras_if, 0); 4495 feature: 4496 kfree(*ras_if); 4497 *ras_if = NULL; 4498 return r; 4499 } 4500 4501 static int gfx_v9_0_late_init(void *handle) 4502 { 4503 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4504 int r; 4505 4506 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4507 if (r) 4508 return r; 4509 4510 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4511 if (r) 4512 return r; 4513 4514 r = gfx_v9_0_ecc_late_init(handle); 4515 if (r) 4516 return r; 4517 4518 return 0; 4519 } 4520 4521 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4522 { 4523 uint32_t rlc_setting; 4524 4525 /* if RLC is not enabled, do nothing */ 4526 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4527 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4528 return false; 4529 4530 return true; 4531 } 4532 4533 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4534 { 4535 uint32_t data; 4536 unsigned i; 4537 4538 data = RLC_SAFE_MODE__CMD_MASK; 4539 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4540 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4541 4542 /* wait for RLC_SAFE_MODE */ 4543 for (i = 0; i < adev->usec_timeout; i++) { 4544 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4545 break; 4546 udelay(1); 4547 } 4548 } 4549 4550 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4551 { 4552 uint32_t data; 4553 4554 data = RLC_SAFE_MODE__CMD_MASK; 4555 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4556 } 4557 4558 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4559 bool enable) 4560 { 4561 amdgpu_gfx_rlc_enter_safe_mode(adev); 4562 4563 if (is_support_sw_smu(adev) && !enable) 4564 smu_set_gfx_cgpg(&adev->smu, enable); 4565 4566 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4567 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4568 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4569 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4570 } else { 4571 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4572 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4573 } 4574 4575 
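/*
 * The safe-mode bracketing used by the CG/PG updates here follows the
 * handshake in gfx_v9_0_set_safe_mode() above: RLC_SAFE_MODE is written with
 * the CMD bit set and a MESSAGE value of 1, then the CMD field is polled for
 * up to usec_timeout microseconds until it reads back zero, which is taken
 * here as the RLC's acknowledgement; gfx_v9_0_unset_safe_mode() writes only
 * the CMD bit (MESSAGE of 0) to leave safe mode.
 */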
amdgpu_gfx_rlc_exit_safe_mode(adev); 4576 } 4577 4578 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4579 bool enable) 4580 { 4581 /* TODO: double check if we need to perform under safe mode */ 4582 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4583 4584 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4585 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4586 else 4587 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4588 4589 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4590 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4591 else 4592 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4593 4594 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4595 } 4596 4597 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4598 bool enable) 4599 { 4600 uint32_t data, def; 4601 4602 amdgpu_gfx_rlc_enter_safe_mode(adev); 4603 4604 /* It is disabled by HW by default */ 4605 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4606 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4607 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4608 4609 if (adev->asic_type != CHIP_VEGA12) 4610 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4611 4612 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4613 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4614 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4615 4616 /* only for Vega10 & Raven1 */ 4617 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4618 4619 if (def != data) 4620 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4621 4622 /* MGLS is a global flag to control all MGLS in GFX */ 4623 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4624 /* 2 - RLC memory Light sleep */ 4625 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4626 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4627 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4628 if (def != data) 4629 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4630 } 4631 /* 3 - CP memory Light sleep */ 4632 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4633 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4634 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4635 if (def != data) 4636 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4637 } 4638 } 4639 } else { 4640 /* 1 - MGCG_OVERRIDE */ 4641 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4642 4643 if (adev->asic_type != CHIP_VEGA12) 4644 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4645 4646 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4647 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4648 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4649 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4650 4651 if (def != data) 4652 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4653 4654 /* 2 - disable MGLS in RLC */ 4655 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4656 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4657 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4658 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4659 } 4660 4661 /* 3 - disable MGLS in CP */ 4662 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4663 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4664 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4665 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4666 } 4667 } 4668 4669 amdgpu_gfx_rlc_exit_safe_mode(adev); 4670 } 4671 4672 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4673 bool enable) 4674 { 4675 
uint32_t data, def; 4676 4677 if (adev->asic_type == CHIP_ARCTURUS) 4678 return; 4679 4680 amdgpu_gfx_rlc_enter_safe_mode(adev); 4681 4682 /* Enable 3D CGCG/CGLS */ 4683 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4684 /* write cmd to clear cgcg/cgls ov */ 4685 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4686 /* unset CGCG override */ 4687 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4688 /* update CGCG and CGLS override bits */ 4689 if (def != data) 4690 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4691 4692 /* enable 3Dcgcg FSM(0x0000363f) */ 4693 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4694 4695 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4696 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4697 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4698 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4699 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4700 if (def != data) 4701 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4702 4703 /* set IDLE_POLL_COUNT(0x00900100) */ 4704 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4705 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4706 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4707 if (def != data) 4708 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4709 } else { 4710 /* Disable CGCG/CGLS */ 4711 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4712 /* disable cgcg, cgls should be disabled */ 4713 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4714 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4715 /* disable cgcg and cgls in FSM */ 4716 if (def != data) 4717 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4718 } 4719 4720 amdgpu_gfx_rlc_exit_safe_mode(adev); 4721 } 4722 4723 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4724 bool enable) 4725 { 4726 uint32_t def, data; 4727 4728 amdgpu_gfx_rlc_enter_safe_mode(adev); 4729 4730 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4731 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4732 /* unset CGCG override */ 4733 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4734 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4735 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4736 else 4737 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4738 /* update CGCG and CGLS override bits */ 4739 if (def != data) 4740 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4741 4742 /* enable cgcg FSM(0x0000363F) */ 4743 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4744 4745 if (adev->asic_type == CHIP_ARCTURUS) 4746 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4747 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4748 else 4749 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4750 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4751 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4752 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4753 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4754 if (def != data) 4755 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4756 4757 /* set IDLE_POLL_COUNT(0x00900100) */ 4758 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4759 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4760 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4761 if (def != data) 4762 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4763 } else { 4764 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 
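/*
 * The IDLE_POLL_COUNT value quoted in the comments above decomposes the way
 * the code builds it: 0x0100 in the POLL_FREQUENCY field plus 0x0090 in the
 * IDLE_POLL_COUNT field gives the documented 0x00900100, which suggests the
 * frequency field occupies the low half-word and the poll count the high
 * half-word of CP_RB_WPTR_POLL_CNTL.
 */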
4765 /* reset CGCG/CGLS bits */ 4766 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4767 /* disable cgcg and cgls in FSM */ 4768 if (def != data) 4769 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4770 } 4771 4772 amdgpu_gfx_rlc_exit_safe_mode(adev); 4773 } 4774 4775 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4776 bool enable) 4777 { 4778 if (enable) { 4779 /* CGCG/CGLS should be enabled after MGCG/MGLS 4780 * === MGCG + MGLS === 4781 */ 4782 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4783 /* === CGCG /CGLS for GFX 3D Only === */ 4784 gfx_v9_0_update_3d_clock_gating(adev, enable); 4785 /* === CGCG + CGLS === */ 4786 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4787 } else { 4788 /* CGCG/CGLS should be disabled before MGCG/MGLS 4789 * === CGCG + CGLS === 4790 */ 4791 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4792 /* === CGCG /CGLS for GFX 3D Only === */ 4793 gfx_v9_0_update_3d_clock_gating(adev, enable); 4794 /* === MGCG + MGLS === */ 4795 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4796 } 4797 return 0; 4798 } 4799 4800 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4801 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4802 .set_safe_mode = gfx_v9_0_set_safe_mode, 4803 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4804 .init = gfx_v9_0_rlc_init, 4805 .get_csb_size = gfx_v9_0_get_csb_size, 4806 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4807 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4808 .resume = gfx_v9_0_rlc_resume, 4809 .stop = gfx_v9_0_rlc_stop, 4810 .reset = gfx_v9_0_rlc_reset, 4811 .start = gfx_v9_0_rlc_start 4812 }; 4813 4814 static int gfx_v9_0_set_powergating_state(void *handle, 4815 enum amd_powergating_state state) 4816 { 4817 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4818 bool enable = (state == AMD_PG_STATE_GATE) ? 
true : false; 4819 4820 switch (adev->asic_type) { 4821 case CHIP_RAVEN: 4822 case CHIP_RENOIR: 4823 if (!enable) { 4824 amdgpu_gfx_off_ctrl(adev, false); 4825 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4826 } 4827 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4828 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4829 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4830 } else { 4831 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4832 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4833 } 4834 4835 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4836 gfx_v9_0_enable_cp_power_gating(adev, true); 4837 else 4838 gfx_v9_0_enable_cp_power_gating(adev, false); 4839 4840 /* update gfx cgpg state */ 4841 if (is_support_sw_smu(adev) && enable) 4842 smu_set_gfx_cgpg(&adev->smu, enable); 4843 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4844 4845 /* update mgcg state */ 4846 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4847 4848 if (enable) 4849 amdgpu_gfx_off_ctrl(adev, true); 4850 break; 4851 case CHIP_VEGA12: 4852 if (!enable) { 4853 amdgpu_gfx_off_ctrl(adev, false); 4854 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4855 } else { 4856 amdgpu_gfx_off_ctrl(adev, true); 4857 } 4858 break; 4859 default: 4860 break; 4861 } 4862 4863 return 0; 4864 } 4865 4866 static int gfx_v9_0_set_clockgating_state(void *handle, 4867 enum amd_clockgating_state state) 4868 { 4869 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4870 4871 if (amdgpu_sriov_vf(adev)) 4872 return 0; 4873 4874 switch (adev->asic_type) { 4875 case CHIP_VEGA10: 4876 case CHIP_VEGA12: 4877 case CHIP_VEGA20: 4878 case CHIP_RAVEN: 4879 case CHIP_ARCTURUS: 4880 case CHIP_RENOIR: 4881 gfx_v9_0_update_gfx_clock_gating(adev, 4882 state == AMD_CG_STATE_GATE ? 
true : false); 4883 break; 4884 default: 4885 break; 4886 } 4887 return 0; 4888 } 4889 4890 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4891 { 4892 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4893 int data; 4894 4895 if (amdgpu_sriov_vf(adev)) 4896 *flags = 0; 4897 4898 /* AMD_CG_SUPPORT_GFX_MGCG */ 4899 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4900 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4901 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4902 4903 /* AMD_CG_SUPPORT_GFX_CGCG */ 4904 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4905 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4906 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4907 4908 /* AMD_CG_SUPPORT_GFX_CGLS */ 4909 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4910 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4911 4912 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4913 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4914 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4915 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4916 4917 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4918 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4919 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4920 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4921 4922 if (adev->asic_type != CHIP_ARCTURUS) { 4923 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4924 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4925 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4926 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4927 4928 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4929 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4930 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4931 } 4932 } 4933 4934 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4935 { 4936 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4937 } 4938 4939 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4940 { 4941 struct amdgpu_device *adev = ring->adev; 4942 u64 wptr; 4943 4944 /* XXX check if swapping is necessary on BE */ 4945 if (ring->use_doorbell) { 4946 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4947 } else { 4948 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4949 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4950 } 4951 4952 return wptr; 4953 } 4954 4955 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4956 { 4957 struct amdgpu_device *adev = ring->adev; 4958 4959 if (ring->use_doorbell) { 4960 /* XXX check if swapping is necessary on BE */ 4961 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4962 WDOORBELL64(ring->doorbell_index, ring->wptr); 4963 } else { 4964 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4965 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4966 } 4967 } 4968 4969 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4970 { 4971 struct amdgpu_device *adev = ring->adev; 4972 u32 ref_and_mask, reg_mem_engine; 4973 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4974 4975 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4976 switch (ring->me) { 4977 case 1: 4978 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4979 break; 4980 case 2: 4981 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4982 break; 4983 default: 4984 return; 4985 } 4986 reg_mem_engine = 0; 4987 } else { 4988 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4989 reg_mem_engine = 1; /* pfp */ 4990 } 4991 4992 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4993 
adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4994 adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4995 ref_and_mask, ref_and_mask, 0x20); 4996 } 4997 4998 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4999 struct amdgpu_job *job, 5000 struct amdgpu_ib *ib, 5001 uint32_t flags) 5002 { 5003 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5004 u32 header, control = 0; 5005 5006 if (ib->flags & AMDGPU_IB_FLAG_CE) 5007 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5008 else 5009 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5010 5011 control |= ib->length_dw | (vmid << 24); 5012 5013 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5014 control |= INDIRECT_BUFFER_PRE_ENB(1); 5015 5016 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 5017 gfx_v9_0_ring_emit_de_meta(ring); 5018 } 5019 5020 amdgpu_ring_write(ring, header); 5021 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5022 amdgpu_ring_write(ring, 5023 #ifdef __BIG_ENDIAN 5024 (2 << 0) | 5025 #endif 5026 lower_32_bits(ib->gpu_addr)); 5027 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5028 amdgpu_ring_write(ring, control); 5029 } 5030 5031 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5032 struct amdgpu_job *job, 5033 struct amdgpu_ib *ib, 5034 uint32_t flags) 5035 { 5036 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5037 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5038 5039 /* Currently, there is a high possibility to get wave ID mismatch 5040 * between ME and GDS, leading to a hw deadlock, because ME generates 5041 * different wave IDs than the GDS expects. This situation happens 5042 * randomly when at least 5 compute pipes use GDS ordered append. 5043 * The wave IDs generated by ME are also wrong after suspend/resume. 5044 * Those are probably bugs somewhere else in the kernel driver. 5045 * 5046 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5047 * GDS to 0 for this ring (me/pipe). 5048 */ 5049 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5050 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5051 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5052 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5053 } 5054 5055 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5056 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5057 amdgpu_ring_write(ring, 5058 #ifdef __BIG_ENDIAN 5059 (2 << 0) | 5060 #endif 5061 lower_32_bits(ib->gpu_addr)); 5062 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5063 amdgpu_ring_write(ring, control); 5064 } 5065 5066 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5067 u64 seq, unsigned flags) 5068 { 5069 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5070 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5071 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5072 5073 /* RELEASE_MEM - flush caches, send int */ 5074 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5075 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5076 EOP_TC_NC_ACTION_EN) : 5077 (EOP_TCL1_ACTION_EN | 5078 EOP_TC_ACTION_EN | 5079 EOP_TC_WB_ACTION_EN | 5080 EOP_TC_MD_ACTION_EN)) | 5081 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5082 EVENT_INDEX(5))); 5083 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5084 5085 /* 5086 * the address should be Qword aligned if 64bit write, Dword 5087 * aligned if only send 32bit data low (discard data high) 5088 */ 5089 if (write64bit) 5090 BUG_ON(addr & 0x7); 5091 else 5092 BUG_ON(addr & 0x3); 5093 amdgpu_ring_write(ring, lower_32_bits(addr)); 5094 amdgpu_ring_write(ring, upper_32_bits(addr)); 5095 amdgpu_ring_write(ring, lower_32_bits(seq)); 5096 amdgpu_ring_write(ring, upper_32_bits(seq)); 5097 amdgpu_ring_write(ring, 0); 5098 } 5099 5100 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5101 { 5102 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5103 uint32_t seq = ring->fence_drv.sync_seq; 5104 uint64_t addr = ring->fence_drv.gpu_addr; 5105 5106 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5107 lower_32_bits(addr), upper_32_bits(addr), 5108 seq, 0xffffffff, 4); 5109 } 5110 5111 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5112 unsigned vmid, uint64_t pd_addr) 5113 { 5114 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5115 5116 /* compute doesn't have PFP */ 5117 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5118 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5119 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5120 amdgpu_ring_write(ring, 0x0); 5121 } 5122 } 5123 5124 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5125 { 5126 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5127 } 5128 5129 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5130 { 5131 u64 wptr; 5132 5133 /* XXX check if swapping is necessary on BE */ 5134 if (ring->use_doorbell) 5135 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5136 else 5137 BUG(); 5138 return wptr; 5139 } 5140 5141 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 5142 bool acquire) 5143 { 5144 struct amdgpu_device *adev = ring->adev; 5145 int pipe_num, tmp, reg; 5146 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 5147 5148 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 5149 5150 /* first me only has 2 entries, GFX and HP3D */ 5151 if (ring->me > 0) 5152 pipe_num -= 2; 5153 5154 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 5155 tmp = RREG32(reg); 5156 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 5157 WREG32(reg, tmp); 5158 } 5159 5160 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 5161 struct amdgpu_ring *ring, 5162 bool acquire) 5163 { 5164 int i, pipe; 5165 bool reserve; 5166 struct amdgpu_ring *iring; 5167 5168 mutex_lock(&adev->gfx.pipe_reserve_mutex); 5169 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 5170 if (acquire) 5171 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5172 else 5173 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5174 5175 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 5176 /* Clear all reservations - everyone reacquires all resources */ 5177 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 5178 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 5179 true); 5180 5181 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 5182 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 5183 true); 5184 } else { 5185 /* Lower all pipes without a current reservation */ 5186 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 5187 iring = &adev->gfx.gfx_ring[i]; 5188 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5189 iring->me, 5190 iring->pipe, 5191 0); 5192 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5193 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5194 } 5195 5196 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 5197 iring = &adev->gfx.compute_ring[i]; 5198 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5199 iring->me, 5200 iring->pipe, 5201 0); 5202 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5203 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5204 } 5205 } 5206 5207 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 5208 } 5209 5210 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 5211 struct amdgpu_ring *ring, 5212 bool acquire) 5213 { 5214 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 5215 uint32_t queue_priority = acquire ? 
0xf : 0x0; 5216 5217 mutex_lock(&adev->srbm_mutex); 5218 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5219 5220 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 5221 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 5222 5223 soc15_grbm_select(adev, 0, 0, 0, 0); 5224 mutex_unlock(&adev->srbm_mutex); 5225 } 5226 5227 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 5228 enum drm_sched_priority priority) 5229 { 5230 struct amdgpu_device *adev = ring->adev; 5231 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 5232 5233 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 5234 return; 5235 5236 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 5237 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 5238 } 5239 5240 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5241 { 5242 struct amdgpu_device *adev = ring->adev; 5243 5244 /* XXX check if swapping is necessary on BE */ 5245 if (ring->use_doorbell) { 5246 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5247 WDOORBELL64(ring->doorbell_index, ring->wptr); 5248 } else{ 5249 BUG(); /* only DOORBELL method supported on gfx9 now */ 5250 } 5251 } 5252 5253 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5254 u64 seq, unsigned int flags) 5255 { 5256 struct amdgpu_device *adev = ring->adev; 5257 5258 /* we only allocate 32bit for each seq wb address */ 5259 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5260 5261 /* write fence seq to the "addr" */ 5262 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5263 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5264 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5265 amdgpu_ring_write(ring, lower_32_bits(addr)); 5266 amdgpu_ring_write(ring, upper_32_bits(addr)); 5267 amdgpu_ring_write(ring, lower_32_bits(seq)); 5268 5269 if (flags & AMDGPU_FENCE_FLAG_INT) { 5270 /* set register to trigger INT */ 5271 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5272 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5273 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5274 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5275 amdgpu_ring_write(ring, 0); 5276 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5277 } 5278 } 5279 5280 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5281 { 5282 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5283 amdgpu_ring_write(ring, 0); 5284 } 5285 5286 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5287 { 5288 struct v9_ce_ib_state ce_payload = {0}; 5289 uint64_t csa_addr; 5290 int cnt; 5291 5292 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5293 csa_addr = amdgpu_csa_vaddr(ring->adev); 5294 5295 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5296 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5297 WRITE_DATA_DST_SEL(8) | 5298 WR_CONFIRM) | 5299 WRITE_DATA_CACHE_POLICY(0)); 5300 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5301 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5302 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5303 } 5304 5305 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5306 { 5307 struct v9_de_ib_state de_payload = {0}; 5308 uint64_t csa_addr, gds_addr; 5309 int cnt; 5310 5311 csa_addr = amdgpu_csa_vaddr(ring->adev); 5312 gds_addr = csa_addr + 4096; 5313 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 5314 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5315 5316 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5317 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5318 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5319 WRITE_DATA_DST_SEL(8) | 5320 WR_CONFIRM) | 5321 WRITE_DATA_CACHE_POLICY(0)); 5322 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5323 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5324 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5325 } 5326 5327 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5328 { 5329 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5330 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5331 } 5332 5333 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5334 { 5335 uint32_t dw2 = 0; 5336 5337 if (amdgpu_sriov_vf(ring->adev)) 5338 gfx_v9_0_ring_emit_ce_meta(ring); 5339 5340 gfx_v9_0_ring_emit_tmz(ring, true); 5341 5342 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5343 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5344 /* set load_global_config & load_global_uconfig */ 5345 dw2 |= 0x8001; 5346 /* set load_cs_sh_regs */ 5347 dw2 |= 0x01000000; 5348 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5349 dw2 |= 0x10002; 5350 5351 /* set load_ce_ram if preamble presented */ 5352 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5353 dw2 |= 0x10000000; 5354 } else { 5355 /* still load_ce_ram if this is the first time preamble presented 5356 * although there is no context switch happens. 5357 */ 5358 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5359 dw2 |= 0x10000000; 5360 } 5361 5362 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5363 amdgpu_ring_write(ring, dw2); 5364 amdgpu_ring_write(ring, 0); 5365 } 5366 5367 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5368 { 5369 unsigned ret; 5370 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5371 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5372 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5373 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5374 ret = ring->wptr & ring->buf_mask; 5375 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5376 return ret; 5377 } 5378 5379 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5380 { 5381 unsigned cur; 5382 BUG_ON(offset > ring->buf_mask); 5383 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5384 5385 cur = (ring->wptr & ring->buf_mask) - 1; 5386 if (likely(cur > offset)) 5387 ring->ring[offset] = cur - offset; 5388 else 5389 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5390 } 5391 5392 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5393 { 5394 struct amdgpu_device *adev = ring->adev; 5395 5396 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5397 amdgpu_ring_write(ring, 0 | /* src: register*/ 5398 (5 << 8) | /* dst: memory */ 5399 (1 << 20)); /* write confirm */ 5400 amdgpu_ring_write(ring, reg); 5401 amdgpu_ring_write(ring, 0); 5402 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5403 adev->virt.reg_val_offs * 4)); 5404 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5405 adev->virt.reg_val_offs * 4)); 5406 } 5407 5408 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5409 uint32_t val) 5410 { 5411 uint32_t cmd = 0; 5412 5413 switch (ring->funcs->type) { 5414 case AMDGPU_RING_TYPE_GFX: 5415 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5416 break; 5417 case AMDGPU_RING_TYPE_KIQ: 5418 cmd = (1 << 16); /* no inc addr */ 5419 break; 5420 default: 5421 cmd = WR_CONFIRM; 5422 break; 5423 } 5424 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5425 amdgpu_ring_write(ring, cmd); 5426 amdgpu_ring_write(ring, reg); 5427 amdgpu_ring_write(ring, 0); 5428 amdgpu_ring_write(ring, val); 5429 } 5430 5431 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5432 uint32_t val, uint32_t mask) 5433 { 5434 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5435 } 5436 5437 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5438 uint32_t reg0, uint32_t reg1, 5439 uint32_t ref, uint32_t mask) 5440 { 5441 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5442 struct amdgpu_device *adev = ring->adev; 5443 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5444 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5445 5446 if (fw_version_ok) 5447 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5448 ref, mask, 0x20); 5449 else 5450 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5451 ref, mask); 5452 } 5453 5454 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5455 { 5456 struct amdgpu_device *adev = ring->adev; 5457 uint32_t value = 0; 5458 5459 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5460 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5461 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5462 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5463 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5464 } 5465 5466 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5467 enum amdgpu_interrupt_state state) 5468 { 5469 switch (state) { 5470 case AMDGPU_IRQ_STATE_DISABLE: 5471 case AMDGPU_IRQ_STATE_ENABLE: 5472 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5473 TIME_STAMP_INT_ENABLE, 5474 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5475 break; 5476 default: 5477 break; 5478 } 5479 } 5480 5481 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5482 int me, int pipe, 5483 enum amdgpu_interrupt_state state) 5484 { 5485 u32 mec_int_cntl, mec_int_cntl_reg; 5486 5487 /* 5488 * amdgpu controls only the first MEC. That's why this function only 5489 * handles the setting of interrupts for this specific MEC. All other 5490 * pipes' interrupts are set by amdkfd. 
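 * In practice that means only me == 1 is accepted below; any other ME
 * value is rejected with a DRM_DEBUG message and no register is written.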
5491 */ 5492 5493 if (me == 1) { 5494 switch (pipe) { 5495 case 0: 5496 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5497 break; 5498 case 1: 5499 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5500 break; 5501 case 2: 5502 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5503 break; 5504 case 3: 5505 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5506 break; 5507 default: 5508 DRM_DEBUG("invalid pipe %d\n", pipe); 5509 return; 5510 } 5511 } else { 5512 DRM_DEBUG("invalid me %d\n", me); 5513 return; 5514 } 5515 5516 switch (state) { 5517 case AMDGPU_IRQ_STATE_DISABLE: 5518 mec_int_cntl = RREG32(mec_int_cntl_reg); 5519 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5520 TIME_STAMP_INT_ENABLE, 0); 5521 WREG32(mec_int_cntl_reg, mec_int_cntl); 5522 break; 5523 case AMDGPU_IRQ_STATE_ENABLE: 5524 mec_int_cntl = RREG32(mec_int_cntl_reg); 5525 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5526 TIME_STAMP_INT_ENABLE, 1); 5527 WREG32(mec_int_cntl_reg, mec_int_cntl); 5528 break; 5529 default: 5530 break; 5531 } 5532 } 5533 5534 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5535 struct amdgpu_irq_src *source, 5536 unsigned type, 5537 enum amdgpu_interrupt_state state) 5538 { 5539 switch (state) { 5540 case AMDGPU_IRQ_STATE_DISABLE: 5541 case AMDGPU_IRQ_STATE_ENABLE: 5542 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5543 PRIV_REG_INT_ENABLE, 5544 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5545 break; 5546 default: 5547 break; 5548 } 5549 5550 return 0; 5551 } 5552 5553 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5554 struct amdgpu_irq_src *source, 5555 unsigned type, 5556 enum amdgpu_interrupt_state state) 5557 { 5558 switch (state) { 5559 case AMDGPU_IRQ_STATE_DISABLE: 5560 case AMDGPU_IRQ_STATE_ENABLE: 5561 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5562 PRIV_INSTR_INT_ENABLE, 5563 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5564 default: 5565 break; 5566 } 5567 5568 return 0; 5569 } 5570 5571 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5572 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5573 CP_ECC_ERROR_INT_ENABLE, 1) 5574 5575 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5576 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5577 CP_ECC_ERROR_INT_ENABLE, 0) 5578 5579 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5580 struct amdgpu_irq_src *source, 5581 unsigned type, 5582 enum amdgpu_interrupt_state state) 5583 { 5584 switch (state) { 5585 case AMDGPU_IRQ_STATE_DISABLE: 5586 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5587 CP_ECC_ERROR_INT_ENABLE, 0); 5588 DISABLE_ECC_ON_ME_PIPE(1, 0); 5589 DISABLE_ECC_ON_ME_PIPE(1, 1); 5590 DISABLE_ECC_ON_ME_PIPE(1, 2); 5591 DISABLE_ECC_ON_ME_PIPE(1, 3); 5592 break; 5593 5594 case AMDGPU_IRQ_STATE_ENABLE: 5595 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5596 CP_ECC_ERROR_INT_ENABLE, 1); 5597 ENABLE_ECC_ON_ME_PIPE(1, 0); 5598 ENABLE_ECC_ON_ME_PIPE(1, 1); 5599 ENABLE_ECC_ON_ME_PIPE(1, 2); 5600 ENABLE_ECC_ON_ME_PIPE(1, 3); 5601 break; 5602 default: 5603 break; 5604 } 5605 5606 return 0; 5607 } 5608 5609 5610 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5611 struct amdgpu_irq_src *src, 5612 unsigned type, 5613 enum amdgpu_interrupt_state state) 5614 { 5615 switch (type) { 5616 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5617 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5618 break; 5619 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5620 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5621 break; 5622 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5623 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5624 break; 5625 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5626 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5627 break; 5628 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5629 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5630 break; 5631 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5632 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5633 break; 5634 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5635 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5636 break; 5637 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5638 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5639 break; 5640 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5641 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5642 break; 5643 default: 5644 break; 5645 } 5646 return 0; 5647 } 5648 5649 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5650 struct amdgpu_irq_src *source, 5651 struct amdgpu_iv_entry *entry) 5652 { 5653 int i; 5654 u8 me_id, pipe_id, queue_id; 5655 struct amdgpu_ring *ring; 5656 5657 DRM_DEBUG("IH: CP EOP\n"); 5658 me_id = (entry->ring_id & 0x0c) >> 2; 5659 pipe_id = (entry->ring_id & 0x03) >> 0; 5660 queue_id = (entry->ring_id & 0x70) >> 4; 5661 5662 switch (me_id) { 5663 case 0: 5664 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5665 break; 5666 case 1: 5667 case 2: 5668 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5669 ring = &adev->gfx.compute_ring[i]; 5670 /* Per-queue interrupt is supported for MEC starting from VI. 5671 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
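 * The IV entry still carries me/pipe/queue, though, so only the fence of
 * the ring that actually produced this EOP interrupt is processed below.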
5672 */ 5673 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5674 amdgpu_fence_process(ring); 5675 } 5676 break; 5677 } 5678 return 0; 5679 } 5680 5681 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5682 struct amdgpu_iv_entry *entry) 5683 { 5684 u8 me_id, pipe_id, queue_id; 5685 struct amdgpu_ring *ring; 5686 int i; 5687 5688 me_id = (entry->ring_id & 0x0c) >> 2; 5689 pipe_id = (entry->ring_id & 0x03) >> 0; 5690 queue_id = (entry->ring_id & 0x70) >> 4; 5691 5692 switch (me_id) { 5693 case 0: 5694 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5695 break; 5696 case 1: 5697 case 2: 5698 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5699 ring = &adev->gfx.compute_ring[i]; 5700 if (ring->me == me_id && ring->pipe == pipe_id && 5701 ring->queue == queue_id) 5702 drm_sched_fault(&ring->sched); 5703 } 5704 break; 5705 } 5706 } 5707 5708 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5709 struct amdgpu_irq_src *source, 5710 struct amdgpu_iv_entry *entry) 5711 { 5712 DRM_ERROR("Illegal register access in command stream\n"); 5713 gfx_v9_0_fault(adev, entry); 5714 return 0; 5715 } 5716 5717 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5718 struct amdgpu_irq_src *source, 5719 struct amdgpu_iv_entry *entry) 5720 { 5721 DRM_ERROR("Illegal instruction in command stream\n"); 5722 gfx_v9_0_fault(adev, entry); 5723 return 0; 5724 } 5725 5726 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 5727 struct ras_err_data *err_data, 5728 struct amdgpu_iv_entry *entry) 5729 { 5730 /* TODO ue will trigger an interrupt. */ 5731 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 5732 if (adev->gfx.funcs->query_ras_error_count) 5733 adev->gfx.funcs->query_ras_error_count(adev, err_data); 5734 amdgpu_ras_reset_gpu(adev, 0); 5735 return AMDGPU_RAS_SUCCESS; 5736 } 5737 5738 static const struct { 5739 const char *name; 5740 uint32_t ip; 5741 uint32_t inst; 5742 uint32_t seg; 5743 uint32_t reg_offset; 5744 uint32_t per_se_instance; 5745 int32_t num_instance; 5746 uint32_t sec_count_mask; 5747 uint32_t ded_count_mask; 5748 } gfx_ras_edc_regs[] = { 5749 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 5750 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5751 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, 5752 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 5753 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), 5754 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, 5755 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5756 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, 5757 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5758 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, 5759 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 5760 REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), 5761 REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, 5762 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5763 REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, 5764 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5765 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5766 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, 5767 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 5768 REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), 5769 REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, 5770 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 5771 REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, 5772 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 
0, mmDC_EDC_RESTORE_CNT), 0, 1, 5773 REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, 5774 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 5775 REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, 5776 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5777 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), 5778 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, 5779 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5780 REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, 5781 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5782 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5783 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, 5784 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5785 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5786 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5787 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, 5788 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5789 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5790 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, 5791 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5792 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5793 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5794 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, 5795 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5796 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5797 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5798 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, 5799 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5800 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5801 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5802 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, 5803 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5804 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5805 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5806 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, 5807 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, 5808 REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, 5809 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5810 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5811 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, 5812 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5813 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, 5814 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5815 REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, 5816 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5817 REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, 5818 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5819 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, 5820 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5821 REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, 5822 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5823 REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, 5824 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5825 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5826 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, 5827 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5828 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5829 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, 5830 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5831 
REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5832 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, 5833 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5834 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5835 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, 5836 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5837 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5838 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, 5839 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5840 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, 5841 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5842 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, 5843 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5844 REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, 5845 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5846 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, 5847 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5848 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, 5849 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5850 REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, 5851 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5852 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, 5853 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5854 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, 5855 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5856 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, 5857 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5858 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5859 0 }, 5860 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5861 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, 5862 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5863 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5864 0 }, 5865 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5866 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, 5867 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, 5868 REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, 5869 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5870 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5871 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, 5872 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5873 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5874 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, 5875 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5876 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, 5877 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5878 REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, 5879 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5880 REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, 5881 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5882 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5883 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, 5884 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, 
mmTCP_EDC_CNT_NEW), 1, 16, 5885 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5886 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, 5887 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5888 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5889 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, 5890 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5891 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5892 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, 5893 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5894 REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, 5895 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5896 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5897 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, 5898 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5899 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5900 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, 5901 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5902 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), 5903 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, 5904 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5905 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5906 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, 5907 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5908 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5909 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, 5910 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5911 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5912 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, 5913 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5914 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5915 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, 5916 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5917 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5918 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, 5919 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5920 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5921 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, 5922 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5923 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5924 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, 5925 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5926 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5927 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, 5928 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5929 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5930 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, 5931 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5932 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5933 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, 5934 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5935 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5936 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, 5937 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5938 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5939 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, 5940 { 
"SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5941 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5942 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, 5943 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5944 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5945 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, 5946 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", 5947 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5948 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5949 0 }, 5950 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5951 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5952 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5953 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, 5954 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5955 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5956 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", 5957 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5958 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, 5959 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5960 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 5961 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, 5962 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5963 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 5964 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, 5965 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5966 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 5967 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, 5968 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5969 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 5970 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, 5971 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5972 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 5973 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, 5974 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", 5975 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5976 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 5977 0 }, 5978 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5979 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5980 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5981 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, 5982 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5983 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5984 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", 5985 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5986 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, 5987 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5988 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 5989 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, 5990 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5991 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 5992 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, 5993 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5994 
REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 5995 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, 5996 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5997 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 5998 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, 5999 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6000 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6001 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, 6002 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6003 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, 6004 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6005 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, 6006 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6007 REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, 6008 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6009 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, 6010 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6011 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, 6012 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6013 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6014 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, 6015 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6016 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6017 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, 6018 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6019 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6020 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, 6021 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6022 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, 6023 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6024 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, 6025 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6026 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, 6027 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6028 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, 6029 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6030 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, 6031 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6032 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, 6033 }; 6034 6035 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6036 void *inject_if) 6037 { 6038 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6039 int ret; 6040 struct ta_ras_trigger_error_input block_info = { 0 }; 6041 6042 if (adev->asic_type != CHIP_VEGA20) 6043 return -EINVAL; 6044 6045 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6046 return -EINVAL; 6047 6048 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6049 return -EPERM; 6050 6051 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6052 info->head.type)) { 6053 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6054 ras_gfx_subblocks[info->head.sub_block_index].name, 6055 info->head.type); 6056 return -EPERM; 6057 } 6058 6059 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6060 info->head.type)) { 6061 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6062 
ras_gfx_subblocks[info->head.sub_block_index].name, 6063 info->head.type); 6064 return -EPERM; 6065 } 6066 6067 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6068 block_info.sub_block_index = 6069 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6070 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6071 block_info.address = info->address; 6072 block_info.value = info->value; 6073 6074 mutex_lock(&adev->grbm_idx_mutex); 6075 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6076 mutex_unlock(&adev->grbm_idx_mutex); 6077 6078 return ret; 6079 } 6080 6081 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6082 void *ras_error_status) 6083 { 6084 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6085 uint32_t sec_count, ded_count; 6086 uint32_t i; 6087 uint32_t reg_value; 6088 uint32_t se_id, instance_id; 6089 6090 if (adev->asic_type != CHIP_VEGA20) 6091 return -EINVAL; 6092 6093 err_data->ue_count = 0; 6094 err_data->ce_count = 0; 6095 6096 mutex_lock(&adev->grbm_idx_mutex); 6097 for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { 6098 for (instance_id = 0; instance_id < 256; instance_id++) { 6099 for (i = 0; 6100 i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); 6101 i++) { 6102 if (se_id != 0 && 6103 !gfx_ras_edc_regs[i].per_se_instance) 6104 continue; 6105 if (instance_id >= gfx_ras_edc_regs[i].num_instance) 6106 continue; 6107 6108 gfx_v9_0_select_se_sh(adev, se_id, 0, 6109 instance_id); 6110 6111 reg_value = RREG32( 6112 adev->reg_offset[gfx_ras_edc_regs[i].ip] 6113 [gfx_ras_edc_regs[i].inst] 6114 [gfx_ras_edc_regs[i].seg] + 6115 gfx_ras_edc_regs[i].reg_offset); 6116 sec_count = reg_value & 6117 gfx_ras_edc_regs[i].sec_count_mask; 6118 ded_count = reg_value & 6119 gfx_ras_edc_regs[i].ded_count_mask; 6120 if (sec_count) { 6121 DRM_INFO( 6122 "Instance[%d][%d]: SubBlock %s, SEC %d\n", 6123 se_id, instance_id, 6124 gfx_ras_edc_regs[i].name, 6125 sec_count); 6126 err_data->ce_count++; 6127 } 6128 6129 if (ded_count) { 6130 DRM_INFO( 6131 "Instance[%d][%d]: SubBlock %s, DED %d\n", 6132 se_id, instance_id, 6133 gfx_ras_edc_regs[i].name, 6134 ded_count); 6135 err_data->ue_count++; 6136 } 6137 } 6138 } 6139 } 6140 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6141 mutex_unlock(&adev->grbm_idx_mutex); 6142 6143 return 0; 6144 } 6145 6146 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6147 struct amdgpu_irq_src *source, 6148 struct amdgpu_iv_entry *entry) 6149 { 6150 struct ras_common_if *ras_if = adev->gfx.ras_if; 6151 struct ras_dispatch_if ih_data = { 6152 .entry = entry, 6153 }; 6154 6155 if (!ras_if) 6156 return 0; 6157 6158 ih_data.head = *ras_if; 6159 6160 DRM_ERROR("CP ECC ERROR IRQ\n"); 6161 amdgpu_ras_interrupt_dispatch(adev, &ih_data); 6162 return 0; 6163 } 6164 6165 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6166 .name = "gfx_v9_0", 6167 .early_init = gfx_v9_0_early_init, 6168 .late_init = gfx_v9_0_late_init, 6169 .sw_init = gfx_v9_0_sw_init, 6170 .sw_fini = gfx_v9_0_sw_fini, 6171 .hw_init = gfx_v9_0_hw_init, 6172 .hw_fini = gfx_v9_0_hw_fini, 6173 .suspend = gfx_v9_0_suspend, 6174 .resume = gfx_v9_0_resume, 6175 .is_idle = gfx_v9_0_is_idle, 6176 .wait_for_idle = gfx_v9_0_wait_for_idle, 6177 .soft_reset = gfx_v9_0_soft_reset, 6178 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6179 .set_powergating_state = gfx_v9_0_set_powergating_state, 6180 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6181 
}; 6182 6183 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6184 .type = AMDGPU_RING_TYPE_GFX, 6185 .align_mask = 0xff, 6186 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6187 .support_64bit_ptrs = true, 6188 .vmhub = AMDGPU_GFXHUB_0, 6189 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6190 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6191 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6192 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6193 5 + /* COND_EXEC */ 6194 7 + /* PIPELINE_SYNC */ 6195 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6196 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6197 2 + /* VM_FLUSH */ 6198 8 + /* FENCE for VM_FLUSH */ 6199 20 + /* GDS switch */ 6200 4 + /* double SWITCH_BUFFER, 6201 the first COND_EXEC jump to the place just 6202 prior to this double SWITCH_BUFFER */ 6203 5 + /* COND_EXEC */ 6204 7 + /* HDP_flush */ 6205 4 + /* VGT_flush */ 6206 14 + /* CE_META */ 6207 31 + /* DE_META */ 6208 3 + /* CNTX_CTRL */ 6209 5 + /* HDP_INVL */ 6210 8 + 8 + /* FENCE x2 */ 6211 2, /* SWITCH_BUFFER */ 6212 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6213 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6214 .emit_fence = gfx_v9_0_ring_emit_fence, 6215 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6216 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6217 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6218 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6219 .test_ring = gfx_v9_0_ring_test_ring, 6220 .test_ib = gfx_v9_0_ring_test_ib, 6221 .insert_nop = amdgpu_ring_insert_nop, 6222 .pad_ib = amdgpu_ring_generic_pad_ib, 6223 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6224 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6225 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6226 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6227 .emit_tmz = gfx_v9_0_ring_emit_tmz, 6228 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6229 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6230 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6231 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6232 }; 6233 6234 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6235 .type = AMDGPU_RING_TYPE_COMPUTE, 6236 .align_mask = 0xff, 6237 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6238 .support_64bit_ptrs = true, 6239 .vmhub = AMDGPU_GFXHUB_0, 6240 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6241 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6242 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6243 .emit_frame_size = 6244 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6245 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6246 5 + /* hdp invalidate */ 6247 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6248 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6249 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6250 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6251 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6252 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6253 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6254 .emit_fence = gfx_v9_0_ring_emit_fence, 6255 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6256 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6257 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6258 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6259 .test_ring = gfx_v9_0_ring_test_ring, 6260 .test_ib = gfx_v9_0_ring_test_ib, 6261 .insert_nop = amdgpu_ring_insert_nop, 6262 .pad_ib = amdgpu_ring_generic_pad_ib, 6263 .set_priority = gfx_v9_0_ring_set_priority_compute, 6264 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6265 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6266 .emit_reg_write_reg_wait = 
gfx_v9_0_ring_emit_reg_write_reg_wait, 6267 }; 6268 6269 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6270 .type = AMDGPU_RING_TYPE_KIQ, 6271 .align_mask = 0xff, 6272 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6273 .support_64bit_ptrs = true, 6274 .vmhub = AMDGPU_GFXHUB_0, 6275 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6276 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6277 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6278 .emit_frame_size = 6279 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6280 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6281 5 + /* hdp invalidate */ 6282 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6283 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6284 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6285 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6286 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6287 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6288 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6289 .test_ring = gfx_v9_0_ring_test_ring, 6290 .insert_nop = amdgpu_ring_insert_nop, 6291 .pad_ib = amdgpu_ring_generic_pad_ib, 6292 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6293 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6294 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6295 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6296 }; 6297 6298 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6299 { 6300 int i; 6301 6302 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6303 6304 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6305 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6306 6307 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6308 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 6309 } 6310 6311 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 6312 .set = gfx_v9_0_set_eop_interrupt_state, 6313 .process = gfx_v9_0_eop_irq, 6314 }; 6315 6316 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 6317 .set = gfx_v9_0_set_priv_reg_fault_state, 6318 .process = gfx_v9_0_priv_reg_irq, 6319 }; 6320 6321 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 6322 .set = gfx_v9_0_set_priv_inst_fault_state, 6323 .process = gfx_v9_0_priv_inst_irq, 6324 }; 6325 6326 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 6327 .set = gfx_v9_0_set_cp_ecc_error_state, 6328 .process = gfx_v9_0_cp_ecc_error_irq, 6329 }; 6330 6331 6332 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 6333 { 6334 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6335 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 6336 6337 adev->gfx.priv_reg_irq.num_types = 1; 6338 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 6339 6340 adev->gfx.priv_inst_irq.num_types = 1; 6341 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 6342 6343 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 6344 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 6345 } 6346 6347 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 6348 { 6349 switch (adev->asic_type) { 6350 case CHIP_VEGA10: 6351 case CHIP_VEGA12: 6352 case CHIP_VEGA20: 6353 case CHIP_RAVEN: 6354 case CHIP_ARCTURUS: 6355 case CHIP_RENOIR: 6356 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 6357 break; 6358 default: 6359 break; 6360 } 6361 } 6362 6363 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 6364 { 6365 /* init asci gds info */ 6366 switch (adev->asic_type) { 6367 case CHIP_VEGA10: 6368 case CHIP_VEGA12: 6369 case 
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}
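/*
 * Worked example (illustration only, the register value is made up): with
 * max_cu_per_sh = 8, amdgpu_gfx_create_bitmask(8) yields 0xff; if the
 * combined INACTIVE_CUS field above reads 0x03 (CU0 and CU1 disabled), the
 * helper returns (~0x03) & 0xff = 0xfc, i.e. CUs 2-7 are reported active.
 */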
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which suits the Vega ASICs and their
			 * 4*2 SE/SH layout.  For Arcturus the SE/SH layout changed
			 * to 8*1.  To minimize the impact, we keep it compatible
			 * with the current bitmap array as below:
			 *	SE4,SH0 --> bitmap[0][1]
			 *	SE5,SH0 --> bitmap[1][1]
			 *	SE6,SH0 --> bitmap[2][1]
			 *	SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
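/*
 * Usage sketch (an assumption about where registration happens, for
 * illustration only): the SoC-level setup code registers this IP block
 * alongside the other engines, roughly
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * after which the common IP framework drives gfx_v9_0_ip_funcs through the
 * usual early_init/sw_init/hw_init/suspend/resume/hw_fini sequence.
 */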