/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS			1
#define GFX9_MEC_HPD_SIZE			4096
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET	0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

#define mmGCEA_PROBE_MAP		0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX	0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 101 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 113 114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 120 121 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 123 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 125 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 127 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 129 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 131 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 133 134 enum ta_ras_gfx_subblock { 135 /*CPC*/ 136 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 137 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 138 TA_RAS_BLOCK__GFX_CPC_UCODE, 139 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 140 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 141 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 142 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 143 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 144 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 145 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 146 /* CPF*/ 147 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 148 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 149 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 150 TA_RAS_BLOCK__GFX_CPF_TAG, 151 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 152 /* CPG*/ 153 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 154 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 155 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 156 TA_RAS_BLOCK__GFX_CPG_TAG, 157 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 158 /* GDS*/ 159 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 160 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 161 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 162 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 163 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 164 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 165 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 166 /* SPI*/ 167 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 168 /* SQ*/ 169 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 170 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 171 TA_RAS_BLOCK__GFX_SQ_LDS_D, 172 TA_RAS_BLOCK__GFX_SQ_LDS_I, 173 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 174 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 175 /* SQC (3 ranges)*/ 176 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 177 /* SQC range 0*/ 178 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 179 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 180 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 181 
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

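/*
 * Helper for building ras_gfx_subblocks[] below: it ties an AMDGPU RAS
 * sub-block index to the matching TA enum value and packs the capability
 * flags a..d into hw_supported_error_type (bits 0..3) and e..h into
 * sw_supported_error_type (note the non-sequential bit order for e..h,
 * exactly as the shifts below are written).  As a worked example,
 *   AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1)
 * yields { "GFX_CPC_SCRATCH", TA_RAS_BLOCK__GFX_CPC_SCRATCH, 0xe, 0x6 }.
 */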
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = {				\
		#subblock,						\
		TA_RAS_BLOCK__##subblock,				\
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),		\
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),		\
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

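/*
 * Arcturus uses its own set of TCP channel steering registers
 * (mmTCP_CHAN_STEER_0..5_ARCT), which are defined locally near the top of
 * this file rather than coming from the shared gc_9_0 headers.
 */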
static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

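/*
 * "Golden" settings are the per-ASIC register override tables above
 * (mask/value pairs per register) applied once at init on top of the
 * hardware defaults.  Each chip gets its family table plus a chip-specific
 * one; the gc_9_x_common sequence is applied last for everything except
 * Arcturus and Renoir (Renoir returns early below).
 */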
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

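/*
 * IB test: like the ring test above, but the write of 0xDEADBEEF goes
 * through an indirect buffer into a writeback (WB) slot in system memory
 * instead of a scratch register, so it exercises the full IB submission
 * and fence path.
 */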
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

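/*
 * Per-ASIC minimum CP firmware versions below which me_fw_write_wait /
 * mec_fw_write_wait stay false; later code (outside this excerpt) is
 * expected to fall back to separate write and wait packets when these
 * flags are not set.
 */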
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			&&((adev->gfx.rlc_fw_version != 106 &&
			     adev->gfx.rlc_fw_version < 531) ||
			    (adev->gfx.rlc_fw_version == 53815) ||
			    (adev->gfx.rlc_feature_version < 1) ||
			     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}

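/*
 * Request and validate the CP gfx firmware images (PFP, ME, CE) for the
 * given chip, cache their version/feature numbers and, when PSP front-door
 * loading is used, register each image in adev->firmware so the PSP path
 * can upload it.  On any failure all three images are released again.
 */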
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

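/*
 * RLC firmware: pick the right image for the chip (including the Picasso
 * AM4 and Raven "kicker" special cases handled below), parse the v2.0
 * header, copy out the register list format/restore arrays, and register
 * the RLC ucode (plus the v2.1 save/restore lists when present) for PSP
 * loading.
 */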
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

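	/*
	 * With PSP front-door loading, the RLC_G image and, for v2.1 headers,
	 * the three save/restore list images are registered as separate ucode
	 * entries even though they all point into the same rlc firmware blob.
	 */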
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}

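/*
 * Compute (MEC) firmware: MEC1 is required, MEC2 is optional and is simply
 * skipped if the file is missing.  For PSP loading the jump table (JT)
 * portion of each image is registered as its own ucode entry, split off
 * from the main MEC text.
 */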
+= 1332 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1333 PAGE_SIZE); 1334 } 1335 } 1336 } 1337 1338 out: 1339 gfx_v9_0_check_if_need_gfxoff(adev); 1340 gfx_v9_0_check_fw_write_wait(adev); 1341 if (err) { 1342 dev_err(adev->dev, 1343 "gfx9: Failed to load firmware \"%s\"\n", 1344 fw_name); 1345 release_firmware(adev->gfx.mec_fw); 1346 adev->gfx.mec_fw = NULL; 1347 release_firmware(adev->gfx.mec2_fw); 1348 adev->gfx.mec2_fw = NULL; 1349 } 1350 return err; 1351 } 1352 1353 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1354 { 1355 const char *chip_name; 1356 int r; 1357 1358 DRM_DEBUG("\n"); 1359 1360 switch (adev->asic_type) { 1361 case CHIP_VEGA10: 1362 chip_name = "vega10"; 1363 break; 1364 case CHIP_VEGA12: 1365 chip_name = "vega12"; 1366 break; 1367 case CHIP_VEGA20: 1368 chip_name = "vega20"; 1369 break; 1370 case CHIP_RAVEN: 1371 if (adev->rev_id >= 8) 1372 chip_name = "raven2"; 1373 else if (adev->pdev->device == 0x15d8) 1374 chip_name = "picasso"; 1375 else 1376 chip_name = "raven"; 1377 break; 1378 case CHIP_ARCTURUS: 1379 chip_name = "arcturus"; 1380 break; 1381 case CHIP_RENOIR: 1382 chip_name = "renoir"; 1383 break; 1384 default: 1385 BUG(); 1386 } 1387 1388 /* No CPG in Arcturus */ 1389 if (adev->asic_type != CHIP_ARCTURUS) { 1390 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1391 if (r) 1392 return r; 1393 } 1394 1395 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1396 if (r) 1397 return r; 1398 1399 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1400 if (r) 1401 return r; 1402 1403 return r; 1404 } 1405 1406 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1407 { 1408 u32 count = 0; 1409 const struct cs_section_def *sect = NULL; 1410 const struct cs_extent_def *ext = NULL; 1411 1412 /* begin clear state */ 1413 count += 2; 1414 /* context control state */ 1415 count += 3; 1416 1417 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1418 for (ext = sect->section; ext->extent != NULL; ++ext) { 1419 if (sect->id == SECT_CONTEXT) 1420 count += 2 + ext->reg_count; 1421 else 1422 return 0; 1423 } 1424 } 1425 1426 /* end clear state */ 1427 count += 2; 1428 /* clear state */ 1429 count += 2; 1430 1431 return count; 1432 } 1433 1434 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1435 volatile u32 *buffer) 1436 { 1437 u32 count = 0, i; 1438 const struct cs_section_def *sect = NULL; 1439 const struct cs_extent_def *ext = NULL; 1440 1441 if (adev->gfx.rlc.cs_data == NULL) 1442 return; 1443 if (buffer == NULL) 1444 return; 1445 1446 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1447 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1448 1449 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1450 buffer[count++] = cpu_to_le32(0x80000000); 1451 buffer[count++] = cpu_to_le32(0x80000000); 1452 1453 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1454 for (ext = sect->section; ext->extent != NULL; ++ext) { 1455 if (sect->id == SECT_CONTEXT) { 1456 buffer[count++] = 1457 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1458 buffer[count++] = cpu_to_le32(ext->reg_index - 1459 PACKET3_SET_CONTEXT_REG_START); 1460 for (i = 0; i < ext->reg_count; i++) 1461 buffer[count++] = cpu_to_le32(ext->extent[i]); 1462 } else { 1463 return; 1464 } 1465 } 1466 } 1467 1468 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1469 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1470 1471 buffer[count++] = 
cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1472 buffer[count++] = cpu_to_le32(0); 1473 } 1474 1475 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1476 { 1477 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1478 uint32_t pg_always_on_cu_num = 2; 1479 uint32_t always_on_cu_num; 1480 uint32_t i, j, k; 1481 uint32_t mask, cu_bitmap, counter; 1482 1483 if (adev->flags & AMD_IS_APU) 1484 always_on_cu_num = 4; 1485 else if (adev->asic_type == CHIP_VEGA12) 1486 always_on_cu_num = 8; 1487 else 1488 always_on_cu_num = 12; 1489 1490 mutex_lock(&adev->grbm_idx_mutex); 1491 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1492 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1493 mask = 1; 1494 cu_bitmap = 0; 1495 counter = 0; 1496 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1497 1498 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1499 if (cu_info->bitmap[i][j] & mask) { 1500 if (counter == pg_always_on_cu_num) 1501 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1502 if (counter < always_on_cu_num) 1503 cu_bitmap |= mask; 1504 else 1505 break; 1506 counter++; 1507 } 1508 mask <<= 1; 1509 } 1510 1511 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1512 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1513 } 1514 } 1515 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1516 mutex_unlock(&adev->grbm_idx_mutex); 1517 } 1518 1519 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1520 { 1521 uint32_t data; 1522 1523 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1524 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1525 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1526 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1527 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1528 1529 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1530 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1531 1532 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1533 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1534 1535 mutex_lock(&adev->grbm_idx_mutex); 1536 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1537 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1538 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1539 1540 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1541 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1542 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1543 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1544 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1545 1546 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1547 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1548 data &= 0x0000FFFF; 1549 data |= 0x00C00000; 1550 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1551 1552 /* 1553 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1554 * programmed in gfx_v9_0_init_always_on_cu_mask() 1555 */ 1556 1557 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1558 * but used for RLC_LB_CNTL configuration */ 1559 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1560 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1561 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1562 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1563 mutex_unlock(&adev->grbm_idx_mutex); 1564 1565 gfx_v9_0_init_always_on_cu_mask(adev); 1566 } 1567 1568 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1569 { 1570 uint32_t data; 1571 1572 /* set 
mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
	 * but is used here as part of the RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ?
			1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
	case CHIP_RENOIR:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			  AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
&adev->gfx.mec.mec_fw_gpu_addr, 1749 (void **)&fw); 1750 if (r) { 1751 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1752 gfx_v9_0_mec_fini(adev); 1753 return r; 1754 } 1755 1756 memcpy(fw, fw_data, fw_size); 1757 1758 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1759 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1760 1761 return 0; 1762 } 1763 1764 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1765 { 1766 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1767 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1768 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1769 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1770 (SQ_IND_INDEX__FORCE_READ_MASK)); 1771 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1772 } 1773 1774 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1775 uint32_t wave, uint32_t thread, 1776 uint32_t regno, uint32_t num, uint32_t *out) 1777 { 1778 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1779 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1780 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1781 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1782 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1783 (SQ_IND_INDEX__FORCE_READ_MASK) | 1784 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1785 while (num--) 1786 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1787 } 1788 1789 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1790 { 1791 /* type 1 wave data */ 1792 dst[(*no_fields)++] = 1; 1793 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1794 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1795 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1796 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1797 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1798 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1799 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1800 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1801 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1802 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1803 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1804 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1805 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1806 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1807 } 1808 1809 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1810 uint32_t wave, uint32_t start, 1811 uint32_t size, uint32_t *dst) 1812 { 1813 wave_read_regs( 1814 adev, simd, wave, 0, 1815 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1816 } 1817 1818 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1819 uint32_t wave, uint32_t thread, 1820 uint32_t start, uint32_t size, 1821 uint32_t *dst) 1822 { 1823 wave_read_regs( 1824 adev, simd, wave, thread, 1825 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1826 } 1827 1828 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1829 u32 me, u32 pipe, u32 q, u32 vm) 1830 { 1831 soc15_grbm_select(adev, me, pipe, q, vm); 1832 } 1833 1834 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1835 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1836 .select_se_sh = &gfx_v9_0_select_se_sh, 1837 
.read_wave_data = &gfx_v9_0_read_wave_data, 1838 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1839 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1840 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1841 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1842 .query_ras_error_count = &gfx_v9_0_query_ras_error_count 1843 }; 1844 1845 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1846 { 1847 u32 gb_addr_config; 1848 int err; 1849 1850 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1851 1852 switch (adev->asic_type) { 1853 case CHIP_VEGA10: 1854 adev->gfx.config.max_hw_contexts = 8; 1855 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1856 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1857 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1858 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1859 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1860 break; 1861 case CHIP_VEGA12: 1862 adev->gfx.config.max_hw_contexts = 8; 1863 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1864 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1865 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1866 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1867 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1868 DRM_INFO("fix gfx.config for vega12\n"); 1869 break; 1870 case CHIP_VEGA20: 1871 adev->gfx.config.max_hw_contexts = 8; 1872 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1873 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1874 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1875 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1876 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1877 gb_addr_config &= ~0xf3e777ff; 1878 gb_addr_config |= 0x22014042; 1879 /* check vbios table if gpu info is not available */ 1880 err = amdgpu_atomfirmware_get_gfx_info(adev); 1881 if (err) 1882 return err; 1883 break; 1884 case CHIP_RAVEN: 1885 adev->gfx.config.max_hw_contexts = 8; 1886 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1887 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1888 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1889 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1890 if (adev->rev_id >= 8) 1891 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1892 else 1893 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1894 break; 1895 case CHIP_ARCTURUS: 1896 adev->gfx.config.max_hw_contexts = 8; 1897 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1898 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1899 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1900 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1901 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1902 gb_addr_config &= ~0xf3e777ff; 1903 gb_addr_config |= 0x22014042; 1904 break; 1905 case CHIP_RENOIR: 1906 adev->gfx.config.max_hw_contexts = 8; 1907 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1908 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1909 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1910 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1911 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1912 gb_addr_config &= ~0xf3e777ff; 1913 gb_addr_config |= 0x22010042; 1914 break; 1915 default: 1916 BUG(); 1917 break; 1918 } 1919 1920 adev->gfx.config.gb_addr_config = gb_addr_config; 1921 1922 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1923 REG_GET_FIELD( 1924 adev->gfx.config.gb_addr_config, 1925 GB_ADDR_CONFIG, 1926 NUM_PIPES); 1927 1928 adev->gfx.config.max_tile_pipes = 1929 adev->gfx.config.gb_addr_config_fields.num_pipes; 1930 1931 
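	/*
	 * Note: GB_ADDR_CONFIG stores these topology fields log2 encoded, so
	 * each decode here is "1 << field" (e.g. a NUM_PIPES field value of 2
	 * means four pipes).  PIPE_INTERLEAVE_SIZE additionally has a 256 byte
	 * minimum granularity, hence the "8 +" in its decode below.
	 */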
adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1932 REG_GET_FIELD( 1933 adev->gfx.config.gb_addr_config, 1934 GB_ADDR_CONFIG, 1935 NUM_BANKS); 1936 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1937 REG_GET_FIELD( 1938 adev->gfx.config.gb_addr_config, 1939 GB_ADDR_CONFIG, 1940 MAX_COMPRESSED_FRAGS); 1941 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1942 REG_GET_FIELD( 1943 adev->gfx.config.gb_addr_config, 1944 GB_ADDR_CONFIG, 1945 NUM_RB_PER_SE); 1946 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1947 REG_GET_FIELD( 1948 adev->gfx.config.gb_addr_config, 1949 GB_ADDR_CONFIG, 1950 NUM_SHADER_ENGINES); 1951 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1952 REG_GET_FIELD( 1953 adev->gfx.config.gb_addr_config, 1954 GB_ADDR_CONFIG, 1955 PIPE_INTERLEAVE_SIZE)); 1956 1957 return 0; 1958 } 1959 1960 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1961 struct amdgpu_ngg_buf *ngg_buf, 1962 int size_se, 1963 int default_size_se) 1964 { 1965 int r; 1966 1967 if (size_se < 0) { 1968 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1969 return -EINVAL; 1970 } 1971 size_se = size_se ? size_se : default_size_se; 1972 1973 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1974 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1975 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1976 &ngg_buf->bo, 1977 &ngg_buf->gpu_addr, 1978 NULL); 1979 if (r) { 1980 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1981 return r; 1982 } 1983 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1984 1985 return r; 1986 } 1987 1988 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1989 { 1990 int i; 1991 1992 for (i = 0; i < NGG_BUF_MAX; i++) 1993 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1994 &adev->gfx.ngg.buf[i].gpu_addr, 1995 NULL); 1996 1997 memset(&adev->gfx.ngg.buf[0], 0, 1998 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1999 2000 adev->gfx.ngg.init = false; 2001 2002 return 0; 2003 } 2004 2005 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 2006 { 2007 int r; 2008 2009 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 2010 return 0; 2011 2012 /* GDS reserve memory: 64 bytes alignment */ 2013 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 2014 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 2015 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 2016 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 2017 2018 /* Primitive Buffer */ 2019 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 2020 amdgpu_prim_buf_per_se, 2021 64 * 1024); 2022 if (r) { 2023 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 2024 goto err; 2025 } 2026 2027 /* Position Buffer */ 2028 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 2029 amdgpu_pos_buf_per_se, 2030 256 * 1024); 2031 if (r) { 2032 dev_err(adev->dev, "Failed to create Position Buffer\n"); 2033 goto err; 2034 } 2035 2036 /* Control Sideband */ 2037 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 2038 amdgpu_cntl_sb_buf_per_se, 2039 256); 2040 if (r) { 2041 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 2042 goto err; 2043 } 2044 2045 /* Parameter Cache, not created by default */ 2046 if (amdgpu_param_buf_per_se <= 0) 2047 goto out; 2048 2049 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 2050 amdgpu_param_buf_per_se, 2051 512 * 1024); 2052 if (r) { 2053 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 
2054 goto err; 2055 } 2056 2057 out: 2058 adev->gfx.ngg.init = true; 2059 return 0; 2060 err: 2061 gfx_v9_0_ngg_fini(adev); 2062 return r; 2063 } 2064 2065 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 2066 { 2067 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2068 int r; 2069 u32 data, base; 2070 2071 if (!amdgpu_ngg) 2072 return 0; 2073 2074 /* Program buffer size */ 2075 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 2076 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 2077 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 2078 adev->gfx.ngg.buf[NGG_POS].size >> 8); 2079 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 2080 2081 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 2082 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 2083 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 2084 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 2085 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 2086 2087 /* Program buffer base address */ 2088 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2089 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 2090 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 2091 2092 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2093 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 2094 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 2095 2096 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2097 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 2098 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 2099 2100 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2101 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 2102 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 2103 2104 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2105 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 2106 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 2107 2108 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2109 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 2110 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 2111 2112 /* Clear GDS reserved memory */ 2113 r = amdgpu_ring_alloc(ring, 17); 2114 if (r) { 2115 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 2116 ring->name, r); 2117 return r; 2118 } 2119 2120 gfx_v9_0_write_data_to_reg(ring, 0, false, 2121 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 2122 (adev->gds.gds_size + 2123 adev->gfx.ngg.gds_reserve_size)); 2124 2125 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 2126 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 2127 PACKET3_DMA_DATA_DST_SEL(1) | 2128 PACKET3_DMA_DATA_SRC_SEL(2))); 2129 amdgpu_ring_write(ring, 0); 2130 amdgpu_ring_write(ring, 0); 2131 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 2132 amdgpu_ring_write(ring, 0); 2133 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 2134 adev->gfx.ngg.gds_reserve_size); 2135 2136 gfx_v9_0_write_data_to_reg(ring, 0, false, 2137 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 2138 2139 amdgpu_ring_commit(ring); 2140 2141 return 0; 2142 } 2143 2144 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2145 int mec, int pipe, int queue) 2146 { 2147 int r; 2148 unsigned irq_type; 2149 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2150 2151 ring = &adev->gfx.compute_ring[ring_id]; 2152 2153 /* mec0 is me1 */ 2154 ring->me = mec + 1; 2155 ring->pipe = pipe; 2156 ring->queue = queue; 2157 2158 ring->ring_obj = NULL; 2159 ring->use_doorbell = true; 2160 
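	/*
	 * Note: the doorbell index below is shifted left by one because the
	 * indices in adev->doorbell_index are assigned in 64-bit doorbell
	 * slots, while the ring code works with 32-bit dword offsets.
	 */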
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2161 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2162 + (ring_id * GFX9_MEC_HPD_SIZE); 2163 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2164 2165 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2166 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2167 + ring->pipe; 2168 2169 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2170 r = amdgpu_ring_init(adev, ring, 1024, 2171 &adev->gfx.eop_irq, irq_type); 2172 if (r) 2173 return r; 2174 2175 2176 return 0; 2177 } 2178 2179 static int gfx_v9_0_sw_init(void *handle) 2180 { 2181 int i, j, k, r, ring_id; 2182 struct amdgpu_ring *ring; 2183 struct amdgpu_kiq *kiq; 2184 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2185 2186 switch (adev->asic_type) { 2187 case CHIP_VEGA10: 2188 case CHIP_VEGA12: 2189 case CHIP_VEGA20: 2190 case CHIP_RAVEN: 2191 case CHIP_ARCTURUS: 2192 case CHIP_RENOIR: 2193 adev->gfx.mec.num_mec = 2; 2194 break; 2195 default: 2196 adev->gfx.mec.num_mec = 1; 2197 break; 2198 } 2199 2200 adev->gfx.mec.num_pipe_per_mec = 4; 2201 adev->gfx.mec.num_queue_per_pipe = 8; 2202 2203 /* EOP Event */ 2204 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2205 if (r) 2206 return r; 2207 2208 /* Privileged reg */ 2209 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2210 &adev->gfx.priv_reg_irq); 2211 if (r) 2212 return r; 2213 2214 /* Privileged inst */ 2215 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2216 &adev->gfx.priv_inst_irq); 2217 if (r) 2218 return r; 2219 2220 /* ECC error */ 2221 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2222 &adev->gfx.cp_ecc_error_irq); 2223 if (r) 2224 return r; 2225 2226 /* FUE error */ 2227 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2228 &adev->gfx.cp_ecc_error_irq); 2229 if (r) 2230 return r; 2231 2232 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2233 2234 gfx_v9_0_scratch_init(adev); 2235 2236 r = gfx_v9_0_init_microcode(adev); 2237 if (r) { 2238 DRM_ERROR("Failed to load gfx firmware!\n"); 2239 return r; 2240 } 2241 2242 r = adev->gfx.rlc.funcs->init(adev); 2243 if (r) { 2244 DRM_ERROR("Failed to init rlc BOs!\n"); 2245 return r; 2246 } 2247 2248 r = gfx_v9_0_mec_init(adev); 2249 if (r) { 2250 DRM_ERROR("Failed to init MEC BOs!\n"); 2251 return r; 2252 } 2253 2254 /* set up the gfx ring */ 2255 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2256 ring = &adev->gfx.gfx_ring[i]; 2257 ring->ring_obj = NULL; 2258 if (!i) 2259 sprintf(ring->name, "gfx"); 2260 else 2261 sprintf(ring->name, "gfx_%d", i); 2262 ring->use_doorbell = true; 2263 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2264 r = amdgpu_ring_init(adev, ring, 1024, 2265 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 2266 if (r) 2267 return r; 2268 } 2269 2270 /* set up the compute queues - allocate horizontally across pipes */ 2271 ring_id = 0; 2272 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2273 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2274 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2275 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2276 continue; 2277 2278 r = gfx_v9_0_compute_ring_init(adev, 2279 ring_id, 2280 i, k, j); 2281 if (r) 2282 return r; 2283 2284 ring_id++; 2285 } 2286 } 2287 } 2288 2289 r = 
amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2290 if (r) { 2291 DRM_ERROR("Failed to init KIQ BOs!\n"); 2292 return r; 2293 } 2294 2295 kiq = &adev->gfx.kiq; 2296 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2297 if (r) 2298 return r; 2299 2300 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2301 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2302 if (r) 2303 return r; 2304 2305 adev->gfx.ce_ram_size = 0x8000; 2306 2307 r = gfx_v9_0_gpu_early_init(adev); 2308 if (r) 2309 return r; 2310 2311 r = gfx_v9_0_ngg_init(adev); 2312 if (r) 2313 return r; 2314 2315 return 0; 2316 } 2317 2318 2319 static int gfx_v9_0_sw_fini(void *handle) 2320 { 2321 int i; 2322 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2323 2324 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 2325 adev->gfx.ras_if) { 2326 struct ras_common_if *ras_if = adev->gfx.ras_if; 2327 struct ras_ih_if ih_info = { 2328 .head = *ras_if, 2329 }; 2330 2331 amdgpu_ras_debugfs_remove(adev, ras_if); 2332 amdgpu_ras_sysfs_remove(adev, ras_if); 2333 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 2334 amdgpu_ras_feature_enable(adev, ras_if, 0); 2335 kfree(ras_if); 2336 } 2337 2338 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2339 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2340 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2341 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2342 2343 amdgpu_gfx_mqd_sw_fini(adev); 2344 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2345 amdgpu_gfx_kiq_fini(adev); 2346 2347 gfx_v9_0_mec_fini(adev); 2348 gfx_v9_0_ngg_fini(adev); 2349 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2350 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 2351 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2352 &adev->gfx.rlc.cp_table_gpu_addr, 2353 (void **)&adev->gfx.rlc.cp_table_ptr); 2354 } 2355 gfx_v9_0_free_microcode(adev); 2356 2357 return 0; 2358 } 2359 2360 2361 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2362 { 2363 /* TODO */ 2364 } 2365 2366 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2367 { 2368 u32 data; 2369 2370 if (instance == 0xffffffff) 2371 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2372 else 2373 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2374 2375 if (se_num == 0xffffffff) 2376 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2377 else 2378 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2379 2380 if (sh_num == 0xffffffff) 2381 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2382 else 2383 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2384 2385 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2386 } 2387 2388 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2389 { 2390 u32 data, mask; 2391 2392 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2393 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2394 2395 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2396 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2397 2398 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2399 adev->gfx.config.max_sh_per_se); 2400 2401 return (~data) & mask; 2402 } 2403 2404 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2405 { 2406 int i, j; 2407 u32 data; 2408 u32 active_rbs = 0; 2409 u32 rb_bitmap_width_per_sh = 
		adev->gfx.config.max_backends_per_se /
		adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:		0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:	0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:	0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
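	 * (GDS_VMID0_BASE/SIZE are laid out as consecutive per-VMID register
	 * pairs, which is why the writes in these init functions step through
	 * them with a "2 * vmid" offset.)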
2478 */ 2479 for (vmid = 1; vmid < 16; vmid++) { 2480 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2481 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2482 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2483 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2484 } 2485 } 2486 2487 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2488 { 2489 u32 tmp; 2490 int i; 2491 2492 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2493 2494 gfx_v9_0_tiling_mode_table_init(adev); 2495 2496 gfx_v9_0_setup_rb(adev); 2497 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2498 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2499 2500 /* XXX SH_MEM regs */ 2501 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2502 mutex_lock(&adev->srbm_mutex); 2503 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2504 soc15_grbm_select(adev, 0, 0, 0, i); 2505 /* CP and shaders */ 2506 if (i == 0) { 2507 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2508 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2509 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2510 !!amdgpu_noretry); 2511 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2512 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2513 } else { 2514 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2515 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2516 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2517 !!amdgpu_noretry); 2518 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2519 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2520 (adev->gmc.private_aperture_start >> 48)); 2521 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2522 (adev->gmc.shared_aperture_start >> 48)); 2523 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2524 } 2525 } 2526 soc15_grbm_select(adev, 0, 0, 0, 0); 2527 2528 mutex_unlock(&adev->srbm_mutex); 2529 2530 gfx_v9_0_init_compute_vmid(adev); 2531 gfx_v9_0_init_gds_vmid(adev); 2532 } 2533 2534 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2535 { 2536 u32 i, j, k; 2537 u32 mask; 2538 2539 mutex_lock(&adev->grbm_idx_mutex); 2540 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2541 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2542 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2543 for (k = 0; k < adev->usec_timeout; k++) { 2544 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2545 break; 2546 udelay(1); 2547 } 2548 if (k == adev->usec_timeout) { 2549 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2550 0xffffffff, 0xffffffff); 2551 mutex_unlock(&adev->grbm_idx_mutex); 2552 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2553 i, j); 2554 return; 2555 } 2556 } 2557 } 2558 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2559 mutex_unlock(&adev->grbm_idx_mutex); 2560 2561 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2562 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2563 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2564 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2565 for (k = 0; k < adev->usec_timeout; k++) { 2566 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2567 break; 2568 udelay(1); 2569 } 2570 } 2571 2572 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2573 bool enable) 2574 { 2575 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2576 2577 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2578 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
1 : 0); 2579 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2580 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2581 2582 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2583 } 2584 2585 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2586 { 2587 /* csib */ 2588 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2589 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2590 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2591 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2592 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2593 adev->gfx.rlc.clear_state_size); 2594 } 2595 2596 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2597 int indirect_offset, 2598 int list_size, 2599 int *unique_indirect_regs, 2600 int unique_indirect_reg_count, 2601 int *indirect_start_offsets, 2602 int *indirect_start_offsets_count, 2603 int max_start_offsets_count) 2604 { 2605 int idx; 2606 2607 for (; indirect_offset < list_size; indirect_offset++) { 2608 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2609 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2610 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2611 2612 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2613 indirect_offset += 2; 2614 2615 /* look for the matching indice */ 2616 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2617 if (unique_indirect_regs[idx] == 2618 register_list_format[indirect_offset] || 2619 !unique_indirect_regs[idx]) 2620 break; 2621 } 2622 2623 BUG_ON(idx >= unique_indirect_reg_count); 2624 2625 if (!unique_indirect_regs[idx]) 2626 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2627 2628 indirect_offset++; 2629 } 2630 } 2631 } 2632 2633 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2634 { 2635 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2636 int unique_indirect_reg_count = 0; 2637 2638 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2639 int indirect_start_offsets_count = 0; 2640 2641 int list_size = 0; 2642 int i = 0, j = 0; 2643 u32 tmp = 0; 2644 2645 u32 *register_list_format = 2646 kmemdup(adev->gfx.rlc.register_list_format, 2647 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2648 if (!register_list_format) 2649 return -ENOMEM; 2650 2651 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2652 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2653 gfx_v9_1_parse_ind_reg_list(register_list_format, 2654 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2655 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2656 unique_indirect_regs, 2657 unique_indirect_reg_count, 2658 indirect_start_offsets, 2659 &indirect_start_offsets_count, 2660 ARRAY_SIZE(indirect_start_offsets)); 2661 2662 /* enable auto inc in case it is disabled */ 2663 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2664 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2665 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2666 2667 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2668 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2669 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2670 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2671 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2672 adev->gfx.rlc.register_restore[i]); 2673 2674 /* load indirect register */ 2675 
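	/*
	 * The format list written below starts with the direct-register
	 * portion (reg_list_format_direct_reg_list_length dwords), followed by
	 * 0xFFFFFFFF-delimited indirect groups; each indirect register number
	 * is replaced with its index into unique_indirect_regs, which are
	 * programmed into the RLC_SRM_INDEX_CNTL_ADDR/DATA registers at the
	 * end of this function.
	 */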
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2676 adev->gfx.rlc.reg_list_format_start); 2677 2678 /* direct register portion */ 2679 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2680 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2681 register_list_format[i]); 2682 2683 /* indirect register portion */ 2684 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2685 if (register_list_format[i] == 0xFFFFFFFF) { 2686 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2687 continue; 2688 } 2689 2690 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2691 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2692 2693 for (j = 0; j < unique_indirect_reg_count; j++) { 2694 if (register_list_format[i] == unique_indirect_regs[j]) { 2695 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2696 break; 2697 } 2698 } 2699 2700 BUG_ON(j >= unique_indirect_reg_count); 2701 2702 i++; 2703 } 2704 2705 /* set save/restore list size */ 2706 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2707 list_size = list_size >> 1; 2708 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2709 adev->gfx.rlc.reg_restore_list_size); 2710 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2711 2712 /* write the starting offsets to RLC scratch ram */ 2713 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2714 adev->gfx.rlc.starting_offsets_start); 2715 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2716 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2717 indirect_start_offsets[i]); 2718 2719 /* load unique indirect regs*/ 2720 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2721 if (unique_indirect_regs[i] != 0) { 2722 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2723 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2724 unique_indirect_regs[i] & 0x3FFFF); 2725 2726 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2727 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2728 unique_indirect_regs[i] >> 20); 2729 } 2730 } 2731 2732 kfree(register_list_format); 2733 return 0; 2734 } 2735 2736 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2737 { 2738 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2739 } 2740 2741 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2742 bool enable) 2743 { 2744 uint32_t data = 0; 2745 uint32_t default_data = 0; 2746 2747 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2748 if (enable == true) { 2749 /* enable GFXIP control over CGPG */ 2750 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2751 if(default_data != data) 2752 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2753 2754 /* update status */ 2755 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2756 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2757 if(default_data != data) 2758 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2759 } else { 2760 /* restore GFXIP control over GCPG */ 2761 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2762 if(default_data != data) 2763 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2764 } 2765 } 2766 2767 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2768 { 2769 uint32_t data = 0; 2770 2771 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2772 AMD_PG_SUPPORT_GFX_SMG | 2773 AMD_PG_SUPPORT_GFX_DMG)) { 2774 /* init IDLE_POLL_COUNT = 60 
*/ 2775 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2776 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2777 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2778 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2779 2780 /* init RLC PG Delay */ 2781 data = 0; 2782 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2783 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2784 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2785 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2786 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2787 2788 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2789 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2790 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2791 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2792 2793 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2794 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2795 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2796 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2797 2798 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2799 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2800 2801 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2802 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2803 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2804 2805 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2806 } 2807 } 2808 2809 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2810 bool enable) 2811 { 2812 uint32_t data = 0; 2813 uint32_t default_data = 0; 2814 2815 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2816 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2817 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2818 enable ? 1 : 0); 2819 if (default_data != data) 2820 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2821 } 2822 2823 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2824 bool enable) 2825 { 2826 uint32_t data = 0; 2827 uint32_t default_data = 0; 2828 2829 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2830 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2831 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2832 enable ? 1 : 0); 2833 if(default_data != data) 2834 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2835 } 2836 2837 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2838 bool enable) 2839 { 2840 uint32_t data = 0; 2841 uint32_t default_data = 0; 2842 2843 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2844 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2845 CP_PG_DISABLE, 2846 enable ? 0 : 1); 2847 if(default_data != data) 2848 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2849 } 2850 2851 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2852 bool enable) 2853 { 2854 uint32_t data, default_data; 2855 2856 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2857 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2858 GFX_POWER_GATING_ENABLE, 2859 enable ? 
1 : 0); 2860 if(default_data != data) 2861 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2862 } 2863 2864 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2865 bool enable) 2866 { 2867 uint32_t data, default_data; 2868 2869 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2870 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2871 GFX_PIPELINE_PG_ENABLE, 2872 enable ? 1 : 0); 2873 if(default_data != data) 2874 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2875 2876 if (!enable) 2877 /* read any GFX register to wake up GFX */ 2878 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2879 } 2880 2881 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2882 bool enable) 2883 { 2884 uint32_t data, default_data; 2885 2886 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2887 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2888 STATIC_PER_CU_PG_ENABLE, 2889 enable ? 1 : 0); 2890 if(default_data != data) 2891 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2892 } 2893 2894 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2895 bool enable) 2896 { 2897 uint32_t data, default_data; 2898 2899 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2900 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2901 DYN_PER_CU_PG_ENABLE, 2902 enable ? 1 : 0); 2903 if(default_data != data) 2904 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2905 } 2906 2907 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2908 { 2909 gfx_v9_0_init_csb(adev); 2910 2911 /* 2912 * Rlc save restore list is workable since v2_1. 2913 * And it's needed by gfxoff feature. 2914 */ 2915 if (adev->gfx.rlc.is_rlc_v2_1) { 2916 gfx_v9_1_init_rlc_save_restore_list(adev); 2917 gfx_v9_0_enable_save_restore_machine(adev); 2918 } 2919 2920 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2921 AMD_PG_SUPPORT_GFX_SMG | 2922 AMD_PG_SUPPORT_GFX_DMG | 2923 AMD_PG_SUPPORT_CP | 2924 AMD_PG_SUPPORT_GDS | 2925 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2926 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2927 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2928 gfx_v9_0_init_gfx_power_gating(adev); 2929 } 2930 } 2931 2932 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2933 { 2934 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2935 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2936 gfx_v9_0_wait_for_rlc_serdes(adev); 2937 } 2938 2939 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2940 { 2941 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2942 udelay(50); 2943 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2944 udelay(50); 2945 } 2946 2947 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2948 { 2949 #ifdef AMDGPU_RLC_DEBUG_RETRY 2950 u32 rlc_ucode_ver; 2951 #endif 2952 2953 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2954 udelay(50); 2955 2956 /* carrizo do enable cp interrupt after cp inited */ 2957 if (!(adev->flags & AMD_IS_APU)) { 2958 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2959 udelay(50); 2960 } 2961 2962 #ifdef AMDGPU_RLC_DEBUG_RETRY 2963 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2964 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2965 if(rlc_ucode_ver == 0x108) { 2966 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2967 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2968 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2969 * default is 0x9C4 to create a 100us interval */ 2970 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2971 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2972 * to disable the page fault retry interrupts, default is 2973 * 0x100 (256) */ 2974 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2975 } 2976 #endif 2977 } 2978 2979 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2980 { 2981 const struct rlc_firmware_header_v2_0 *hdr; 2982 const __le32 *fw_data; 2983 unsigned i, fw_size; 2984 2985 if (!adev->gfx.rlc_fw) 2986 return -EINVAL; 2987 2988 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2989 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2990 2991 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2992 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2993 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2994 2995 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2996 RLCG_UCODE_LOADING_START_ADDRESS); 2997 for (i = 0; i < fw_size; i++) 2998 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2999 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3000 3001 return 0; 3002 } 3003 3004 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3005 { 3006 int r; 3007 3008 if (amdgpu_sriov_vf(adev)) { 3009 gfx_v9_0_init_csb(adev); 3010 return 0; 3011 } 3012 3013 adev->gfx.rlc.funcs->stop(adev); 3014 3015 /* disable CG */ 3016 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3017 3018 gfx_v9_0_init_pg(adev); 3019 3020 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3021 /* legacy rlc firmware loading */ 3022 r = gfx_v9_0_rlc_load_microcode(adev); 3023 if (r) 3024 return r; 3025 } 3026 3027 switch (adev->asic_type) { 3028 case CHIP_RAVEN: 3029 case CHIP_RENOIR: 3030 if (amdgpu_lbpw == 0) 3031 gfx_v9_0_enable_lbpw(adev, false); 3032 else 3033 gfx_v9_0_enable_lbpw(adev, true); 3034 break; 3035 case CHIP_VEGA20: 3036 if (amdgpu_lbpw > 0) 3037 gfx_v9_0_enable_lbpw(adev, true); 3038 else 3039 gfx_v9_0_enable_lbpw(adev, false); 3040 break; 3041 default: 3042 break; 3043 } 3044 3045 adev->gfx.rlc.funcs->start(adev); 3046 3047 return 0; 3048 } 3049 3050 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3051 { 3052 int i; 3053 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3054 3055 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3056 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3057 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 3058 if (!enable) { 3059 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3060 adev->gfx.gfx_ring[i].sched.ready = false; 3061 } 3062 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3063 udelay(50); 3064 } 3065 3066 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3067 { 3068 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3069 const struct gfx_firmware_header_v1_0 *ce_hdr; 3070 const struct gfx_firmware_header_v1_0 *me_hdr; 3071 const __le32 *fw_data; 3072 unsigned i, fw_size; 3073 3074 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3075 return -EINVAL; 3076 3077 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3078 adev->gfx.pfp_fw->data; 3079 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3080 adev->gfx.ce_fw->data; 3081 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3082 adev->gfx.me_fw->data; 3083 3084 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3085 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3086 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3087 3088 gfx_v9_0_cp_gfx_enable(adev, false); 3089 3090 /* PFP */ 3091 fw_data = (const __le32 *) 3092 (adev->gfx.pfp_fw->data + 3093 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3094 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3095 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3096 for (i = 0; i < fw_size; i++) 3097 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3098 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3099 3100 /* CE */ 3101 fw_data = (const __le32 *) 3102 (adev->gfx.ce_fw->data + 3103 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3104 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3105 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3106 for (i = 0; i < fw_size; i++) 3107 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3108 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3109 3110 /* ME */ 3111 fw_data = (const __le32 *) 3112 (adev->gfx.me_fw->data + 3113 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3114 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3115 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3116 for (i = 0; i < fw_size; i++) 3117 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3118 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3119 3120 return 0; 3121 } 3122 3123 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3124 { 3125 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3126 const struct cs_section_def *sect = NULL; 3127 const struct cs_extent_def *ext = NULL; 3128 int r, i, tmp; 3129 3130 /* init the CP */ 3131 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3132 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3133 3134 gfx_v9_0_cp_gfx_enable(adev, true); 3135 3136 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3137 if (r) { 3138 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3139 return r; 3140 } 3141 3142 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3143 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3144 3145 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3146 amdgpu_ring_write(ring, 0x80000000); 3147 amdgpu_ring_write(ring, 0x80000000); 3148 3149 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3150 for (ext = sect->section; ext->extent != NULL; ++ext) { 3151 if (sect->id == SECT_CONTEXT) { 3152 amdgpu_ring_write(ring, 3153 PACKET3(PACKET3_SET_CONTEXT_REG, 3154 
ext->reg_count)); 3155 amdgpu_ring_write(ring, 3156 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3157 for (i = 0; i < ext->reg_count; i++) 3158 amdgpu_ring_write(ring, ext->extent[i]); 3159 } 3160 } 3161 } 3162 3163 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3164 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3165 3166 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3167 amdgpu_ring_write(ring, 0); 3168 3169 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3170 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3171 amdgpu_ring_write(ring, 0x8000); 3172 amdgpu_ring_write(ring, 0x8000); 3173 3174 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3175 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3176 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3177 amdgpu_ring_write(ring, tmp); 3178 amdgpu_ring_write(ring, 0); 3179 3180 amdgpu_ring_commit(ring); 3181 3182 return 0; 3183 } 3184 3185 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3186 { 3187 struct amdgpu_ring *ring; 3188 u32 tmp; 3189 u32 rb_bufsz; 3190 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3191 3192 /* Set the write pointer delay */ 3193 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3194 3195 /* set the RB to use vmid 0 */ 3196 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3197 3198 /* Set ring buffer size */ 3199 ring = &adev->gfx.gfx_ring[0]; 3200 rb_bufsz = order_base_2(ring->ring_size / 8); 3201 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3202 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3203 #ifdef __BIG_ENDIAN 3204 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3205 #endif 3206 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3207 3208 /* Initialize the ring buffer's write pointers */ 3209 ring->wptr = 0; 3210 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3211 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3212 3213 /* set the wb address wether it's enabled or not */ 3214 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3215 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3216 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3217 3218 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3219 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3220 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3221 3222 mdelay(1); 3223 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3224 3225 rb_addr = ring->gpu_addr >> 8; 3226 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3227 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3228 3229 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3230 if (ring->use_doorbell) { 3231 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3232 DOORBELL_OFFSET, ring->doorbell_index); 3233 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3234 DOORBELL_EN, 1); 3235 } else { 3236 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3237 } 3238 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3239 3240 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3241 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3242 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3243 3244 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3245 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3246 3247 3248 /* start the ring */ 3249 gfx_v9_0_cp_gfx_start(adev); 3250 ring->sched.ready = true; 
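	/* Note: the gfx ring is marked schedulable only after
	 * gfx_v9_0_cp_gfx_start() above has queued the clear-state
	 * preamble, so nothing can be submitted before CP init completes.
	 */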
3251 3252 return 0; 3253 } 3254 3255 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3256 { 3257 int i; 3258 3259 if (enable) { 3260 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3261 } else { 3262 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3263 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3264 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3265 adev->gfx.compute_ring[i].sched.ready = false; 3266 adev->gfx.kiq.ring.sched.ready = false; 3267 } 3268 udelay(50); 3269 } 3270 3271 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3272 { 3273 const struct gfx_firmware_header_v1_0 *mec_hdr; 3274 const __le32 *fw_data; 3275 unsigned i; 3276 u32 tmp; 3277 3278 if (!adev->gfx.mec_fw) 3279 return -EINVAL; 3280 3281 gfx_v9_0_cp_compute_enable(adev, false); 3282 3283 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3284 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3285 3286 fw_data = (const __le32 *) 3287 (adev->gfx.mec_fw->data + 3288 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3289 tmp = 0; 3290 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3291 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3292 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3293 3294 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3295 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3296 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3297 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3298 3299 /* MEC1 */ 3300 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3301 mec_hdr->jt_offset); 3302 for (i = 0; i < mec_hdr->jt_size; i++) 3303 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3304 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3305 3306 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3307 adev->gfx.mec_fw_version); 3308 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3309 3310 return 0; 3311 } 3312 3313 /* KIQ functions */ 3314 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3315 { 3316 uint32_t tmp; 3317 struct amdgpu_device *adev = ring->adev; 3318 3319 /* tell RLC which is KIQ queue */ 3320 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3321 tmp &= 0xffffff00; 3322 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3323 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3324 tmp |= 0x80; 3325 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3326 } 3327 3328 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 3329 { 3330 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3331 uint64_t queue_mask = 0; 3332 int r, i; 3333 3334 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 3335 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 3336 continue; 3337 3338 /* This situation may be hit in the future if a new HW 3339 * generation exposes more than 64 queues. 
If so, the 3340 * definition of queue_mask needs updating */ 3341 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 3342 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 3343 break; 3344 } 3345 3346 queue_mask |= (1ull << i); 3347 } 3348 3349 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 3350 if (r) { 3351 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3352 return r; 3353 } 3354 3355 /* set resources */ 3356 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 3357 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 3358 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 3359 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 3360 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 3361 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 3362 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 3363 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 3364 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 3365 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3366 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3367 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 3368 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3369 3370 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 3371 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 3372 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3373 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 3374 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 3375 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 3376 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 3377 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 3378 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 3379 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 3380 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 3381 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 3382 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 3383 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 3384 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 3385 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 3386 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 3387 } 3388 3389 r = amdgpu_ring_test_helper(kiq_ring); 3390 if (r) 3391 DRM_ERROR("KCQ enable failed\n"); 3392 3393 return r; 3394 } 3395 3396 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3397 { 3398 struct amdgpu_device *adev = ring->adev; 3399 struct v9_mqd *mqd = ring->mqd_ptr; 3400 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3401 uint32_t tmp; 3402 3403 mqd->header = 0xC0310800; 3404 mqd->compute_pipelinestat_enable = 0x00000001; 3405 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3406 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3407 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3408 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3409 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3410 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3411 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3412 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3413 mqd->compute_misc_reserved = 0x00000003; 3414 3415 mqd->dynamic_cu_mask_addr_lo = 3416 lower_32_bits(ring->mqd_gpu_addr 3417 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3418 mqd->dynamic_cu_mask_addr_hi = 3419 upper_32_bits(ring->mqd_gpu_addr 3420 + offsetof(struct v9_mqd_allocation, 
dynamic_cu_mask)); 3421 3422 eop_base_addr = ring->eop_gpu_addr >> 8; 3423 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3424 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3425 3426 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3427 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3428 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3429 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3430 3431 mqd->cp_hqd_eop_control = tmp; 3432 3433 /* enable doorbell? */ 3434 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3435 3436 if (ring->use_doorbell) { 3437 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3438 DOORBELL_OFFSET, ring->doorbell_index); 3439 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3440 DOORBELL_EN, 1); 3441 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3442 DOORBELL_SOURCE, 0); 3443 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3444 DOORBELL_HIT, 0); 3445 } else { 3446 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3447 DOORBELL_EN, 0); 3448 } 3449 3450 mqd->cp_hqd_pq_doorbell_control = tmp; 3451 3452 /* disable the queue if it's active */ 3453 ring->wptr = 0; 3454 mqd->cp_hqd_dequeue_request = 0; 3455 mqd->cp_hqd_pq_rptr = 0; 3456 mqd->cp_hqd_pq_wptr_lo = 0; 3457 mqd->cp_hqd_pq_wptr_hi = 0; 3458 3459 /* set the pointer to the MQD */ 3460 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3461 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3462 3463 /* set MQD vmid to 0 */ 3464 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3465 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3466 mqd->cp_mqd_control = tmp; 3467 3468 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3469 hqd_gpu_addr = ring->gpu_addr >> 8; 3470 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3471 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3472 3473 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3474 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3475 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3476 (order_base_2(ring->ring_size / 4) - 1)); 3477 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3478 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3479 #ifdef __BIG_ENDIAN 3480 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3481 #endif 3482 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3483 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3484 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3485 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3486 mqd->cp_hqd_pq_control = tmp; 3487 3488 /* set the wb address whether it's enabled or not */ 3489 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3490 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3491 mqd->cp_hqd_pq_rptr_report_addr_hi = 3492 upper_32_bits(wb_gpu_addr) & 0xffff; 3493 3494 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3495 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3496 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3497 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3498 3499 tmp = 0; 3500 /* enable the doorbell if requested */ 3501 if (ring->use_doorbell) { 3502 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3503 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3504 DOORBELL_OFFSET, ring->doorbell_index); 3505 3506 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3507 DOORBELL_EN, 1); 3508 tmp = 
REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3509 DOORBELL_SOURCE, 0); 3510 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3511 DOORBELL_HIT, 0); 3512 } 3513 3514 mqd->cp_hqd_pq_doorbell_control = tmp; 3515 3516 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3517 ring->wptr = 0; 3518 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3519 3520 /* set the vmid for the queue */ 3521 mqd->cp_hqd_vmid = 0; 3522 3523 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3524 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3525 mqd->cp_hqd_persistent_state = tmp; 3526 3527 /* set MIN_IB_AVAIL_SIZE */ 3528 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3529 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3530 mqd->cp_hqd_ib_control = tmp; 3531 3532 /* activate the queue */ 3533 mqd->cp_hqd_active = 1; 3534 3535 return 0; 3536 } 3537 3538 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3539 { 3540 struct amdgpu_device *adev = ring->adev; 3541 struct v9_mqd *mqd = ring->mqd_ptr; 3542 int j; 3543 3544 /* disable wptr polling */ 3545 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3546 3547 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3548 mqd->cp_hqd_eop_base_addr_lo); 3549 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3550 mqd->cp_hqd_eop_base_addr_hi); 3551 3552 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3553 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3554 mqd->cp_hqd_eop_control); 3555 3556 /* enable doorbell? */ 3557 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3558 mqd->cp_hqd_pq_doorbell_control); 3559 3560 /* disable the queue if it's active */ 3561 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3562 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3563 for (j = 0; j < adev->usec_timeout; j++) { 3564 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3565 break; 3566 udelay(1); 3567 } 3568 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3569 mqd->cp_hqd_dequeue_request); 3570 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3571 mqd->cp_hqd_pq_rptr); 3572 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3573 mqd->cp_hqd_pq_wptr_lo); 3574 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3575 mqd->cp_hqd_pq_wptr_hi); 3576 } 3577 3578 /* set the pointer to the MQD */ 3579 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3580 mqd->cp_mqd_base_addr_lo); 3581 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3582 mqd->cp_mqd_base_addr_hi); 3583 3584 /* set MQD vmid to 0 */ 3585 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3586 mqd->cp_mqd_control); 3587 3588 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3589 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3590 mqd->cp_hqd_pq_base_lo); 3591 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3592 mqd->cp_hqd_pq_base_hi); 3593 3594 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3595 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3596 mqd->cp_hqd_pq_control); 3597 3598 /* set the wb address whether it's enabled or not */ 3599 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3600 mqd->cp_hqd_pq_rptr_report_addr_lo); 3601 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3602 mqd->cp_hqd_pq_rptr_report_addr_hi); 3603 3604 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3605 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3606 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3607 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3608 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3609 3610 /* enable the 
doorbell if requested */ 3611 if (ring->use_doorbell) { 3612 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3613 (adev->doorbell_index.kiq * 2) << 2); 3614 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3615 (adev->doorbell_index.userqueue_end * 2) << 2); 3616 } 3617 3618 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3619 mqd->cp_hqd_pq_doorbell_control); 3620 3621 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3622 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3623 mqd->cp_hqd_pq_wptr_lo); 3624 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3625 mqd->cp_hqd_pq_wptr_hi); 3626 3627 /* set the vmid for the queue */ 3628 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3629 3630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3631 mqd->cp_hqd_persistent_state); 3632 3633 /* activate the queue */ 3634 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3635 mqd->cp_hqd_active); 3636 3637 if (ring->use_doorbell) 3638 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3639 3640 return 0; 3641 } 3642 3643 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3644 { 3645 struct amdgpu_device *adev = ring->adev; 3646 int j; 3647 3648 /* disable the queue if it's active */ 3649 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3650 3651 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3652 3653 for (j = 0; j < adev->usec_timeout; j++) { 3654 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3655 break; 3656 udelay(1); 3657 } 3658 3659 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3660 DRM_DEBUG("KIQ dequeue request failed.\n"); 3661 3662 /* Manual disable if dequeue request times out */ 3663 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3664 } 3665 3666 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3667 0); 3668 } 3669 3670 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3671 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3672 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3673 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3675 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3676 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3677 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3678 3679 return 0; 3680 } 3681 3682 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3683 { 3684 struct amdgpu_device *adev = ring->adev; 3685 struct v9_mqd *mqd = ring->mqd_ptr; 3686 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3687 3688 gfx_v9_0_kiq_setting(ring); 3689 3690 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3691 /* reset MQD to a clean status */ 3692 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3693 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3694 3695 /* reset ring buffer */ 3696 ring->wptr = 0; 3697 amdgpu_ring_clear_ring(ring); 3698 3699 mutex_lock(&adev->srbm_mutex); 3700 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3701 gfx_v9_0_kiq_init_register(ring); 3702 soc15_grbm_select(adev, 0, 0, 0, 0); 3703 mutex_unlock(&adev->srbm_mutex); 3704 } else { 3705 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3706 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3707 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3708 mutex_lock(&adev->srbm_mutex); 3709 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3710 gfx_v9_0_mqd_init(ring); 3711 gfx_v9_0_kiq_init_register(ring); 3712 soc15_grbm_select(adev, 0, 0, 0, 0); 3713 mutex_unlock(&adev->srbm_mutex); 3714 3715 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 3716 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3717 } 3718 3719 return 0; 3720 } 3721 3722 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3723 { 3724 struct amdgpu_device *adev = ring->adev; 3725 struct v9_mqd *mqd = ring->mqd_ptr; 3726 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3727 3728 if (!adev->in_gpu_reset && !adev->in_suspend) { 3729 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3730 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3731 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3732 mutex_lock(&adev->srbm_mutex); 3733 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3734 gfx_v9_0_mqd_init(ring); 3735 soc15_grbm_select(adev, 0, 0, 0, 0); 3736 mutex_unlock(&adev->srbm_mutex); 3737 3738 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3739 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3740 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3741 /* reset MQD to a clean status */ 3742 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3743 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3744 3745 /* reset ring buffer */ 3746 ring->wptr = 0; 3747 amdgpu_ring_clear_ring(ring); 3748 } else { 3749 amdgpu_ring_clear_ring(ring); 3750 } 3751 3752 return 0; 3753 } 3754 3755 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3756 { 3757 struct amdgpu_ring *ring; 3758 int r; 3759 3760 ring = &adev->gfx.kiq.ring; 3761 3762 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3763 if (unlikely(r != 0)) 3764 return r; 3765 3766 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3767 if (unlikely(r != 0)) 3768 return r; 3769 3770 gfx_v9_0_kiq_init_queue(ring); 3771 amdgpu_bo_kunmap(ring->mqd_obj); 3772 ring->mqd_ptr = NULL; 3773 amdgpu_bo_unreserve(ring->mqd_obj); 3774 ring->sched.ready = true; 3775 return 0; 3776 } 3777 3778 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3779 { 3780 struct amdgpu_ring *ring = NULL; 3781 int r = 0, i; 3782 3783 gfx_v9_0_cp_compute_enable(adev, true); 3784 3785 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3786 ring = &adev->gfx.compute_ring[i]; 3787 3788 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3789 if (unlikely(r != 0)) 3790 goto done; 3791 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3792 if (!r) { 3793 r = gfx_v9_0_kcq_init_queue(ring); 3794 amdgpu_bo_kunmap(ring->mqd_obj); 3795 ring->mqd_ptr = NULL; 3796 } 3797 amdgpu_bo_unreserve(ring->mqd_obj); 3798 if (r) 3799 goto done; 3800 } 3801 3802 r = gfx_v9_0_kiq_kcq_enable(adev); 3803 done: 3804 return r; 3805 } 3806 3807 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3808 { 3809 int r, i; 3810 struct amdgpu_ring *ring; 3811 3812 if (!(adev->flags & AMD_IS_APU)) 3813 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3814 3815 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3816 if (adev->asic_type != CHIP_ARCTURUS) { 3817 /* legacy firmware loading */ 3818 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3819 if (r) 3820 return r; 3821 } 3822 3823 r = gfx_v9_0_cp_compute_load_microcode(adev); 3824 if (r) 3825 return r; 3826 } 3827 3828 r = gfx_v9_0_kiq_resume(adev); 3829 if (r) 3830 return r; 3831 3832 if (adev->asic_type != CHIP_ARCTURUS) { 3833 r = gfx_v9_0_cp_gfx_resume(adev); 3834 if (r) 3835 return r; 3836 } 3837 3838 r = gfx_v9_0_kcq_resume(adev); 3839 if (r) 3840 return r; 3841 3842 if (adev->asic_type != CHIP_ARCTURUS) { 3843 ring = 
&adev->gfx.gfx_ring[0]; 3844 r = amdgpu_ring_test_helper(ring); 3845 if (r) 3846 return r; 3847 } 3848 3849 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3850 ring = &adev->gfx.compute_ring[i]; 3851 amdgpu_ring_test_helper(ring); 3852 } 3853 3854 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3855 3856 return 0; 3857 } 3858 3859 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3860 { 3861 if (adev->asic_type != CHIP_ARCTURUS) 3862 gfx_v9_0_cp_gfx_enable(adev, enable); 3863 gfx_v9_0_cp_compute_enable(adev, enable); 3864 } 3865 3866 static int gfx_v9_0_hw_init(void *handle) 3867 { 3868 int r; 3869 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3870 3871 if (!amdgpu_sriov_vf(adev)) 3872 gfx_v9_0_init_golden_registers(adev); 3873 3874 gfx_v9_0_constants_init(adev); 3875 3876 r = gfx_v9_0_csb_vram_pin(adev); 3877 if (r) 3878 return r; 3879 3880 r = adev->gfx.rlc.funcs->resume(adev); 3881 if (r) 3882 return r; 3883 3884 r = gfx_v9_0_cp_resume(adev); 3885 if (r) 3886 return r; 3887 3888 if (adev->asic_type != CHIP_ARCTURUS) { 3889 r = gfx_v9_0_ngg_en(adev); 3890 if (r) 3891 return r; 3892 } 3893 3894 return r; 3895 } 3896 3897 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3898 { 3899 int r, i; 3900 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3901 3902 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3903 if (r) 3904 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3905 3906 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3907 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3908 3909 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3910 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3911 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3912 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3913 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3914 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3915 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3916 amdgpu_ring_write(kiq_ring, 0); 3917 amdgpu_ring_write(kiq_ring, 0); 3918 amdgpu_ring_write(kiq_ring, 0); 3919 } 3920 r = amdgpu_ring_test_helper(kiq_ring); 3921 if (r) 3922 DRM_ERROR("KCQ disable failed\n"); 3923 3924 return r; 3925 } 3926 3927 static int gfx_v9_0_hw_fini(void *handle) 3928 { 3929 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3930 3931 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3932 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3933 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3934 3935 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3936 gfx_v9_0_kcq_disable(adev); 3937 3938 if (amdgpu_sriov_vf(adev)) { 3939 gfx_v9_0_cp_gfx_enable(adev, false); 3940 /* must disable polling for SRIOV when hw finished, otherwise 3941 * CPC engine may still keep fetching WB address which is already 3942 * invalid after sw finished and trigger DMAR reading error in 3943 * hypervisor side. 
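		 * Clearing CP_PQ_WPTR_POLL_CNTL.EN below is what stops the
		 * CPC from polling those write-back addresses.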
3944 */ 3945 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3946 return 0; 3947 } 3948 3949 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3950 * otherwise KIQ is hanging when binding back 3951 */ 3952 if (!adev->in_gpu_reset && !adev->in_suspend) { 3953 mutex_lock(&adev->srbm_mutex); 3954 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3955 adev->gfx.kiq.ring.pipe, 3956 adev->gfx.kiq.ring.queue, 0); 3957 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3958 soc15_grbm_select(adev, 0, 0, 0, 0); 3959 mutex_unlock(&adev->srbm_mutex); 3960 } 3961 3962 gfx_v9_0_cp_enable(adev, false); 3963 adev->gfx.rlc.funcs->stop(adev); 3964 3965 gfx_v9_0_csb_vram_unpin(adev); 3966 3967 return 0; 3968 } 3969 3970 static int gfx_v9_0_suspend(void *handle) 3971 { 3972 return gfx_v9_0_hw_fini(handle); 3973 } 3974 3975 static int gfx_v9_0_resume(void *handle) 3976 { 3977 return gfx_v9_0_hw_init(handle); 3978 } 3979 3980 static bool gfx_v9_0_is_idle(void *handle) 3981 { 3982 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3983 3984 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3985 GRBM_STATUS, GUI_ACTIVE)) 3986 return false; 3987 else 3988 return true; 3989 } 3990 3991 static int gfx_v9_0_wait_for_idle(void *handle) 3992 { 3993 unsigned i; 3994 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3995 3996 for (i = 0; i < adev->usec_timeout; i++) { 3997 if (gfx_v9_0_is_idle(handle)) 3998 return 0; 3999 udelay(1); 4000 } 4001 return -ETIMEDOUT; 4002 } 4003 4004 static int gfx_v9_0_soft_reset(void *handle) 4005 { 4006 u32 grbm_soft_reset = 0; 4007 u32 tmp; 4008 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4009 4010 /* GRBM_STATUS */ 4011 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4012 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4013 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4014 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4015 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4016 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4017 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4018 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4019 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4020 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4021 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4022 } 4023 4024 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4025 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4026 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4027 } 4028 4029 /* GRBM_STATUS2 */ 4030 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4031 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4032 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4033 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4034 4035 4036 if (grbm_soft_reset) { 4037 /* stop the rlc */ 4038 adev->gfx.rlc.funcs->stop(adev); 4039 4040 if (adev->asic_type != CHIP_ARCTURUS) 4041 /* Disable GFX parsing/prefetching */ 4042 gfx_v9_0_cp_gfx_enable(adev, false); 4043 4044 /* Disable MEC parsing/prefetching */ 4045 gfx_v9_0_cp_compute_enable(adev, false); 4046 4047 if (grbm_soft_reset) { 4048 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4049 tmp |= grbm_soft_reset; 4050 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4051 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4052 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4053 4054 udelay(50); 4055 4056 tmp &= ~grbm_soft_reset; 4057 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4058 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4059 } 4060 4061 /* Wait a 
little for things to settle down */ 4062 udelay(50); 4063 } 4064 return 0; 4065 } 4066 4067 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4068 { 4069 uint64_t clock; 4070 4071 mutex_lock(&adev->gfx.gpu_clock_mutex); 4072 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4073 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4074 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4075 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4076 return clock; 4077 } 4078 4079 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4080 uint32_t vmid, 4081 uint32_t gds_base, uint32_t gds_size, 4082 uint32_t gws_base, uint32_t gws_size, 4083 uint32_t oa_base, uint32_t oa_size) 4084 { 4085 struct amdgpu_device *adev = ring->adev; 4086 4087 /* GDS Base */ 4088 gfx_v9_0_write_data_to_reg(ring, 0, false, 4089 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4090 gds_base); 4091 4092 /* GDS Size */ 4093 gfx_v9_0_write_data_to_reg(ring, 0, false, 4094 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4095 gds_size); 4096 4097 /* GWS */ 4098 gfx_v9_0_write_data_to_reg(ring, 0, false, 4099 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4100 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4101 4102 /* OA */ 4103 gfx_v9_0_write_data_to_reg(ring, 0, false, 4104 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4105 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4106 } 4107 4108 static const u32 vgpr_init_compute_shader[] = 4109 { 4110 0xb07c0000, 0xbe8000ff, 4111 0x000000f8, 0xbf110800, 4112 0x7e000280, 0x7e020280, 4113 0x7e040280, 0x7e060280, 4114 0x7e080280, 0x7e0a0280, 4115 0x7e0c0280, 0x7e0e0280, 4116 0x80808800, 0xbe803200, 4117 0xbf84fff5, 0xbf9c0000, 4118 0xd28c0001, 0x0001007f, 4119 0xd28d0001, 0x0002027e, 4120 0x10020288, 0xb8810904, 4121 0xb7814000, 0xd1196a01, 4122 0x00000301, 0xbe800087, 4123 0xbefc00c1, 0xd89c4000, 4124 0x00020201, 0xd89cc080, 4125 0x00040401, 0x320202ff, 4126 0x00000800, 0x80808100, 4127 0xbf84fff8, 0x7e020280, 4128 0xbf810000, 0x00000000, 4129 }; 4130 4131 static const u32 sgpr_init_compute_shader[] = 4132 { 4133 0xb07c0000, 0xbe8000ff, 4134 0x0000005f, 0xbee50080, 4135 0xbe812c65, 0xbe822c65, 4136 0xbe832c65, 0xbe842c65, 4137 0xbe852c65, 0xb77c0005, 4138 0x80808500, 0xbf84fff8, 4139 0xbe800080, 0xbf810000, 4140 }; 4141 4142 static const struct soc15_reg_entry vgpr_init_regs[] = { 4143 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4144 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4145 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4146 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4147 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4148 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4149 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4150 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4151 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 4152 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4153 }; 4154 4155 static const struct soc15_reg_entry sgpr_init_regs[] = { 4156 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4157 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4158 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4159 { 
SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4160 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4161 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4162 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4163 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4164 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 4165 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4166 }; 4167 4168 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 4169 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4170 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4171 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4172 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4173 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4174 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4175 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4176 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4177 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4178 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4179 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4180 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4181 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4182 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4183 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4184 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4185 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4186 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4187 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4188 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4189 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4190 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4191 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4192 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4193 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4194 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4195 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4196 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4197 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4198 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4199 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4200 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4201 }; 4202 4203 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4204 { 4205 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4206 int i, r; 4207 4208 r = amdgpu_ring_alloc(ring, 7); 4209 if (r) { 4210 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4211 ring->name, r); 4212 return r; 4213 } 4214 4215 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4216 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4217 4218 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4219 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4220 PACKET3_DMA_DATA_DST_SEL(1) | 4221 PACKET3_DMA_DATA_SRC_SEL(2) | 4222 PACKET3_DMA_DATA_ENGINE(0))); 4223 amdgpu_ring_write(ring, 0); 4224 amdgpu_ring_write(ring, 0); 4225 amdgpu_ring_write(ring, 0); 4226 amdgpu_ring_write(ring, 0); 4227 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4228 adev->gds.gds_size); 4229 4230 amdgpu_ring_commit(ring); 4231 4232 for (i = 0; i < adev->usec_timeout; i++) { 4233 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4234 break; 
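		/* DMA_DATA packet not consumed yet; wait 1us and poll again */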
4235 udelay(1); 4236 } 4237 4238 if (i >= adev->usec_timeout) 4239 r = -ETIMEDOUT; 4240 4241 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4242 4243 return r; 4244 } 4245 4246 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4247 { 4248 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4249 struct amdgpu_ib ib; 4250 struct dma_fence *f = NULL; 4251 int r, i, j, k; 4252 unsigned total_size, vgpr_offset, sgpr_offset; 4253 u64 gpu_addr; 4254 4255 /* only support when RAS is enabled */ 4256 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4257 return 0; 4258 4259 /* bail if the compute ring is not ready */ 4260 if (!ring->sched.ready) 4261 return 0; 4262 4263 total_size = 4264 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4265 total_size += 4266 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4267 total_size = ALIGN(total_size, 256); 4268 vgpr_offset = total_size; 4269 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4270 sgpr_offset = total_size; 4271 total_size += sizeof(sgpr_init_compute_shader); 4272 4273 /* allocate an indirect buffer to put the commands in */ 4274 memset(&ib, 0, sizeof(ib)); 4275 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4276 if (r) { 4277 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4278 return r; 4279 } 4280 4281 /* load the compute shaders */ 4282 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4283 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4284 4285 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4286 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4287 4288 /* init the ib length to 0 */ 4289 ib.length_dw = 0; 4290 4291 /* VGPR */ 4292 /* write the register state for the compute dispatch */ 4293 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 4294 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4295 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4296 - PACKET3_SET_SH_REG_START; 4297 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4298 } 4299 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4300 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4301 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4302 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4303 - PACKET3_SET_SH_REG_START; 4304 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4305 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4306 4307 /* write dispatch packet */ 4308 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4309 ib.ptr[ib.length_dw++] = 128; /* x */ 4310 ib.ptr[ib.length_dw++] = 1; /* y */ 4311 ib.ptr[ib.length_dw++] = 1; /* z */ 4312 ib.ptr[ib.length_dw++] = 4313 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4314 4315 /* write CS partial flush packet */ 4316 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4317 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4318 4319 /* SGPR */ 4320 /* write the register state for the compute dispatch */ 4321 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 4322 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4323 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 4324 - PACKET3_SET_SH_REG_START; 4325 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 4326 } 4327 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4328 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4329 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4330 
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4331 - PACKET3_SET_SH_REG_START; 4332 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4333 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4334 4335 /* write dispatch packet */ 4336 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4337 ib.ptr[ib.length_dw++] = 128; /* x */ 4338 ib.ptr[ib.length_dw++] = 1; /* y */ 4339 ib.ptr[ib.length_dw++] = 1; /* z */ 4340 ib.ptr[ib.length_dw++] = 4341 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4342 4343 /* write CS partial flush packet */ 4344 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4345 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4346 4347 /* shedule the ib on the ring */ 4348 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4349 if (r) { 4350 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4351 goto fail; 4352 } 4353 4354 /* wait for the GPU to finish processing the IB */ 4355 r = dma_fence_wait(f, false); 4356 if (r) { 4357 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4358 goto fail; 4359 } 4360 4361 /* read back registers to clear the counters */ 4362 mutex_lock(&adev->grbm_idx_mutex); 4363 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 4364 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 4365 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 4366 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 4367 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 4368 } 4369 } 4370 } 4371 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 4372 mutex_unlock(&adev->grbm_idx_mutex); 4373 4374 fail: 4375 amdgpu_ib_free(adev, &ib, NULL); 4376 dma_fence_put(f); 4377 4378 return r; 4379 } 4380 4381 static int gfx_v9_0_early_init(void *handle) 4382 { 4383 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4384 4385 if (adev->asic_type == CHIP_ARCTURUS) 4386 adev->gfx.num_gfx_rings = 0; 4387 else 4388 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4389 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 4390 gfx_v9_0_set_ring_funcs(adev); 4391 gfx_v9_0_set_irq_funcs(adev); 4392 gfx_v9_0_set_gds_init(adev); 4393 gfx_v9_0_set_rlc_funcs(adev); 4394 4395 return 0; 4396 } 4397 4398 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 4399 struct ras_err_data *err_data, 4400 struct amdgpu_iv_entry *entry); 4401 4402 static int gfx_v9_0_ecc_late_init(void *handle) 4403 { 4404 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4405 struct ras_common_if **ras_if = &adev->gfx.ras_if; 4406 struct ras_ih_if ih_info = { 4407 .cb = gfx_v9_0_process_ras_data_cb, 4408 }; 4409 struct ras_fs_if fs_info = { 4410 .sysfs_name = "gfx_err_count", 4411 .debugfs_name = "gfx_err_inject", 4412 }; 4413 struct ras_common_if ras_block = { 4414 .block = AMDGPU_RAS_BLOCK__GFX, 4415 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 4416 .sub_block_index = 0, 4417 .name = "gfx", 4418 }; 4419 int r; 4420 4421 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 4422 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 4423 return 0; 4424 } 4425 4426 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4427 if (r) 4428 return r; 4429 4430 /* requires IBs so do in late init after IB pool is initialized */ 4431 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4432 if (r) 4433 return r; 4434 4435 /* handle resume path. */ 4436 if (*ras_if) { 4437 /* resend ras TA enable cmd during resume. 4438 * prepare to handle failure. 
4439 */ 4440 ih_info.head = **ras_if; 4441 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4442 if (r) { 4443 if (r == -EAGAIN) { 4444 /* request a gpu reset. will run again. */ 4445 amdgpu_ras_request_reset_on_boot(adev, 4446 AMDGPU_RAS_BLOCK__GFX); 4447 return 0; 4448 } 4449 /* fail to enable ras, cleanup all. */ 4450 goto irq; 4451 } 4452 /* enable successfully. continue. */ 4453 goto resume; 4454 } 4455 4456 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 4457 if (!*ras_if) 4458 return -ENOMEM; 4459 4460 **ras_if = ras_block; 4461 4462 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4463 if (r) { 4464 if (r == -EAGAIN) { 4465 amdgpu_ras_request_reset_on_boot(adev, 4466 AMDGPU_RAS_BLOCK__GFX); 4467 r = 0; 4468 } 4469 goto feature; 4470 } 4471 4472 ih_info.head = **ras_if; 4473 fs_info.head = **ras_if; 4474 4475 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 4476 if (r) 4477 goto interrupt; 4478 4479 amdgpu_ras_debugfs_create(adev, &fs_info); 4480 4481 r = amdgpu_ras_sysfs_create(adev, &fs_info); 4482 if (r) 4483 goto sysfs; 4484 resume: 4485 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 4486 if (r) 4487 goto irq; 4488 4489 return 0; 4490 irq: 4491 amdgpu_ras_sysfs_remove(adev, *ras_if); 4492 sysfs: 4493 amdgpu_ras_debugfs_remove(adev, *ras_if); 4494 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 4495 interrupt: 4496 amdgpu_ras_feature_enable(adev, *ras_if, 0); 4497 feature: 4498 kfree(*ras_if); 4499 *ras_if = NULL; 4500 return r; 4501 } 4502 4503 static int gfx_v9_0_late_init(void *handle) 4504 { 4505 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4506 int r; 4507 4508 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4509 if (r) 4510 return r; 4511 4512 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4513 if (r) 4514 return r; 4515 4516 r = gfx_v9_0_ecc_late_init(handle); 4517 if (r) 4518 return r; 4519 4520 return 0; 4521 } 4522 4523 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4524 { 4525 uint32_t rlc_setting; 4526 4527 /* if RLC is not enabled, do nothing */ 4528 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4529 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4530 return false; 4531 4532 return true; 4533 } 4534 4535 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4536 { 4537 uint32_t data; 4538 unsigned i; 4539 4540 data = RLC_SAFE_MODE__CMD_MASK; 4541 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4542 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4543 4544 /* wait for RLC_SAFE_MODE */ 4545 for (i = 0; i < adev->usec_timeout; i++) { 4546 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4547 break; 4548 udelay(1); 4549 } 4550 } 4551 4552 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4553 { 4554 uint32_t data; 4555 4556 data = RLC_SAFE_MODE__CMD_MASK; 4557 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4558 } 4559 4560 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4561 bool enable) 4562 { 4563 amdgpu_gfx_rlc_enter_safe_mode(adev); 4564 4565 if (is_support_sw_smu(adev) && !enable) 4566 smu_set_gfx_cgpg(&adev->smu, enable); 4567 4568 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4569 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4570 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4571 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4572 } else { 4573 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4574 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4575 } 4576 4577 
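	/* CGPG/pipeline power-gating state is updated; leave RLC safe mode */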
amdgpu_gfx_rlc_exit_safe_mode(adev); 4578 } 4579 4580 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4581 bool enable) 4582 { 4583 /* TODO: double check if we need to perform under safe mode */ 4584 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4585 4586 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4587 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4588 else 4589 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4590 4591 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4592 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4593 else 4594 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4595 4596 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4597 } 4598 4599 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4600 bool enable) 4601 { 4602 uint32_t data, def; 4603 4604 amdgpu_gfx_rlc_enter_safe_mode(adev); 4605 4606 /* It is disabled by HW by default */ 4607 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4608 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4609 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4610 4611 if (adev->asic_type != CHIP_VEGA12) 4612 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4613 4614 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4615 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4616 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4617 4618 /* only for Vega10 & Raven1 */ 4619 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4620 4621 if (def != data) 4622 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4623 4624 /* MGLS is a global flag to control all MGLS in GFX */ 4625 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4626 /* 2 - RLC memory Light sleep */ 4627 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4628 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4629 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4630 if (def != data) 4631 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4632 } 4633 /* 3 - CP memory Light sleep */ 4634 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4635 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4636 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4637 if (def != data) 4638 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4639 } 4640 } 4641 } else { 4642 /* 1 - MGCG_OVERRIDE */ 4643 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4644 4645 if (adev->asic_type != CHIP_VEGA12) 4646 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4647 4648 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4649 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4650 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4651 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4652 4653 if (def != data) 4654 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4655 4656 /* 2 - disable MGLS in RLC */ 4657 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4658 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4659 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4660 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4661 } 4662 4663 /* 3 - disable MGLS in CP */ 4664 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4665 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4666 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4667 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4668 } 4669 } 4670 4671 amdgpu_gfx_rlc_exit_safe_mode(adev); 4672 } 4673 4674 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4675 bool enable) 4676 { 4677 
uint32_t data, def; 4678 4679 if (adev->asic_type == CHIP_ARCTURUS) 4680 return; 4681 4682 amdgpu_gfx_rlc_enter_safe_mode(adev); 4683 4684 /* Enable 3D CGCG/CGLS */ 4685 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4686 /* write cmd to clear cgcg/cgls ov */ 4687 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4688 /* unset CGCG override */ 4689 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4690 /* update CGCG and CGLS override bits */ 4691 if (def != data) 4692 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4693 4694 /* enable 3Dcgcg FSM(0x0000363f) */ 4695 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4696 4697 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4698 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4699 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4700 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4701 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4702 if (def != data) 4703 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4704 4705 /* set IDLE_POLL_COUNT(0x00900100) */ 4706 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4707 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4708 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4709 if (def != data) 4710 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4711 } else { 4712 /* Disable CGCG/CGLS */ 4713 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4714 /* disable cgcg, cgls should be disabled */ 4715 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4716 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4717 /* disable cgcg and cgls in FSM */ 4718 if (def != data) 4719 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4720 } 4721 4722 amdgpu_gfx_rlc_exit_safe_mode(adev); 4723 } 4724 4725 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4726 bool enable) 4727 { 4728 uint32_t def, data; 4729 4730 amdgpu_gfx_rlc_enter_safe_mode(adev); 4731 4732 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4733 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4734 /* unset CGCG override */ 4735 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4736 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4737 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4738 else 4739 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4740 /* update CGCG and CGLS override bits */ 4741 if (def != data) 4742 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4743 4744 /* enable cgcg FSM(0x0000363F) */ 4745 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4746 4747 if (adev->asic_type == CHIP_ARCTURUS) 4748 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4749 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4750 else 4751 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4752 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4753 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4754 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4755 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4756 if (def != data) 4757 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4758 4759 /* set IDLE_POLL_COUNT(0x00900100) */ 4760 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4761 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4762 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4763 if (def != data) 4764 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4765 } else { 4766 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 
4767 /* reset CGCG/CGLS bits */ 4768 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4769 /* disable cgcg and cgls in FSM */ 4770 if (def != data) 4771 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4772 } 4773 4774 amdgpu_gfx_rlc_exit_safe_mode(adev); 4775 } 4776 4777 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4778 bool enable) 4779 { 4780 if (enable) { 4781 /* CGCG/CGLS should be enabled after MGCG/MGLS 4782 * === MGCG + MGLS === 4783 */ 4784 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4785 /* === CGCG /CGLS for GFX 3D Only === */ 4786 gfx_v9_0_update_3d_clock_gating(adev, enable); 4787 /* === CGCG + CGLS === */ 4788 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4789 } else { 4790 /* CGCG/CGLS should be disabled before MGCG/MGLS 4791 * === CGCG + CGLS === 4792 */ 4793 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4794 /* === CGCG /CGLS for GFX 3D Only === */ 4795 gfx_v9_0_update_3d_clock_gating(adev, enable); 4796 /* === MGCG + MGLS === */ 4797 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4798 } 4799 return 0; 4800 } 4801 4802 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4803 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4804 .set_safe_mode = gfx_v9_0_set_safe_mode, 4805 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4806 .init = gfx_v9_0_rlc_init, 4807 .get_csb_size = gfx_v9_0_get_csb_size, 4808 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4809 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4810 .resume = gfx_v9_0_rlc_resume, 4811 .stop = gfx_v9_0_rlc_stop, 4812 .reset = gfx_v9_0_rlc_reset, 4813 .start = gfx_v9_0_rlc_start 4814 }; 4815 4816 static int gfx_v9_0_set_powergating_state(void *handle, 4817 enum amd_powergating_state state) 4818 { 4819 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4820 bool enable = (state == AMD_PG_STATE_GATE) ? 
true : false; 4821 4822 switch (adev->asic_type) { 4823 case CHIP_RAVEN: 4824 case CHIP_RENOIR: 4825 if (!enable) { 4826 amdgpu_gfx_off_ctrl(adev, false); 4827 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4828 } 4829 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4830 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4831 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4832 } else { 4833 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4834 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4835 } 4836 4837 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4838 gfx_v9_0_enable_cp_power_gating(adev, true); 4839 else 4840 gfx_v9_0_enable_cp_power_gating(adev, false); 4841 4842 /* update gfx cgpg state */ 4843 if (is_support_sw_smu(adev) && enable) 4844 smu_set_gfx_cgpg(&adev->smu, enable); 4845 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4846 4847 /* update mgcg state */ 4848 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4849 4850 if (enable) 4851 amdgpu_gfx_off_ctrl(adev, true); 4852 break; 4853 case CHIP_VEGA12: 4854 if (!enable) { 4855 amdgpu_gfx_off_ctrl(adev, false); 4856 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4857 } else { 4858 amdgpu_gfx_off_ctrl(adev, true); 4859 } 4860 break; 4861 default: 4862 break; 4863 } 4864 4865 return 0; 4866 } 4867 4868 static int gfx_v9_0_set_clockgating_state(void *handle, 4869 enum amd_clockgating_state state) 4870 { 4871 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4872 4873 if (amdgpu_sriov_vf(adev)) 4874 return 0; 4875 4876 switch (adev->asic_type) { 4877 case CHIP_VEGA10: 4878 case CHIP_VEGA12: 4879 case CHIP_VEGA20: 4880 case CHIP_RAVEN: 4881 case CHIP_ARCTURUS: 4882 case CHIP_RENOIR: 4883 gfx_v9_0_update_gfx_clock_gating(adev, 4884 state == AMD_CG_STATE_GATE ? 
true : false); 4885 break; 4886 default: 4887 break; 4888 } 4889 return 0; 4890 } 4891 4892 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4893 { 4894 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4895 int data; 4896 4897 if (amdgpu_sriov_vf(adev)) 4898 *flags = 0; 4899 4900 /* AMD_CG_SUPPORT_GFX_MGCG */ 4901 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4902 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4903 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4904 4905 /* AMD_CG_SUPPORT_GFX_CGCG */ 4906 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4907 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4908 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4909 4910 /* AMD_CG_SUPPORT_GFX_CGLS */ 4911 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4912 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4913 4914 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4915 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4916 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4917 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4918 4919 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4920 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4921 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4922 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4923 4924 if (adev->asic_type != CHIP_ARCTURUS) { 4925 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4926 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4927 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4928 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4929 4930 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4931 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4932 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4933 } 4934 } 4935 4936 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4937 { 4938 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4939 } 4940 4941 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4942 { 4943 struct amdgpu_device *adev = ring->adev; 4944 u64 wptr; 4945 4946 /* XXX check if swapping is necessary on BE */ 4947 if (ring->use_doorbell) { 4948 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4949 } else { 4950 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4951 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4952 } 4953 4954 return wptr; 4955 } 4956 4957 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4958 { 4959 struct amdgpu_device *adev = ring->adev; 4960 4961 if (ring->use_doorbell) { 4962 /* XXX check if swapping is necessary on BE */ 4963 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4964 WDOORBELL64(ring->doorbell_index, ring->wptr); 4965 } else { 4966 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4967 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4968 } 4969 } 4970 4971 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4972 { 4973 struct amdgpu_device *adev = ring->adev; 4974 u32 ref_and_mask, reg_mem_engine; 4975 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 4976 4977 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4978 switch (ring->me) { 4979 case 1: 4980 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4981 break; 4982 case 2: 4983 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4984 break; 4985 default: 4986 return; 4987 } 4988 reg_mem_engine = 0; 4989 } else { 4990 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4991 reg_mem_engine = 1; /* pfp */ 4992 } 4993 4994 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4995 
adev->nbio.funcs->get_hdp_flush_req_offset(adev), 4996 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 4997 ref_and_mask, ref_and_mask, 0x20); 4998 } 4999 5000 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5001 struct amdgpu_job *job, 5002 struct amdgpu_ib *ib, 5003 uint32_t flags) 5004 { 5005 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5006 u32 header, control = 0; 5007 5008 if (ib->flags & AMDGPU_IB_FLAG_CE) 5009 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5010 else 5011 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5012 5013 control |= ib->length_dw | (vmid << 24); 5014 5015 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5016 control |= INDIRECT_BUFFER_PRE_ENB(1); 5017 5018 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 5019 gfx_v9_0_ring_emit_de_meta(ring); 5020 } 5021 5022 amdgpu_ring_write(ring, header); 5023 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5024 amdgpu_ring_write(ring, 5025 #ifdef __BIG_ENDIAN 5026 (2 << 0) | 5027 #endif 5028 lower_32_bits(ib->gpu_addr)); 5029 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5030 amdgpu_ring_write(ring, control); 5031 } 5032 5033 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5034 struct amdgpu_job *job, 5035 struct amdgpu_ib *ib, 5036 uint32_t flags) 5037 { 5038 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5039 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5040 5041 /* Currently, there is a high possibility to get wave ID mismatch 5042 * between ME and GDS, leading to a hw deadlock, because ME generates 5043 * different wave IDs than the GDS expects. This situation happens 5044 * randomly when at least 5 compute pipes use GDS ordered append. 5045 * The wave IDs generated by ME are also wrong after suspend/resume. 5046 * Those are probably bugs somewhere else in the kernel driver. 5047 * 5048 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5049 * GDS to 0 for this ring (me/pipe). 5050 */ 5051 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5052 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5053 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5054 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5055 } 5056 5057 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5058 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5059 amdgpu_ring_write(ring, 5060 #ifdef __BIG_ENDIAN 5061 (2 << 0) | 5062 #endif 5063 lower_32_bits(ib->gpu_addr)); 5064 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5065 amdgpu_ring_write(ring, control); 5066 } 5067 5068 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5069 u64 seq, unsigned flags) 5070 { 5071 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5072 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5073 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5074 5075 /* RELEASE_MEM - flush caches, send int */ 5076 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5077 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5078 EOP_TC_NC_ACTION_EN) : 5079 (EOP_TCL1_ACTION_EN | 5080 EOP_TC_ACTION_EN | 5081 EOP_TC_WB_ACTION_EN | 5082 EOP_TC_MD_ACTION_EN)) | 5083 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5084 EVENT_INDEX(5))); 5085 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5086 5087 /* 5088 * the address should be Qword aligned if 64bit write, Dword 5089 * aligned if only send 32bit data low (discard data high) 5090 */ 5091 if (write64bit) 5092 BUG_ON(addr & 0x7); 5093 else 5094 BUG_ON(addr & 0x3); 5095 amdgpu_ring_write(ring, lower_32_bits(addr)); 5096 amdgpu_ring_write(ring, upper_32_bits(addr)); 5097 amdgpu_ring_write(ring, lower_32_bits(seq)); 5098 amdgpu_ring_write(ring, upper_32_bits(seq)); 5099 amdgpu_ring_write(ring, 0); 5100 } 5101 5102 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5103 { 5104 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5105 uint32_t seq = ring->fence_drv.sync_seq; 5106 uint64_t addr = ring->fence_drv.gpu_addr; 5107 5108 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5109 lower_32_bits(addr), upper_32_bits(addr), 5110 seq, 0xffffffff, 4); 5111 } 5112 5113 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5114 unsigned vmid, uint64_t pd_addr) 5115 { 5116 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5117 5118 /* compute doesn't have PFP */ 5119 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5120 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5121 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5122 amdgpu_ring_write(ring, 0x0); 5123 } 5124 } 5125 5126 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5127 { 5128 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5129 } 5130 5131 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5132 { 5133 u64 wptr; 5134 5135 /* XXX check if swapping is necessary on BE */ 5136 if (ring->use_doorbell) 5137 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5138 else 5139 BUG(); 5140 return wptr; 5141 } 5142 5143 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 5144 bool acquire) 5145 { 5146 struct amdgpu_device *adev = ring->adev; 5147 int pipe_num, tmp, reg; 5148 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 5149 5150 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 5151 5152 /* first me only has 2 entries, GFX and HP3D */ 5153 if (ring->me > 0) 5154 pipe_num -= 2; 5155 5156 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 5157 tmp = RREG32(reg); 5158 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 5159 WREG32(reg, tmp); 5160 } 5161 5162 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 5163 struct amdgpu_ring *ring, 5164 bool acquire) 5165 { 5166 int i, pipe; 5167 bool reserve; 5168 struct amdgpu_ring *iring; 5169 5170 mutex_lock(&adev->gfx.pipe_reserve_mutex); 5171 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 5172 if (acquire) 5173 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5174 else 5175 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5176 5177 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 5178 /* Clear all reservations - everyone reacquires all resources */ 5179 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 5180 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 5181 true); 5182 5183 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 5184 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 5185 true); 5186 } else { 5187 /* Lower all pipes without a current reservation */ 5188 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 5189 iring = &adev->gfx.gfx_ring[i]; 5190 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5191 iring->me, 5192 iring->pipe, 5193 0); 5194 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5195 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5196 } 5197 5198 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 5199 iring = &adev->gfx.compute_ring[i]; 5200 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5201 iring->me, 5202 iring->pipe, 5203 0); 5204 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5205 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5206 } 5207 } 5208 5209 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 5210 } 5211 5212 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 5213 struct amdgpu_ring *ring, 5214 bool acquire) 5215 { 5216 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 5217 uint32_t queue_priority = acquire ? 
0xf : 0x0; 5218 5219 mutex_lock(&adev->srbm_mutex); 5220 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5221 5222 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 5223 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 5224 5225 soc15_grbm_select(adev, 0, 0, 0, 0); 5226 mutex_unlock(&adev->srbm_mutex); 5227 } 5228 5229 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 5230 enum drm_sched_priority priority) 5231 { 5232 struct amdgpu_device *adev = ring->adev; 5233 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 5234 5235 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 5236 return; 5237 5238 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 5239 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 5240 } 5241 5242 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5243 { 5244 struct amdgpu_device *adev = ring->adev; 5245 5246 /* XXX check if swapping is necessary on BE */ 5247 if (ring->use_doorbell) { 5248 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5249 WDOORBELL64(ring->doorbell_index, ring->wptr); 5250 } else{ 5251 BUG(); /* only DOORBELL method supported on gfx9 now */ 5252 } 5253 } 5254 5255 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5256 u64 seq, unsigned int flags) 5257 { 5258 struct amdgpu_device *adev = ring->adev; 5259 5260 /* we only allocate 32bit for each seq wb address */ 5261 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5262 5263 /* write fence seq to the "addr" */ 5264 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5265 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5266 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5267 amdgpu_ring_write(ring, lower_32_bits(addr)); 5268 amdgpu_ring_write(ring, upper_32_bits(addr)); 5269 amdgpu_ring_write(ring, lower_32_bits(seq)); 5270 5271 if (flags & AMDGPU_FENCE_FLAG_INT) { 5272 /* set register to trigger INT */ 5273 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5274 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5275 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5276 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5277 amdgpu_ring_write(ring, 0); 5278 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5279 } 5280 } 5281 5282 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5283 { 5284 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5285 amdgpu_ring_write(ring, 0); 5286 } 5287 5288 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5289 { 5290 struct v9_ce_ib_state ce_payload = {0}; 5291 uint64_t csa_addr; 5292 int cnt; 5293 5294 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5295 csa_addr = amdgpu_csa_vaddr(ring->adev); 5296 5297 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5298 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5299 WRITE_DATA_DST_SEL(8) | 5300 WR_CONFIRM) | 5301 WRITE_DATA_CACHE_POLICY(0)); 5302 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5303 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5304 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5305 } 5306 5307 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5308 { 5309 struct v9_de_ib_state de_payload = {0}; 5310 uint64_t csa_addr, gds_addr; 5311 int cnt; 5312 5313 csa_addr = amdgpu_csa_vaddr(ring->adev); 5314 gds_addr = csa_addr + 4096; 5315 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 5316 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5317 5318 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5319 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5320 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5321 WRITE_DATA_DST_SEL(8) | 5322 WR_CONFIRM) | 5323 WRITE_DATA_CACHE_POLICY(0)); 5324 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5325 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5326 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5327 } 5328 5329 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5330 { 5331 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5332 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5333 } 5334 5335 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5336 { 5337 uint32_t dw2 = 0; 5338 5339 if (amdgpu_sriov_vf(ring->adev)) 5340 gfx_v9_0_ring_emit_ce_meta(ring); 5341 5342 gfx_v9_0_ring_emit_tmz(ring, true); 5343 5344 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5345 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5346 /* set load_global_config & load_global_uconfig */ 5347 dw2 |= 0x8001; 5348 /* set load_cs_sh_regs */ 5349 dw2 |= 0x01000000; 5350 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5351 dw2 |= 0x10002; 5352 5353 /* set load_ce_ram if preamble presented */ 5354 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5355 dw2 |= 0x10000000; 5356 } else { 5357 /* still load_ce_ram if this is the first time preamble presented 5358 * although there is no context switch happens. 5359 */ 5360 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5361 dw2 |= 0x10000000; 5362 } 5363 5364 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5365 amdgpu_ring_write(ring, dw2); 5366 amdgpu_ring_write(ring, 0); 5367 } 5368 5369 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5370 { 5371 unsigned ret; 5372 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5373 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5374 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5375 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5376 ret = ring->wptr & ring->buf_mask; 5377 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5378 return ret; 5379 } 5380 5381 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5382 { 5383 unsigned cur; 5384 BUG_ON(offset > ring->buf_mask); 5385 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5386 5387 cur = (ring->wptr & ring->buf_mask) - 1; 5388 if (likely(cur > offset)) 5389 ring->ring[offset] = cur - offset; 5390 else 5391 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5392 } 5393 5394 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5395 { 5396 struct amdgpu_device *adev = ring->adev; 5397 5398 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5399 amdgpu_ring_write(ring, 0 | /* src: register*/ 5400 (5 << 8) | /* dst: memory */ 5401 (1 << 20)); /* write confirm */ 5402 amdgpu_ring_write(ring, reg); 5403 amdgpu_ring_write(ring, 0); 5404 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5405 adev->virt.reg_val_offs * 4)); 5406 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5407 adev->virt.reg_val_offs * 4)); 5408 } 5409 5410 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5411 uint32_t val) 5412 { 5413 uint32_t cmd = 0; 5414 5415 switch (ring->funcs->type) { 5416 case AMDGPU_RING_TYPE_GFX: 5417 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5418 break; 5419 case AMDGPU_RING_TYPE_KIQ: 5420 cmd = (1 << 16); /* no inc addr */ 5421 break; 5422 default: 5423 cmd = WR_CONFIRM; 5424 break; 5425 } 5426 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5427 amdgpu_ring_write(ring, cmd); 5428 amdgpu_ring_write(ring, reg); 5429 amdgpu_ring_write(ring, 0); 5430 amdgpu_ring_write(ring, val); 5431 } 5432 5433 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5434 uint32_t val, uint32_t mask) 5435 { 5436 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5437 } 5438 5439 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5440 uint32_t reg0, uint32_t reg1, 5441 uint32_t ref, uint32_t mask) 5442 { 5443 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5444 struct amdgpu_device *adev = ring->adev; 5445 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5446 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5447 5448 if (fw_version_ok) 5449 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5450 ref, mask, 0x20); 5451 else 5452 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5453 ref, mask); 5454 } 5455 5456 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5457 { 5458 struct amdgpu_device *adev = ring->adev; 5459 uint32_t value = 0; 5460 5461 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5462 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5463 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5464 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5465 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5466 } 5467 5468 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5469 enum amdgpu_interrupt_state state) 5470 { 5471 switch (state) { 5472 case AMDGPU_IRQ_STATE_DISABLE: 5473 case AMDGPU_IRQ_STATE_ENABLE: 5474 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5475 TIME_STAMP_INT_ENABLE, 5476 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5477 break; 5478 default: 5479 break; 5480 } 5481 } 5482 5483 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5484 int me, int pipe, 5485 enum amdgpu_interrupt_state state) 5486 { 5487 u32 mec_int_cntl, mec_int_cntl_reg; 5488 5489 /* 5490 * amdgpu controls only the first MEC. That's why this function only 5491 * handles the setting of interrupts for this specific MEC. All other 5492 * pipes' interrupts are set by amdkfd. 
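 * For that reason only ME1 (the first MEC) pipes 0-3 are mapped to their
 * mmCP_ME1_PIPEn_INT_CNTL registers below; any other me/pipe value is
 * rejected with a debug message.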
5493 */ 5494 5495 if (me == 1) { 5496 switch (pipe) { 5497 case 0: 5498 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5499 break; 5500 case 1: 5501 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5502 break; 5503 case 2: 5504 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5505 break; 5506 case 3: 5507 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5508 break; 5509 default: 5510 DRM_DEBUG("invalid pipe %d\n", pipe); 5511 return; 5512 } 5513 } else { 5514 DRM_DEBUG("invalid me %d\n", me); 5515 return; 5516 } 5517 5518 switch (state) { 5519 case AMDGPU_IRQ_STATE_DISABLE: 5520 mec_int_cntl = RREG32(mec_int_cntl_reg); 5521 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5522 TIME_STAMP_INT_ENABLE, 0); 5523 WREG32(mec_int_cntl_reg, mec_int_cntl); 5524 break; 5525 case AMDGPU_IRQ_STATE_ENABLE: 5526 mec_int_cntl = RREG32(mec_int_cntl_reg); 5527 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5528 TIME_STAMP_INT_ENABLE, 1); 5529 WREG32(mec_int_cntl_reg, mec_int_cntl); 5530 break; 5531 default: 5532 break; 5533 } 5534 } 5535 5536 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5537 struct amdgpu_irq_src *source, 5538 unsigned type, 5539 enum amdgpu_interrupt_state state) 5540 { 5541 switch (state) { 5542 case AMDGPU_IRQ_STATE_DISABLE: 5543 case AMDGPU_IRQ_STATE_ENABLE: 5544 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5545 PRIV_REG_INT_ENABLE, 5546 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5547 break; 5548 default: 5549 break; 5550 } 5551 5552 return 0; 5553 } 5554 5555 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5556 struct amdgpu_irq_src *source, 5557 unsigned type, 5558 enum amdgpu_interrupt_state state) 5559 { 5560 switch (state) { 5561 case AMDGPU_IRQ_STATE_DISABLE: 5562 case AMDGPU_IRQ_STATE_ENABLE: 5563 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5564 PRIV_INSTR_INT_ENABLE, 5565 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5566 default: 5567 break; 5568 } 5569 5570 return 0; 5571 } 5572 5573 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5574 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5575 CP_ECC_ERROR_INT_ENABLE, 1) 5576 5577 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5578 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5579 CP_ECC_ERROR_INT_ENABLE, 0) 5580 5581 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5582 struct amdgpu_irq_src *source, 5583 unsigned type, 5584 enum amdgpu_interrupt_state state) 5585 { 5586 switch (state) { 5587 case AMDGPU_IRQ_STATE_DISABLE: 5588 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5589 CP_ECC_ERROR_INT_ENABLE, 0); 5590 DISABLE_ECC_ON_ME_PIPE(1, 0); 5591 DISABLE_ECC_ON_ME_PIPE(1, 1); 5592 DISABLE_ECC_ON_ME_PIPE(1, 2); 5593 DISABLE_ECC_ON_ME_PIPE(1, 3); 5594 break; 5595 5596 case AMDGPU_IRQ_STATE_ENABLE: 5597 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5598 CP_ECC_ERROR_INT_ENABLE, 1); 5599 ENABLE_ECC_ON_ME_PIPE(1, 0); 5600 ENABLE_ECC_ON_ME_PIPE(1, 1); 5601 ENABLE_ECC_ON_ME_PIPE(1, 2); 5602 ENABLE_ECC_ON_ME_PIPE(1, 3); 5603 break; 5604 default: 5605 break; 5606 } 5607 5608 return 0; 5609 } 5610 5611 5612 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5613 struct amdgpu_irq_src *src, 5614 unsigned type, 5615 enum amdgpu_interrupt_state state) 5616 { 5617 switch (type) { 5618 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5619 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5620 break; 5621 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5622 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5623 break; 5624 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5625 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5626 break; 5627 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5628 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5629 break; 5630 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5631 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5632 break; 5633 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5634 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5635 break; 5636 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5637 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5638 break; 5639 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5640 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5641 break; 5642 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5643 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5644 break; 5645 default: 5646 break; 5647 } 5648 return 0; 5649 } 5650 5651 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5652 struct amdgpu_irq_src *source, 5653 struct amdgpu_iv_entry *entry) 5654 { 5655 int i; 5656 u8 me_id, pipe_id, queue_id; 5657 struct amdgpu_ring *ring; 5658 5659 DRM_DEBUG("IH: CP EOP\n"); 5660 me_id = (entry->ring_id & 0x0c) >> 2; 5661 pipe_id = (entry->ring_id & 0x03) >> 0; 5662 queue_id = (entry->ring_id & 0x70) >> 4; 5663 5664 switch (me_id) { 5665 case 0: 5666 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5667 break; 5668 case 1: 5669 case 2: 5670 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5671 ring = &adev->gfx.compute_ring[i]; 5672 /* Per-queue interrupt is supported for MEC starting from VI. 5673 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
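 * Hence all compute rings are checked here and fences are processed only
 * on the ring whose me/pipe/queue matches the ring_id decoded above.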
5674 */ 5675 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5676 amdgpu_fence_process(ring); 5677 } 5678 break; 5679 } 5680 return 0; 5681 } 5682 5683 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5684 struct amdgpu_iv_entry *entry) 5685 { 5686 u8 me_id, pipe_id, queue_id; 5687 struct amdgpu_ring *ring; 5688 int i; 5689 5690 me_id = (entry->ring_id & 0x0c) >> 2; 5691 pipe_id = (entry->ring_id & 0x03) >> 0; 5692 queue_id = (entry->ring_id & 0x70) >> 4; 5693 5694 switch (me_id) { 5695 case 0: 5696 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5697 break; 5698 case 1: 5699 case 2: 5700 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5701 ring = &adev->gfx.compute_ring[i]; 5702 if (ring->me == me_id && ring->pipe == pipe_id && 5703 ring->queue == queue_id) 5704 drm_sched_fault(&ring->sched); 5705 } 5706 break; 5707 } 5708 } 5709 5710 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5711 struct amdgpu_irq_src *source, 5712 struct amdgpu_iv_entry *entry) 5713 { 5714 DRM_ERROR("Illegal register access in command stream\n"); 5715 gfx_v9_0_fault(adev, entry); 5716 return 0; 5717 } 5718 5719 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5720 struct amdgpu_irq_src *source, 5721 struct amdgpu_iv_entry *entry) 5722 { 5723 DRM_ERROR("Illegal instruction in command stream\n"); 5724 gfx_v9_0_fault(adev, entry); 5725 return 0; 5726 } 5727 5728 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 5729 struct ras_err_data *err_data, 5730 struct amdgpu_iv_entry *entry) 5731 { 5732 /* TODO ue will trigger an interrupt. */ 5733 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 5734 if (adev->gfx.funcs->query_ras_error_count) 5735 adev->gfx.funcs->query_ras_error_count(adev, err_data); 5736 amdgpu_ras_reset_gpu(adev, 0); 5737 return AMDGPU_RAS_SUCCESS; 5738 } 5739 5740 static const struct { 5741 const char *name; 5742 uint32_t ip; 5743 uint32_t inst; 5744 uint32_t seg; 5745 uint32_t reg_offset; 5746 uint32_t per_se_instance; 5747 int32_t num_instance; 5748 uint32_t sec_count_mask; 5749 uint32_t ded_count_mask; 5750 } gfx_ras_edc_regs[] = { 5751 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 5752 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5753 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, 5754 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 5755 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), 5756 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, 5757 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5758 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, 5759 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5760 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, 5761 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 5762 REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), 5763 REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, 5764 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5765 REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, 5766 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5767 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5768 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, 5769 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 5770 REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), 5771 REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, 5772 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 5773 REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, 5774 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 
0, mmDC_EDC_RESTORE_CNT), 0, 1, 5775 REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, 5776 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 5777 REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, 5778 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5779 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), 5780 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, 5781 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5782 REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, 5783 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5784 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5785 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, 5786 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5787 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5788 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5789 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, 5790 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5791 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5792 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, 5793 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5794 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5795 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5796 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, 5797 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5798 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5799 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5800 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, 5801 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5802 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5803 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5804 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, 5805 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5806 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5807 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5808 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, 5809 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, 5810 REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, 5811 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5812 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5813 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, 5814 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5815 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, 5816 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5817 REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, 5818 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5819 REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, 5820 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5821 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, 5822 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5823 REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, 5824 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5825 REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, 5826 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5827 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5828 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, 5829 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5830 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5831 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, 5832 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5833 
REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5834 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, 5835 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5836 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5837 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, 5838 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5839 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5840 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, 5841 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5842 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, 5843 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5844 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, 5845 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5846 REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, 5847 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5848 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, 5849 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5850 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, 5851 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5852 REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, 5853 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5854 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, 5855 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5856 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, 5857 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5858 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, 5859 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5860 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5861 0 }, 5862 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5863 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, 5864 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5865 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5866 0 }, 5867 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5868 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, 5869 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, 5870 REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, 5871 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5872 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5873 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, 5874 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5875 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5876 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, 5877 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5878 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, 5879 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5880 REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, 5881 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5882 REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, 5883 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5884 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5885 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, 5886 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, 
mmTCP_EDC_CNT_NEW), 1, 16, 5887 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5888 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, 5889 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5890 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5891 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, 5892 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5893 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5894 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, 5895 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5896 REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, 5897 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5898 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5899 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, 5900 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5901 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5902 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, 5903 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5904 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), 5905 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, 5906 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5907 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5908 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, 5909 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5910 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5911 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, 5912 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5913 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5914 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, 5915 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5916 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5917 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, 5918 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5919 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5920 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, 5921 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5922 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5923 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, 5924 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5925 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5926 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, 5927 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5928 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5929 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, 5930 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5931 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5932 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, 5933 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5934 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5935 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, 5936 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5937 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5938 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, 5939 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5940 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5941 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, 5942 { 
"SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5943 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5944 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, 5945 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5946 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5947 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, 5948 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", 5949 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5950 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5951 0 }, 5952 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5953 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5954 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5955 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, 5956 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5957 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5958 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", 5959 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5960 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, 5961 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5962 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 5963 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, 5964 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5965 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 5966 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, 5967 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5968 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 5969 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, 5970 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5971 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 5972 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, 5973 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5974 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 5975 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, 5976 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", 5977 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5978 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 5979 0 }, 5980 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5981 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5982 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5983 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, 5984 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5985 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5986 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", 5987 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5988 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, 5989 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5990 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 5991 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, 5992 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5993 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 5994 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, 5995 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5996 
REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 5997 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, 5998 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5999 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6000 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, 6001 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6002 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6003 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, 6004 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6005 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, 6006 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6007 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, 6008 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6009 REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, 6010 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6011 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, 6012 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6013 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, 6014 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6015 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6016 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, 6017 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6018 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6019 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, 6020 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6021 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6022 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, 6023 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6024 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, 6025 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6026 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, 6027 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6028 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, 6029 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6030 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, 6031 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6032 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, 6033 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6034 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, 6035 }; 6036 6037 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6038 void *inject_if) 6039 { 6040 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6041 int ret; 6042 struct ta_ras_trigger_error_input block_info = { 0 }; 6043 6044 if (adev->asic_type != CHIP_VEGA20) 6045 return -EINVAL; 6046 6047 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6048 return -EINVAL; 6049 6050 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6051 return -EPERM; 6052 6053 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6054 info->head.type)) { 6055 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6056 ras_gfx_subblocks[info->head.sub_block_index].name, 6057 info->head.type); 6058 return -EPERM; 6059 } 6060 6061 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6062 info->head.type)) { 6063 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6064 
ras_gfx_subblocks[info->head.sub_block_index].name, 6065 info->head.type); 6066 return -EPERM; 6067 } 6068 6069 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6070 block_info.sub_block_index = 6071 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6072 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6073 block_info.address = info->address; 6074 block_info.value = info->value; 6075 6076 mutex_lock(&adev->grbm_idx_mutex); 6077 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6078 mutex_unlock(&adev->grbm_idx_mutex); 6079 6080 return ret; 6081 } 6082 6083 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6084 void *ras_error_status) 6085 { 6086 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6087 uint32_t sec_count, ded_count; 6088 uint32_t i; 6089 uint32_t reg_value; 6090 uint32_t se_id, instance_id; 6091 6092 if (adev->asic_type != CHIP_VEGA20) 6093 return -EINVAL; 6094 6095 err_data->ue_count = 0; 6096 err_data->ce_count = 0; 6097 6098 mutex_lock(&adev->grbm_idx_mutex); 6099 for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { 6100 for (instance_id = 0; instance_id < 256; instance_id++) { 6101 for (i = 0; 6102 i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); 6103 i++) { 6104 if (se_id != 0 && 6105 !gfx_ras_edc_regs[i].per_se_instance) 6106 continue; 6107 if (instance_id >= gfx_ras_edc_regs[i].num_instance) 6108 continue; 6109 6110 gfx_v9_0_select_se_sh(adev, se_id, 0, 6111 instance_id); 6112 6113 reg_value = RREG32( 6114 adev->reg_offset[gfx_ras_edc_regs[i].ip] 6115 [gfx_ras_edc_regs[i].inst] 6116 [gfx_ras_edc_regs[i].seg] + 6117 gfx_ras_edc_regs[i].reg_offset); 6118 sec_count = reg_value & 6119 gfx_ras_edc_regs[i].sec_count_mask; 6120 ded_count = reg_value & 6121 gfx_ras_edc_regs[i].ded_count_mask; 6122 if (sec_count) { 6123 DRM_INFO( 6124 "Instance[%d][%d]: SubBlock %s, SEC %d\n", 6125 se_id, instance_id, 6126 gfx_ras_edc_regs[i].name, 6127 sec_count); 6128 err_data->ce_count++; 6129 } 6130 6131 if (ded_count) { 6132 DRM_INFO( 6133 "Instance[%d][%d]: SubBlock %s, DED %d\n", 6134 se_id, instance_id, 6135 gfx_ras_edc_regs[i].name, 6136 ded_count); 6137 err_data->ue_count++; 6138 } 6139 } 6140 } 6141 } 6142 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6143 mutex_unlock(&adev->grbm_idx_mutex); 6144 6145 return 0; 6146 } 6147 6148 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6149 struct amdgpu_irq_src *source, 6150 struct amdgpu_iv_entry *entry) 6151 { 6152 struct ras_common_if *ras_if = adev->gfx.ras_if; 6153 struct ras_dispatch_if ih_data = { 6154 .entry = entry, 6155 }; 6156 6157 if (!ras_if) 6158 return 0; 6159 6160 ih_data.head = *ras_if; 6161 6162 DRM_ERROR("CP ECC ERROR IRQ\n"); 6163 amdgpu_ras_interrupt_dispatch(adev, &ih_data); 6164 return 0; 6165 } 6166 6167 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6168 .name = "gfx_v9_0", 6169 .early_init = gfx_v9_0_early_init, 6170 .late_init = gfx_v9_0_late_init, 6171 .sw_init = gfx_v9_0_sw_init, 6172 .sw_fini = gfx_v9_0_sw_fini, 6173 .hw_init = gfx_v9_0_hw_init, 6174 .hw_fini = gfx_v9_0_hw_fini, 6175 .suspend = gfx_v9_0_suspend, 6176 .resume = gfx_v9_0_resume, 6177 .is_idle = gfx_v9_0_is_idle, 6178 .wait_for_idle = gfx_v9_0_wait_for_idle, 6179 .soft_reset = gfx_v9_0_soft_reset, 6180 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6181 .set_powergating_state = gfx_v9_0_set_powergating_state, 6182 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6183 
}; 6184 6185 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6186 .type = AMDGPU_RING_TYPE_GFX, 6187 .align_mask = 0xff, 6188 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6189 .support_64bit_ptrs = true, 6190 .vmhub = AMDGPU_GFXHUB_0, 6191 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6192 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6193 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6194 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6195 5 + /* COND_EXEC */ 6196 7 + /* PIPELINE_SYNC */ 6197 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6198 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6199 2 + /* VM_FLUSH */ 6200 8 + /* FENCE for VM_FLUSH */ 6201 20 + /* GDS switch */ 6202 4 + /* double SWITCH_BUFFER, 6203 the first COND_EXEC jump to the place just 6204 prior to this double SWITCH_BUFFER */ 6205 5 + /* COND_EXEC */ 6206 7 + /* HDP_flush */ 6207 4 + /* VGT_flush */ 6208 14 + /* CE_META */ 6209 31 + /* DE_META */ 6210 3 + /* CNTX_CTRL */ 6211 5 + /* HDP_INVL */ 6212 8 + 8 + /* FENCE x2 */ 6213 2, /* SWITCH_BUFFER */ 6214 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6215 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6216 .emit_fence = gfx_v9_0_ring_emit_fence, 6217 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6218 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6219 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6220 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6221 .test_ring = gfx_v9_0_ring_test_ring, 6222 .test_ib = gfx_v9_0_ring_test_ib, 6223 .insert_nop = amdgpu_ring_insert_nop, 6224 .pad_ib = amdgpu_ring_generic_pad_ib, 6225 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6226 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6227 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6228 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6229 .emit_tmz = gfx_v9_0_ring_emit_tmz, 6230 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6231 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6232 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6233 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6234 }; 6235 6236 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6237 .type = AMDGPU_RING_TYPE_COMPUTE, 6238 .align_mask = 0xff, 6239 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6240 .support_64bit_ptrs = true, 6241 .vmhub = AMDGPU_GFXHUB_0, 6242 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6243 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6244 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6245 .emit_frame_size = 6246 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6247 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6248 5 + /* hdp invalidate */ 6249 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6250 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6251 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6252 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6253 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6254 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6255 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6256 .emit_fence = gfx_v9_0_ring_emit_fence, 6257 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6258 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6259 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6260 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6261 .test_ring = gfx_v9_0_ring_test_ring, 6262 .test_ib = gfx_v9_0_ring_test_ib, 6263 .insert_nop = amdgpu_ring_insert_nop, 6264 .pad_ib = amdgpu_ring_generic_pad_ib, 6265 .set_priority = gfx_v9_0_ring_set_priority_compute, 6266 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6267 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6268 .emit_reg_write_reg_wait = 
gfx_v9_0_ring_emit_reg_write_reg_wait, 6269 }; 6270 6271 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6272 .type = AMDGPU_RING_TYPE_KIQ, 6273 .align_mask = 0xff, 6274 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6275 .support_64bit_ptrs = true, 6276 .vmhub = AMDGPU_GFXHUB_0, 6277 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6278 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6279 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6280 .emit_frame_size = 6281 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6282 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6283 5 + /* hdp invalidate */ 6284 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6285 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6286 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6287 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6288 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6289 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6290 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6291 .test_ring = gfx_v9_0_ring_test_ring, 6292 .insert_nop = amdgpu_ring_insert_nop, 6293 .pad_ib = amdgpu_ring_generic_pad_ib, 6294 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6295 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6296 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6297 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6298 }; 6299 6300 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6301 { 6302 int i; 6303 6304 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6305 6306 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6307 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6308 6309 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6310 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 6311 } 6312 6313 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 6314 .set = gfx_v9_0_set_eop_interrupt_state, 6315 .process = gfx_v9_0_eop_irq, 6316 }; 6317 6318 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 6319 .set = gfx_v9_0_set_priv_reg_fault_state, 6320 .process = gfx_v9_0_priv_reg_irq, 6321 }; 6322 6323 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 6324 .set = gfx_v9_0_set_priv_inst_fault_state, 6325 .process = gfx_v9_0_priv_inst_irq, 6326 }; 6327 6328 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 6329 .set = gfx_v9_0_set_cp_ecc_error_state, 6330 .process = gfx_v9_0_cp_ecc_error_irq, 6331 }; 6332 6333 6334 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 6335 { 6336 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6337 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 6338 6339 adev->gfx.priv_reg_irq.num_types = 1; 6340 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 6341 6342 adev->gfx.priv_inst_irq.num_types = 1; 6343 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 6344 6345 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 6346 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 6347 } 6348 6349 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 6350 { 6351 switch (adev->asic_type) { 6352 case CHIP_VEGA10: 6353 case CHIP_VEGA12: 6354 case CHIP_VEGA20: 6355 case CHIP_RAVEN: 6356 case CHIP_ARCTURUS: 6357 case CHIP_RENOIR: 6358 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 6359 break; 6360 default: 6361 break; 6362 } 6363 } 6364 6365 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 6366 { 6367 /* init asci gds info */ 6368 switch (adev->asic_type) { 6369 case CHIP_VEGA10: 6370 case CHIP_VEGA12: 6371 case 
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}
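/*
 * Illustrative example of the bitmap math above (assumed values, not tied
 * to any specific ASIC): with max_cu_per_sh = 8,
 * amdgpu_gfx_create_bitmask(8) yields 0xff; if the combined INACTIVE_CUS
 * field from CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG
 * reads 0x03 (CUs 0 and 1 fused off or user-disabled), the function
 * returns 0xfc, i.e. CUs 2-7 active in the currently selected SE/SH.
 */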
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which is usually suitable for Vega
			 * ASICs with their 4*2 SE/SH layout.
			 * But for Arcturus, the SE/SH layout is changed to 8*1.
			 * To mostly reduce the impact, we make it compatible
			 * with the current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
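/*
 * Usage sketch (this happens in the SoC setup code, e.g. soc15.c, not in
 * this file): the IP block descriptor above is registered during early
 * init with something like
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * after which the common IP framework drives the gfx_v9_0_ip_funcs
 * callbacks (sw_init/hw_init/suspend/resume, etc.) over the device
 * lifecycle.
 */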