/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

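/*
 * Note: Picasso ships two RLC images below, picasso_rlc.bin for FP5 parts
 * and picasso_rlc_am4.bin for AM4 socket parts; gfx_v9_0_init_rlc_microcode()
 * selects between them from the PCI revision ranges documented there.
 */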
MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 103 104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 111 112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 113 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 115 116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 120 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); 121 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 122 123 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 124 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 125 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 126 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 127 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 128 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 129 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 130 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 131 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 132 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 133 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 134 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 135 136 enum ta_ras_gfx_subblock { 137 /*CPC*/ 138 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 139 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 140 TA_RAS_BLOCK__GFX_CPC_UCODE, 141 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 142 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 143 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 144 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 145 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 146 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 147 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 148 /* CPF*/ 149 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 150 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 151 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 152 TA_RAS_BLOCK__GFX_CPF_TAG, 153 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 154 /* CPG*/ 155 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 156 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 157 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 158 TA_RAS_BLOCK__GFX_CPG_TAG, 159 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 160 /* GDS*/ 161 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 162 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 163 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 164 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 165 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 166 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 167 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 168 /* SPI*/ 169 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 170 /* SQ*/ 171 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 172 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 173 TA_RAS_BLOCK__GFX_SQ_LDS_D, 174 TA_RAS_BLOCK__GFX_SQ_LDS_I, 175 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 176 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 177 /* SQC (3 ranges)*/ 178 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 179 /* SQC range 0*/ 180 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 181 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 182 
TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 183 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 184 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 185 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 186 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 187 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 188 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 189 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 190 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 191 /* SQC range 1*/ 192 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 193 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 194 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 195 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 196 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 197 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 198 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 199 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 200 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 201 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 202 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 203 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 204 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 205 /* SQC range 2*/ 206 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 207 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 208 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 209 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 210 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 211 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 212 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 213 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 214 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 215 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 216 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 217 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 218 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 219 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 220 /* TA*/ 221 TA_RAS_BLOCK__GFX_TA_INDEX_START, 222 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 223 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 224 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 225 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 226 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 227 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 228 /* TCA*/ 229 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 230 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 231 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 232 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 233 /* TCC (5 sub-ranges)*/ 234 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 235 /* TCC range 0*/ 236 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 237 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 238 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 239 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 240 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 241 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 242 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 243 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 244 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 245 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 246 /* TCC range 1*/ 247 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 248 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 249 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 250 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 251 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 252 /* TCC range 2*/ 253 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 254 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 255 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 256 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 257 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 258 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 259 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 260 
TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 261 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 262 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 263 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 264 /* TCC range 3*/ 265 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 266 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 267 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 268 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 269 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 270 /* TCC range 4*/ 271 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 272 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 273 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 274 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 275 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 276 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 277 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 278 /* TCI*/ 279 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 280 /* TCP*/ 281 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 282 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 283 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 284 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 285 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 286 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 287 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 288 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 289 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 290 /* TD*/ 291 TA_RAS_BLOCK__GFX_TD_INDEX_START, 292 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 293 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 294 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 295 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 296 /* EA (3 sub-ranges)*/ 297 TA_RAS_BLOCK__GFX_EA_INDEX_START, 298 /* EA range 0*/ 299 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 300 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 301 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 302 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 303 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 304 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 305 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 306 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 307 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 308 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 309 /* EA range 1*/ 310 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 311 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 312 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 313 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 314 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 315 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 316 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 317 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 318 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 319 /* EA range 2*/ 320 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 321 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 322 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 323 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 324 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 325 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 326 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 327 /* UTC VM L2 bank*/ 328 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 329 /* UTC VM walker*/ 330 TA_RAS_BLOCK__UTC_VML2_WALKER, 331 /* UTC ATC L2 2MB cache*/ 332 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 333 /* UTC ATC L2 4KB cache*/ 334 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 335 TA_RAS_BLOCK__GFX_MAX 336 }; 337 338 struct ras_gfx_subblock { 339 unsigned char *name; 340 int ta_subblock; 341 int hw_supported_error_type; 342 int sw_supported_error_type; 343 }; 344 345 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 346 [AMDGPU_RAS_BLOCK__##subblock] = { \ 347 #subblock, \ 348 
TA_RAS_BLOCK__##subblock, \ 349 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 350 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 351 } 352 353 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 354 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 355 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 356 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 357 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 358 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 359 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 360 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 361 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 362 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 363 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 364 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 365 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 366 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 367 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 368 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 369 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 370 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 371 0), 372 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 373 0), 374 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 375 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 376 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 377 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 378 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 379 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 380 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 381 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 382 0, 0), 383 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 384 0), 385 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 386 0, 0), 387 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 388 0), 389 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 390 0, 0), 391 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 392 0), 393 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 394 1), 395 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 396 0, 0, 0), 397 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 398 0), 399 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 400 0), 401 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 402 0), 403 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 404 0), 405 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 406 0), 407 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 408 0, 0), 409 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 410 0), 411 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 412 0), 413 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 414 0, 0, 0), 415 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 416 0), 417 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 418 0), 419 
AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 420 0), 421 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 422 0), 423 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 424 0), 425 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 426 0, 0), 427 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 428 0), 429 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 430 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 431 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 432 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 433 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 434 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 435 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 436 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 437 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 438 1), 439 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 440 1), 441 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 442 1), 443 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 444 0), 445 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 446 0), 447 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 448 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 449 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 450 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 451 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 452 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 453 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 454 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 456 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 457 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 458 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 459 0), 460 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 462 0), 463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 464 0, 0), 465 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 466 0), 467 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 468 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 469 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 470 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 471 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 472 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 473 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 474 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 475 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 476 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 477 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 478 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 479 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 480 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 
0), 481 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 482 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 483 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 484 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 485 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 486 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 487 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 488 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 489 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 495 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 496 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 497 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 498 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 499 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 500 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 501 }; 502 503 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 504 { 505 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 506 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 507 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 508 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 509 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 510 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 511 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 512 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 513 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 514 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 515 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 516 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 523 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 525 }; 526 527 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 528 { 529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 533 SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 535 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 537 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 538 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 539 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 540 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 545 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 546 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 547 }; 548 549 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 550 { 551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 557 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 559 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 560 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 561 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 562 }; 563 564 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 565 { 566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 569 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 571 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 572 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 574 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 575 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 584 SOC15_REG_GOLDEN_VALUE(GC, 
0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 588 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 589 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 590 }; 591 592 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 593 { 594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 597 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 599 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 600 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 601 }; 602 603 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 604 { 605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 609 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 610 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 611 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 613 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 614 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 615 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 616 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 622 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 623 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 624 }; 625 626 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 627 { 628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 631 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 632 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 633 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 634 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 636 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 637 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 638 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 639 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 640 }; 641 642 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 643 { 644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 645 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 646 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 647 }; 648 649 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 650 { 651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 667 }; 668 669 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 670 { 671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 684 }; 685 686 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 687 { 688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), 693 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), 694 
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

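/*
 * Dword budget for the KIQ packet writers below; each helper emits a
 * fixed-size PM4 packet (PACKET3 header + payload):
 *
 *   SET_RESOURCES:    1 + 7 dwords  -> .set_resources_size = 8
 *   MAP_QUEUES:       1 + 6 dwords  -> .map_queues_size = 7
 *   UNMAP_QUEUES:     1 + 5 dwords  -> .unmap_queues_size = 6
 *   QUERY_STATUS:     1 + 6 dwords  -> .query_status_size = 7
 *   INVALIDATE_TLBS:  1 + 1 dword   -> .invalidate_tlbs_size = 2
 *
 * These counts must stay in sync with gfx_v9_0_kiq_pm4_funcs further
 * down, which the shared KIQ code presumably uses to reserve ring space
 * before emitting the packets.
 */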
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0* queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

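/*
 * For UNMAP_QUEUES with PREEMPT_QUEUES_NO_UNMAP, the last three dwords above
 * carry a GPU address and sequence number, presumably used as a fence the CP
 * signals once the preemption request completes.  For plain unmaps they are
 * written as zero padding so the packet keeps its fixed six-dword size.
 */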
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

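/*
 * Each SOC15_REG_GOLDEN_VALUE() entry in the tables above is a
 * (block, instance, register, mask, value) tuple that
 * soc15_program_register_sequence() applies as a masked read-modify-write,
 * so only the bits selected by the mask are changed.  The per-ASIC tables
 * stack on top of the common golden_settings_gc_9_x_common list, which the
 * code above skips for Arcturus and Renoir.
 */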
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

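	/* Teardown in reverse order of setup: release the IB and its fence
	 * first (err2), then return the writeback slot used for the test
	 * pattern (err1).
	 */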
err2: 1057 amdgpu_ib_free(adev, &ib, NULL); 1058 dma_fence_put(f); 1059 err1: 1060 amdgpu_device_wb_free(adev, index); 1061 return r; 1062 } 1063 1064 1065 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 1066 { 1067 release_firmware(adev->gfx.pfp_fw); 1068 adev->gfx.pfp_fw = NULL; 1069 release_firmware(adev->gfx.me_fw); 1070 adev->gfx.me_fw = NULL; 1071 release_firmware(adev->gfx.ce_fw); 1072 adev->gfx.ce_fw = NULL; 1073 release_firmware(adev->gfx.rlc_fw); 1074 adev->gfx.rlc_fw = NULL; 1075 release_firmware(adev->gfx.mec_fw); 1076 adev->gfx.mec_fw = NULL; 1077 release_firmware(adev->gfx.mec2_fw); 1078 adev->gfx.mec2_fw = NULL; 1079 1080 kfree(adev->gfx.rlc.register_list_format); 1081 } 1082 1083 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 1084 { 1085 const struct rlc_firmware_header_v2_1 *rlc_hdr; 1086 1087 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 1088 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 1089 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 1090 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 1091 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 1092 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 1093 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 1094 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 1095 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 1096 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 1097 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 1098 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 1099 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 1100 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 1101 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 1102 } 1103 1104 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 1105 { 1106 adev->gfx.me_fw_write_wait = false; 1107 adev->gfx.mec_fw_write_wait = false; 1108 1109 if ((adev->asic_type != CHIP_ARCTURUS) && 1110 ((adev->gfx.mec_fw_version < 0x000001a5) || 1111 (adev->gfx.mec_feature_version < 46) || 1112 (adev->gfx.pfp_fw_version < 0x000000b7) || 1113 (adev->gfx.pfp_feature_version < 46))) 1114 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1115 1116 switch (adev->asic_type) { 1117 case CHIP_VEGA10: 1118 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1119 (adev->gfx.me_feature_version >= 42) && 1120 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1121 (adev->gfx.pfp_feature_version >= 42)) 1122 adev->gfx.me_fw_write_wait = true; 1123 1124 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1125 (adev->gfx.mec_feature_version >= 42)) 1126 adev->gfx.mec_fw_write_wait = true; 1127 break; 1128 case CHIP_VEGA12: 1129 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1130 (adev->gfx.me_feature_version >= 44) && 1131 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1132 (adev->gfx.pfp_feature_version >= 44)) 1133 adev->gfx.me_fw_write_wait = true; 1134 1135 if ((adev->gfx.mec_fw_version >= 
0x00000196) && 1136 (adev->gfx.mec_feature_version >= 44)) 1137 adev->gfx.mec_fw_write_wait = true; 1138 break; 1139 case CHIP_VEGA20: 1140 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1141 (adev->gfx.me_feature_version >= 44) && 1142 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1143 (adev->gfx.pfp_feature_version >= 44)) 1144 adev->gfx.me_fw_write_wait = true; 1145 1146 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1147 (adev->gfx.mec_feature_version >= 44)) 1148 adev->gfx.mec_fw_write_wait = true; 1149 break; 1150 case CHIP_RAVEN: 1151 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1152 (adev->gfx.me_feature_version >= 42) && 1153 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1154 (adev->gfx.pfp_feature_version >= 42)) 1155 adev->gfx.me_fw_write_wait = true; 1156 1157 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1158 (adev->gfx.mec_feature_version >= 42)) 1159 adev->gfx.mec_fw_write_wait = true; 1160 break; 1161 default: 1162 break; 1163 } 1164 } 1165 1166 struct amdgpu_gfxoff_quirk { 1167 u16 chip_vendor; 1168 u16 chip_device; 1169 u16 subsys_vendor; 1170 u16 subsys_device; 1171 u8 revision; 1172 }; 1173 1174 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1175 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1176 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1177 { 0, 0, 0, 0, 0 }, 1178 }; 1179 1180 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1181 { 1182 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1183 1184 while (p && p->chip_device != 0) { 1185 if (pdev->vendor == p->chip_vendor && 1186 pdev->device == p->chip_device && 1187 pdev->subsystem_vendor == p->subsys_vendor && 1188 pdev->subsystem_device == p->subsys_device && 1189 pdev->revision == p->revision) { 1190 return true; 1191 } 1192 ++p; 1193 } 1194 return false; 1195 } 1196 1197 static bool is_raven_kicker(struct amdgpu_device *adev) 1198 { 1199 if (adev->pm.fw_version >= 0x41e2b) 1200 return true; 1201 else 1202 return false; 1203 } 1204 1205 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1206 { 1207 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1208 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1209 1210 switch (adev->asic_type) { 1211 case CHIP_VEGA10: 1212 case CHIP_VEGA12: 1213 case CHIP_VEGA20: 1214 break; 1215 case CHIP_RAVEN: 1216 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) && 1217 ((!is_raven_kicker(adev) && 1218 adev->gfx.rlc_fw_version < 531) || 1219 (adev->gfx.rlc_feature_version < 1) || 1220 !adev->gfx.rlc.is_rlc_v2_1)) 1221 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1222 1223 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1224 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1225 AMD_PG_SUPPORT_CP | 1226 AMD_PG_SUPPORT_RLC_SMU_HS; 1227 break; 1228 case CHIP_RENOIR: 1229 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1230 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1231 AMD_PG_SUPPORT_CP | 1232 AMD_PG_SUPPORT_RLC_SMU_HS; 1233 break; 1234 default: 1235 break; 1236 } 1237 } 1238 1239 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1240 const char *chip_name) 1241 { 1242 char fw_name[30]; 1243 int err; 1244 struct amdgpu_firmware_info *info = NULL; 1245 const struct common_firmware_header *header = NULL; 1246 const struct gfx_firmware_header_v1_0 *cp_hdr; 1247 1248 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1249 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1250 if (err) 1251 goto out; 1252 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1253 if (err) 1254 goto out; 1255 
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1256 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1257 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1258 1259 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1260 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1261 if (err) 1262 goto out; 1263 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1264 if (err) 1265 goto out; 1266 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1267 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1268 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1269 1270 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1271 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1272 if (err) 1273 goto out; 1274 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1275 if (err) 1276 goto out; 1277 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1278 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1279 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1280 1281 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1282 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1283 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1284 info->fw = adev->gfx.pfp_fw; 1285 header = (const struct common_firmware_header *)info->fw->data; 1286 adev->firmware.fw_size += 1287 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1288 1289 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1290 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1291 info->fw = adev->gfx.me_fw; 1292 header = (const struct common_firmware_header *)info->fw->data; 1293 adev->firmware.fw_size += 1294 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1295 1296 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1297 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1298 info->fw = adev->gfx.ce_fw; 1299 header = (const struct common_firmware_header *)info->fw->data; 1300 adev->firmware.fw_size += 1301 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1302 } 1303 1304 out: 1305 if (err) { 1306 dev_err(adev->dev, 1307 "gfx9: Failed to load firmware \"%s\"\n", 1308 fw_name); 1309 release_firmware(adev->gfx.pfp_fw); 1310 adev->gfx.pfp_fw = NULL; 1311 release_firmware(adev->gfx.me_fw); 1312 adev->gfx.me_fw = NULL; 1313 release_firmware(adev->gfx.ce_fw); 1314 adev->gfx.ce_fw = NULL; 1315 } 1316 return err; 1317 } 1318 1319 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1320 const char *chip_name) 1321 { 1322 char fw_name[30]; 1323 int err; 1324 struct amdgpu_firmware_info *info = NULL; 1325 const struct common_firmware_header *header = NULL; 1326 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1327 unsigned int *tmp = NULL; 1328 unsigned int i = 0; 1329 uint16_t version_major; 1330 uint16_t version_minor; 1331 uint32_t smu_version; 1332 1333 /* 1334 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1335 * instead of picasso_rlc.bin. 
1336 * Judgment method: 1337 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1338 * or revision >= 0xD8 && revision <= 0xDF 1339 * otherwise is PCO FP5 1340 */ 1341 if (!strcmp(chip_name, "picasso") && 1342 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1343 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1344 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 1345 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1346 (smu_version >= 0x41e2b)) 1347 /** 1348 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1349 */ 1350 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1351 else 1352 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1353 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1354 if (err) 1355 goto out; 1356 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1357 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1358 1359 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1360 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1361 if (version_major == 2 && version_minor == 1) 1362 adev->gfx.rlc.is_rlc_v2_1 = true; 1363 1364 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1365 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1366 adev->gfx.rlc.save_and_restore_offset = 1367 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1368 adev->gfx.rlc.clear_state_descriptor_offset = 1369 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1370 adev->gfx.rlc.avail_scratch_ram_locations = 1371 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1372 adev->gfx.rlc.reg_restore_list_size = 1373 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1374 adev->gfx.rlc.reg_list_format_start = 1375 le32_to_cpu(rlc_hdr->reg_list_format_start); 1376 adev->gfx.rlc.reg_list_format_separate_start = 1377 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1378 adev->gfx.rlc.starting_offsets_start = 1379 le32_to_cpu(rlc_hdr->starting_offsets_start); 1380 adev->gfx.rlc.reg_list_format_size_bytes = 1381 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1382 adev->gfx.rlc.reg_list_size_bytes = 1383 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1384 adev->gfx.rlc.register_list_format = 1385 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1386 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1387 if (!adev->gfx.rlc.register_list_format) { 1388 err = -ENOMEM; 1389 goto out; 1390 } 1391 1392 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1393 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1394 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1395 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1396 1397 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1398 1399 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1400 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1401 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1402 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1403 1404 if (adev->gfx.rlc.is_rlc_v2_1) 1405 gfx_v9_0_init_rlc_ext_microcode(adev); 1406 1407 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1408 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1409 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1410 info->fw = adev->gfx.rlc_fw; 1411 header = (const struct common_firmware_header *)info->fw->data; 1412 
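/*
 * Note (added for clarity, not part of the original file): with PSP-based
 * loading, adev->firmware.fw_size accumulates the PAGE_SIZE-aligned size of
 * every ucode entry registered here; it is later used to size the firmware
 * backing buffer handed to the PSP.
 */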
adev->firmware.fw_size += 1413 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1414 1415 if (adev->gfx.rlc.is_rlc_v2_1 && 1416 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1417 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1418 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1419 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1420 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1421 info->fw = adev->gfx.rlc_fw; 1422 adev->firmware.fw_size += 1423 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1424 1425 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1426 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1427 info->fw = adev->gfx.rlc_fw; 1428 adev->firmware.fw_size += 1429 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1430 1431 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1432 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1433 info->fw = adev->gfx.rlc_fw; 1434 adev->firmware.fw_size += 1435 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1436 } 1437 } 1438 1439 out: 1440 if (err) { 1441 dev_err(adev->dev, 1442 "gfx9: Failed to load firmware \"%s\"\n", 1443 fw_name); 1444 release_firmware(adev->gfx.rlc_fw); 1445 adev->gfx.rlc_fw = NULL; 1446 } 1447 return err; 1448 } 1449 1450 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1451 const char *chip_name) 1452 { 1453 char fw_name[30]; 1454 int err; 1455 struct amdgpu_firmware_info *info = NULL; 1456 const struct common_firmware_header *header = NULL; 1457 const struct gfx_firmware_header_v1_0 *cp_hdr; 1458 1459 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1460 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1461 if (err) 1462 goto out; 1463 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1464 if (err) 1465 goto out; 1466 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1467 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1468 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1469 1470 1471 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1472 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1473 if (!err) { 1474 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1475 if (err) 1476 goto out; 1477 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1478 adev->gfx.mec2_fw->data; 1479 adev->gfx.mec2_fw_version = 1480 le32_to_cpu(cp_hdr->header.ucode_version); 1481 adev->gfx.mec2_feature_version = 1482 le32_to_cpu(cp_hdr->ucode_feature_version); 1483 } else { 1484 err = 0; 1485 adev->gfx.mec2_fw = NULL; 1486 } 1487 1488 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1489 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1490 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1491 info->fw = adev->gfx.mec_fw; 1492 header = (const struct common_firmware_header *)info->fw->data; 1493 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1494 adev->firmware.fw_size += 1495 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1496 1497 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1498 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1499 info->fw = adev->gfx.mec_fw; 1500 adev->firmware.fw_size += 1501 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1502 1503 if (adev->gfx.mec2_fw) { 1504 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1505 
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1506 info->fw = adev->gfx.mec2_fw; 1507 header = (const struct common_firmware_header *)info->fw->data; 1508 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1509 adev->firmware.fw_size += 1510 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1511 1512 /* TODO: Determine if MEC2 JT FW loading can be removed 1513 for all GFX V9 asic and above */ 1514 if (adev->asic_type != CHIP_ARCTURUS && 1515 adev->asic_type != CHIP_RENOIR) { 1516 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1517 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1518 info->fw = adev->gfx.mec2_fw; 1519 adev->firmware.fw_size += 1520 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1521 PAGE_SIZE); 1522 } 1523 } 1524 } 1525 1526 out: 1527 gfx_v9_0_check_if_need_gfxoff(adev); 1528 gfx_v9_0_check_fw_write_wait(adev); 1529 if (err) { 1530 dev_err(adev->dev, 1531 "gfx9: Failed to load firmware \"%s\"\n", 1532 fw_name); 1533 release_firmware(adev->gfx.mec_fw); 1534 adev->gfx.mec_fw = NULL; 1535 release_firmware(adev->gfx.mec2_fw); 1536 adev->gfx.mec2_fw = NULL; 1537 } 1538 return err; 1539 } 1540 1541 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1542 { 1543 const char *chip_name; 1544 int r; 1545 1546 DRM_DEBUG("\n"); 1547 1548 switch (adev->asic_type) { 1549 case CHIP_VEGA10: 1550 chip_name = "vega10"; 1551 break; 1552 case CHIP_VEGA12: 1553 chip_name = "vega12"; 1554 break; 1555 case CHIP_VEGA20: 1556 chip_name = "vega20"; 1557 break; 1558 case CHIP_RAVEN: 1559 if (adev->rev_id >= 8) 1560 chip_name = "raven2"; 1561 else if (adev->pdev->device == 0x15d8) 1562 chip_name = "picasso"; 1563 else 1564 chip_name = "raven"; 1565 break; 1566 case CHIP_ARCTURUS: 1567 chip_name = "arcturus"; 1568 break; 1569 case CHIP_RENOIR: 1570 chip_name = "renoir"; 1571 break; 1572 default: 1573 BUG(); 1574 } 1575 1576 /* No CPG in Arcturus */ 1577 if (adev->asic_type != CHIP_ARCTURUS) { 1578 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1579 if (r) 1580 return r; 1581 } 1582 1583 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1584 if (r) 1585 return r; 1586 1587 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1588 if (r) 1589 return r; 1590 1591 return r; 1592 } 1593 1594 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1595 { 1596 u32 count = 0; 1597 const struct cs_section_def *sect = NULL; 1598 const struct cs_extent_def *ext = NULL; 1599 1600 /* begin clear state */ 1601 count += 2; 1602 /* context control state */ 1603 count += 3; 1604 1605 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1606 for (ext = sect->section; ext->extent != NULL; ++ext) { 1607 if (sect->id == SECT_CONTEXT) 1608 count += 2 + ext->reg_count; 1609 else 1610 return 0; 1611 } 1612 } 1613 1614 /* end clear state */ 1615 count += 2; 1616 /* clear state */ 1617 count += 2; 1618 1619 return count; 1620 } 1621 1622 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1623 volatile u32 *buffer) 1624 { 1625 u32 count = 0, i; 1626 const struct cs_section_def *sect = NULL; 1627 const struct cs_extent_def *ext = NULL; 1628 1629 if (adev->gfx.rlc.cs_data == NULL) 1630 return; 1631 if (buffer == NULL) 1632 return; 1633 1634 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1635 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1636 1637 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1638 buffer[count++] = cpu_to_le32(0x80000000); 1639 buffer[count++] = 
cpu_to_le32(0x80000000); 1640 1641 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1642 for (ext = sect->section; ext->extent != NULL; ++ext) { 1643 if (sect->id == SECT_CONTEXT) { 1644 buffer[count++] = 1645 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1646 buffer[count++] = cpu_to_le32(ext->reg_index - 1647 PACKET3_SET_CONTEXT_REG_START); 1648 for (i = 0; i < ext->reg_count; i++) 1649 buffer[count++] = cpu_to_le32(ext->extent[i]); 1650 } else { 1651 return; 1652 } 1653 } 1654 } 1655 1656 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1657 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1658 1659 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1660 buffer[count++] = cpu_to_le32(0); 1661 } 1662 1663 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1664 { 1665 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1666 uint32_t pg_always_on_cu_num = 2; 1667 uint32_t always_on_cu_num; 1668 uint32_t i, j, k; 1669 uint32_t mask, cu_bitmap, counter; 1670 1671 if (adev->flags & AMD_IS_APU) 1672 always_on_cu_num = 4; 1673 else if (adev->asic_type == CHIP_VEGA12) 1674 always_on_cu_num = 8; 1675 else 1676 always_on_cu_num = 12; 1677 1678 mutex_lock(&adev->grbm_idx_mutex); 1679 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1680 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1681 mask = 1; 1682 cu_bitmap = 0; 1683 counter = 0; 1684 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1685 1686 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1687 if (cu_info->bitmap[i][j] & mask) { 1688 if (counter == pg_always_on_cu_num) 1689 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1690 if (counter < always_on_cu_num) 1691 cu_bitmap |= mask; 1692 else 1693 break; 1694 counter++; 1695 } 1696 mask <<= 1; 1697 } 1698 1699 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1700 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1701 } 1702 } 1703 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1704 mutex_unlock(&adev->grbm_idx_mutex); 1705 } 1706 1707 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1708 { 1709 uint32_t data; 1710 1711 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1712 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1713 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1714 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1715 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1716 1717 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1718 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1719 1720 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1721 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1722 1723 mutex_lock(&adev->grbm_idx_mutex); 1724 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1725 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1726 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1727 1728 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1729 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1730 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1731 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1732 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1733 1734 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1735 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1736 data &= 0x0000FFFF; 1737 data |= 0x00C00000; 1738 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1739 1740 /* 1741 
* RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1742 * programmed in gfx_v9_0_init_always_on_cu_mask() 1743 */ 1744 1745 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1746 * but used for RLC_LB_CNTL configuration */ 1747 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1748 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1749 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1750 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1751 mutex_unlock(&adev->grbm_idx_mutex); 1752 1753 gfx_v9_0_init_always_on_cu_mask(adev); 1754 } 1755 1756 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1757 { 1758 uint32_t data; 1759 1760 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1761 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1762 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1763 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1764 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1765 1766 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1767 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1768 1769 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1770 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1771 1772 mutex_lock(&adev->grbm_idx_mutex); 1773 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1774 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1775 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1776 1777 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1778 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1779 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1780 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1781 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1782 1783 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1784 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1785 data &= 0x0000FFFF; 1786 data |= 0x00C00000; 1787 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1788 1789 /* 1790 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1791 * programmed in gfx_v9_0_init_always_on_cu_mask() 1792 */ 1793 1794 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1795 * but used for RLC_LB_CNTL configuration */ 1796 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1797 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1798 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1799 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1800 mutex_unlock(&adev->grbm_idx_mutex); 1801 1802 gfx_v9_0_init_always_on_cu_mask(adev); 1803 } 1804 1805 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1806 { 1807 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1808 } 1809 1810 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1811 { 1812 return 5; 1813 } 1814 1815 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1816 { 1817 const struct cs_section_def *cs_data; 1818 int r; 1819 1820 adev->gfx.rlc.cs_data = gfx9_cs_data; 1821 1822 cs_data = adev->gfx.rlc.cs_data; 1823 1824 if (cs_data) { 1825 /* init clear state block */ 1826 r = amdgpu_gfx_rlc_init_csb(adev); 1827 if (r) 1828 return r; 1829 } 1830 1831 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 1832 /* TODO: double check the cp_table_size for RV */ 1833 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1834 r = amdgpu_gfx_rlc_init_cpt(adev); 1835 if (r) 1836 return r; 1837 } 1838 1839 switch (adev->asic_type) { 1840 case CHIP_RAVEN: 1841 gfx_v9_0_init_lbpw(adev); 1842 break; 1843 case CHIP_VEGA20: 1844 gfx_v9_4_init_lbpw(adev); 1845 break; 1846 default: 1847 break; 1848 } 1849 1850 /* init spm vmid with 0xf */ 1851 if (adev->gfx.rlc.funcs->update_spm_vmid) 1852 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); 1853 1854 return 0; 1855 } 1856 1857 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1858 { 1859 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1860 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1861 } 1862 1863 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1864 { 1865 int r; 1866 u32 *hpd; 1867 const __le32 *fw_data; 1868 unsigned fw_size; 1869 u32 *fw; 1870 size_t mec_hpd_size; 1871 1872 const struct gfx_firmware_header_v1_0 *mec_hdr; 1873 1874 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1875 1876 /* take ownership of the relevant compute queues */ 1877 amdgpu_gfx_compute_queue_acquire(adev); 1878 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1879 1880 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1881 AMDGPU_GEM_DOMAIN_VRAM, 1882 &adev->gfx.mec.hpd_eop_obj, 1883 &adev->gfx.mec.hpd_eop_gpu_addr, 1884 (void **)&hpd); 1885 if (r) { 1886 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1887 gfx_v9_0_mec_fini(adev); 1888 return r; 1889 } 1890 1891 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1892 1893 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1894 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1895 1896 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1897 1898 fw_data = (const __le32 *) 1899 (adev->gfx.mec_fw->data + 1900 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1901 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1902 1903 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1904 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1905 &adev->gfx.mec.mec_fw_obj, 1906 &adev->gfx.mec.mec_fw_gpu_addr, 1907 (void **)&fw); 1908 if (r) { 1909 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1910 gfx_v9_0_mec_fini(adev); 1911 return r; 1912 } 1913 1914 memcpy(fw, fw_data, fw_size); 1915 1916 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1917 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1918 1919 return 0; 1920 } 1921 1922 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1923 { 1924 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1925 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1926 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1927 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1928 (SQ_IND_INDEX__FORCE_READ_MASK)); 1929 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1930 } 
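/*
 * Note (added for clarity, not part of the original file): wave state is
 * accessed through the SQ indirect register pair -- SQ_IND_INDEX selects
 * the wave/SIMD and register index (with FORCE_READ set) and the value is
 * read back through SQ_IND_DATA. wave_read_regs() below additionally sets
 * AUTO_INCR so that consecutive registers (e.g. SGPR/VGPR ranges) can be
 * streamed out in a single loop.
 */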
1931 1932 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1933 uint32_t wave, uint32_t thread, 1934 uint32_t regno, uint32_t num, uint32_t *out) 1935 { 1936 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1937 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1938 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1939 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1940 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1941 (SQ_IND_INDEX__FORCE_READ_MASK) | 1942 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1943 while (num--) 1944 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1945 } 1946 1947 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1948 { 1949 /* type 1 wave data */ 1950 dst[(*no_fields)++] = 1; 1951 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1952 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1953 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1954 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1955 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1956 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1957 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1958 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1959 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1960 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1961 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1962 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1963 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1964 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1965 } 1966 1967 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1968 uint32_t wave, uint32_t start, 1969 uint32_t size, uint32_t *dst) 1970 { 1971 wave_read_regs( 1972 adev, simd, wave, 0, 1973 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1974 } 1975 1976 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1977 uint32_t wave, uint32_t thread, 1978 uint32_t start, uint32_t size, 1979 uint32_t *dst) 1980 { 1981 wave_read_regs( 1982 adev, simd, wave, thread, 1983 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1984 } 1985 1986 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1987 u32 me, u32 pipe, u32 q, u32 vm) 1988 { 1989 soc15_grbm_select(adev, me, pipe, q, vm); 1990 } 1991 1992 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1993 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1994 .select_se_sh = &gfx_v9_0_select_se_sh, 1995 .read_wave_data = &gfx_v9_0_read_wave_data, 1996 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1997 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1998 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1999 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2000 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2001 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2002 }; 2003 2004 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = { 2005 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2006 .select_se_sh = &gfx_v9_0_select_se_sh, 2007 .read_wave_data = &gfx_v9_0_read_wave_data, 2008 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2009 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2010 .select_me_pipe_q = 
&gfx_v9_0_select_me_pipe_q, 2011 .ras_error_inject = &gfx_v9_4_ras_error_inject, 2012 .query_ras_error_count = &gfx_v9_4_query_ras_error_count, 2013 .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count, 2014 }; 2015 2016 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2017 { 2018 u32 gb_addr_config; 2019 int err; 2020 2021 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 2022 2023 switch (adev->asic_type) { 2024 case CHIP_VEGA10: 2025 adev->gfx.config.max_hw_contexts = 8; 2026 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2027 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2028 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2029 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2030 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2031 break; 2032 case CHIP_VEGA12: 2033 adev->gfx.config.max_hw_contexts = 8; 2034 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2035 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2036 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2037 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2038 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2039 DRM_INFO("fix gfx.config for vega12\n"); 2040 break; 2041 case CHIP_VEGA20: 2042 adev->gfx.config.max_hw_contexts = 8; 2043 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2044 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2045 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2046 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2047 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2048 gb_addr_config &= ~0xf3e777ff; 2049 gb_addr_config |= 0x22014042; 2050 /* check vbios table if gpu info is not available */ 2051 err = amdgpu_atomfirmware_get_gfx_info(adev); 2052 if (err) 2053 return err; 2054 break; 2055 case CHIP_RAVEN: 2056 adev->gfx.config.max_hw_contexts = 8; 2057 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2058 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2059 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2060 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2061 if (adev->rev_id >= 8) 2062 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2063 else 2064 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2065 break; 2066 case CHIP_ARCTURUS: 2067 adev->gfx.funcs = &gfx_v9_4_gfx_funcs; 2068 adev->gfx.config.max_hw_contexts = 8; 2069 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2070 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2071 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2072 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2073 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2074 gb_addr_config &= ~0xf3e777ff; 2075 gb_addr_config |= 0x22014042; 2076 break; 2077 case CHIP_RENOIR: 2078 adev->gfx.config.max_hw_contexts = 8; 2079 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2080 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2081 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2082 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2083 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2084 gb_addr_config &= ~0xf3e777ff; 2085 gb_addr_config |= 0x22010042; 2086 break; 2087 default: 2088 BUG(); 2089 break; 2090 } 2091 2092 adev->gfx.config.gb_addr_config = gb_addr_config; 2093 2094 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2095 REG_GET_FIELD( 2096 adev->gfx.config.gb_addr_config, 2097 GB_ADDR_CONFIG, 2098 NUM_PIPES); 2099 2100 adev->gfx.config.max_tile_pipes = 2101 adev->gfx.config.gb_addr_config_fields.num_pipes; 2102 2103 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2104 REG_GET_FIELD( 2105 
adev->gfx.config.gb_addr_config, 2106 GB_ADDR_CONFIG, 2107 NUM_BANKS); 2108 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2109 REG_GET_FIELD( 2110 adev->gfx.config.gb_addr_config, 2111 GB_ADDR_CONFIG, 2112 MAX_COMPRESSED_FRAGS); 2113 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2114 REG_GET_FIELD( 2115 adev->gfx.config.gb_addr_config, 2116 GB_ADDR_CONFIG, 2117 NUM_RB_PER_SE); 2118 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2119 REG_GET_FIELD( 2120 adev->gfx.config.gb_addr_config, 2121 GB_ADDR_CONFIG, 2122 NUM_SHADER_ENGINES); 2123 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2124 REG_GET_FIELD( 2125 adev->gfx.config.gb_addr_config, 2126 GB_ADDR_CONFIG, 2127 PIPE_INTERLEAVE_SIZE)); 2128 2129 return 0; 2130 } 2131 2132 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2133 int mec, int pipe, int queue) 2134 { 2135 int r; 2136 unsigned irq_type; 2137 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2138 2139 ring = &adev->gfx.compute_ring[ring_id]; 2140 2141 /* mec0 is me1 */ 2142 ring->me = mec + 1; 2143 ring->pipe = pipe; 2144 ring->queue = queue; 2145 2146 ring->ring_obj = NULL; 2147 ring->use_doorbell = true; 2148 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2149 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2150 + (ring_id * GFX9_MEC_HPD_SIZE); 2151 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2152 2153 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2154 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2155 + ring->pipe; 2156 2157 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2158 r = amdgpu_ring_init(adev, ring, 1024, 2159 &adev->gfx.eop_irq, irq_type); 2160 if (r) 2161 return r; 2162 2163 2164 return 0; 2165 } 2166 2167 static int gfx_v9_0_sw_init(void *handle) 2168 { 2169 int i, j, k, r, ring_id; 2170 struct amdgpu_ring *ring; 2171 struct amdgpu_kiq *kiq; 2172 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2173 2174 switch (adev->asic_type) { 2175 case CHIP_VEGA10: 2176 case CHIP_VEGA12: 2177 case CHIP_VEGA20: 2178 case CHIP_RAVEN: 2179 case CHIP_ARCTURUS: 2180 case CHIP_RENOIR: 2181 adev->gfx.mec.num_mec = 2; 2182 break; 2183 default: 2184 adev->gfx.mec.num_mec = 1; 2185 break; 2186 } 2187 2188 adev->gfx.mec.num_pipe_per_mec = 4; 2189 adev->gfx.mec.num_queue_per_pipe = 8; 2190 2191 /* EOP Event */ 2192 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2193 if (r) 2194 return r; 2195 2196 /* Privileged reg */ 2197 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2198 &adev->gfx.priv_reg_irq); 2199 if (r) 2200 return r; 2201 2202 /* Privileged inst */ 2203 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2204 &adev->gfx.priv_inst_irq); 2205 if (r) 2206 return r; 2207 2208 /* ECC error */ 2209 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2210 &adev->gfx.cp_ecc_error_irq); 2211 if (r) 2212 return r; 2213 2214 /* FUE error */ 2215 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2216 &adev->gfx.cp_ecc_error_irq); 2217 if (r) 2218 return r; 2219 2220 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2221 2222 gfx_v9_0_scratch_init(adev); 2223 2224 r = gfx_v9_0_init_microcode(adev); 2225 if (r) { 2226 DRM_ERROR("Failed to load gfx firmware!\n"); 2227 return r; 2228 
} 2229 2230 r = adev->gfx.rlc.funcs->init(adev); 2231 if (r) { 2232 DRM_ERROR("Failed to init rlc BOs!\n"); 2233 return r; 2234 } 2235 2236 r = gfx_v9_0_mec_init(adev); 2237 if (r) { 2238 DRM_ERROR("Failed to init MEC BOs!\n"); 2239 return r; 2240 } 2241 2242 /* set up the gfx ring */ 2243 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2244 ring = &adev->gfx.gfx_ring[i]; 2245 ring->ring_obj = NULL; 2246 if (!i) 2247 sprintf(ring->name, "gfx"); 2248 else 2249 sprintf(ring->name, "gfx_%d", i); 2250 ring->use_doorbell = true; 2251 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2252 r = amdgpu_ring_init(adev, ring, 1024, 2253 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 2254 if (r) 2255 return r; 2256 } 2257 2258 /* set up the compute queues - allocate horizontally across pipes */ 2259 ring_id = 0; 2260 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2261 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2262 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2263 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2264 continue; 2265 2266 r = gfx_v9_0_compute_ring_init(adev, 2267 ring_id, 2268 i, k, j); 2269 if (r) 2270 return r; 2271 2272 ring_id++; 2273 } 2274 } 2275 } 2276 2277 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2278 if (r) { 2279 DRM_ERROR("Failed to init KIQ BOs!\n"); 2280 return r; 2281 } 2282 2283 kiq = &adev->gfx.kiq; 2284 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2285 if (r) 2286 return r; 2287 2288 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2289 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2290 if (r) 2291 return r; 2292 2293 adev->gfx.ce_ram_size = 0x8000; 2294 2295 r = gfx_v9_0_gpu_early_init(adev); 2296 if (r) 2297 return r; 2298 2299 return 0; 2300 } 2301 2302 2303 static int gfx_v9_0_sw_fini(void *handle) 2304 { 2305 int i; 2306 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2307 2308 amdgpu_gfx_ras_fini(adev); 2309 2310 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2311 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2312 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2313 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2314 2315 amdgpu_gfx_mqd_sw_fini(adev); 2316 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2317 amdgpu_gfx_kiq_fini(adev); 2318 2319 gfx_v9_0_mec_fini(adev); 2320 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2321 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 2322 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2323 &adev->gfx.rlc.cp_table_gpu_addr, 2324 (void **)&adev->gfx.rlc.cp_table_ptr); 2325 } 2326 gfx_v9_0_free_microcode(adev); 2327 2328 return 0; 2329 } 2330 2331 2332 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2333 { 2334 /* TODO */ 2335 } 2336 2337 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2338 { 2339 u32 data; 2340 2341 if (instance == 0xffffffff) 2342 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2343 else 2344 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2345 2346 if (se_num == 0xffffffff) 2347 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2348 else 2349 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2350 2351 if (sh_num == 0xffffffff) 2352 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2353 else 2354 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2355 2356 WREG32_SOC15_RLC_SHADOW(GC, 0, 
mmGRBM_GFX_INDEX, data); 2357 } 2358 2359 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2360 { 2361 u32 data, mask; 2362 2363 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2364 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2365 2366 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2367 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2368 2369 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2370 adev->gfx.config.max_sh_per_se); 2371 2372 return (~data) & mask; 2373 } 2374 2375 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2376 { 2377 int i, j; 2378 u32 data; 2379 u32 active_rbs = 0; 2380 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2381 adev->gfx.config.max_sh_per_se; 2382 2383 mutex_lock(&adev->grbm_idx_mutex); 2384 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2385 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2386 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2387 data = gfx_v9_0_get_rb_active_bitmap(adev); 2388 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2389 rb_bitmap_width_per_sh); 2390 } 2391 } 2392 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2393 mutex_unlock(&adev->grbm_idx_mutex); 2394 2395 adev->gfx.config.backend_enable_mask = active_rbs; 2396 adev->gfx.config.num_rbs = hweight32(active_rbs); 2397 } 2398 2399 #define DEFAULT_SH_MEM_BASES (0x6000) 2400 #define FIRST_COMPUTE_VMID (8) 2401 #define LAST_COMPUTE_VMID (16) 2402 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2403 { 2404 int i; 2405 uint32_t sh_mem_config; 2406 uint32_t sh_mem_bases; 2407 2408 /* 2409 * Configure apertures: 2410 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2411 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2412 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2413 */ 2414 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2415 2416 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2417 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2418 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2419 2420 mutex_lock(&adev->srbm_mutex); 2421 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2422 soc15_grbm_select(adev, 0, 0, 0, i); 2423 /* CP and shaders */ 2424 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2425 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2426 } 2427 soc15_grbm_select(adev, 0, 0, 0, 0); 2428 mutex_unlock(&adev->srbm_mutex); 2429 2430 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2431 access. These should be enabled by FW for target VMIDs. */ 2432 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2433 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2434 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2435 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2436 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2437 } 2438 } 2439 2440 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2441 { 2442 int vmid; 2443 2444 /* 2445 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2446 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2447 * the driver can enable them for graphics. VMID0 should maintain 2448 * access so that HWS firmware can save/restore entries.
2449 */ 2450 for (vmid = 1; vmid < 16; vmid++) { 2451 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2452 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2453 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2454 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2455 } 2456 } 2457 2458 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2459 { 2460 uint32_t tmp; 2461 2462 switch (adev->asic_type) { 2463 case CHIP_ARCTURUS: 2464 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2465 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, 2466 DISABLE_BARRIER_WAITCNT, 1); 2467 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2468 break; 2469 default: 2470 break; 2471 }; 2472 } 2473 2474 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2475 { 2476 u32 tmp; 2477 int i; 2478 2479 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2480 2481 gfx_v9_0_tiling_mode_table_init(adev); 2482 2483 gfx_v9_0_setup_rb(adev); 2484 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2485 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2486 2487 /* XXX SH_MEM regs */ 2488 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2489 mutex_lock(&adev->srbm_mutex); 2490 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2491 soc15_grbm_select(adev, 0, 0, 0, i); 2492 /* CP and shaders */ 2493 if (i == 0) { 2494 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2495 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2496 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2497 !!amdgpu_noretry); 2498 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2499 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2500 } else { 2501 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2502 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2503 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2504 !!amdgpu_noretry); 2505 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2506 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2507 (adev->gmc.private_aperture_start >> 48)); 2508 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2509 (adev->gmc.shared_aperture_start >> 48)); 2510 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2511 } 2512 } 2513 soc15_grbm_select(adev, 0, 0, 0, 0); 2514 2515 mutex_unlock(&adev->srbm_mutex); 2516 2517 gfx_v9_0_init_compute_vmid(adev); 2518 gfx_v9_0_init_gds_vmid(adev); 2519 gfx_v9_0_init_sq_config(adev); 2520 } 2521 2522 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2523 { 2524 u32 i, j, k; 2525 u32 mask; 2526 2527 mutex_lock(&adev->grbm_idx_mutex); 2528 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2529 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2530 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2531 for (k = 0; k < adev->usec_timeout; k++) { 2532 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2533 break; 2534 udelay(1); 2535 } 2536 if (k == adev->usec_timeout) { 2537 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2538 0xffffffff, 0xffffffff); 2539 mutex_unlock(&adev->grbm_idx_mutex); 2540 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2541 i, j); 2542 return; 2543 } 2544 } 2545 } 2546 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2547 mutex_unlock(&adev->grbm_idx_mutex); 2548 2549 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2550 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2551 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2552 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2553 for (k = 0; k < adev->usec_timeout; k++) { 2554 if ((RREG32_SOC15(GC, 0, 
mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2555 break; 2556 udelay(1); 2557 } 2558 } 2559 2560 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2561 bool enable) 2562 { 2563 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2564 2565 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2566 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2567 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2568 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2569 2570 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2571 } 2572 2573 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2574 { 2575 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2576 /* csib */ 2577 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2578 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2579 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2580 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2581 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2582 adev->gfx.rlc.clear_state_size); 2583 } 2584 2585 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2586 int indirect_offset, 2587 int list_size, 2588 int *unique_indirect_regs, 2589 int unique_indirect_reg_count, 2590 int *indirect_start_offsets, 2591 int *indirect_start_offsets_count, 2592 int max_start_offsets_count) 2593 { 2594 int idx; 2595 2596 for (; indirect_offset < list_size; indirect_offset++) { 2597 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2598 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2599 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2600 2601 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2602 indirect_offset += 2; 2603 2604 /* look for the matching indice */ 2605 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2606 if (unique_indirect_regs[idx] == 2607 register_list_format[indirect_offset] || 2608 !unique_indirect_regs[idx]) 2609 break; 2610 } 2611 2612 BUG_ON(idx >= unique_indirect_reg_count); 2613 2614 if (!unique_indirect_regs[idx]) 2615 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2616 2617 indirect_offset++; 2618 } 2619 } 2620 } 2621 2622 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2623 { 2624 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2625 int unique_indirect_reg_count = 0; 2626 2627 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2628 int indirect_start_offsets_count = 0; 2629 2630 int list_size = 0; 2631 int i = 0, j = 0; 2632 u32 tmp = 0; 2633 2634 u32 *register_list_format = 2635 kmemdup(adev->gfx.rlc.register_list_format, 2636 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2637 if (!register_list_format) 2638 return -ENOMEM; 2639 2640 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2641 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2642 gfx_v9_1_parse_ind_reg_list(register_list_format, 2643 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2644 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2645 unique_indirect_regs, 2646 unique_indirect_reg_count, 2647 indirect_start_offsets, 2648 &indirect_start_offsets_count, 2649 ARRAY_SIZE(indirect_start_offsets)); 2650 2651 /* enable auto inc in case it is disabled */ 2652 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2653 tmp |= 
RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2654 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2655 2656 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2657 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2658 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2659 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2660 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2661 adev->gfx.rlc.register_restore[i]); 2662 2663 /* load indirect register */ 2664 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2665 adev->gfx.rlc.reg_list_format_start); 2666 2667 /* direct register portion */ 2668 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2669 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2670 register_list_format[i]); 2671 2672 /* indirect register portion */ 2673 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2674 if (register_list_format[i] == 0xFFFFFFFF) { 2675 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2676 continue; 2677 } 2678 2679 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2680 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2681 2682 for (j = 0; j < unique_indirect_reg_count; j++) { 2683 if (register_list_format[i] == unique_indirect_regs[j]) { 2684 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2685 break; 2686 } 2687 } 2688 2689 BUG_ON(j >= unique_indirect_reg_count); 2690 2691 i++; 2692 } 2693 2694 /* set save/restore list size */ 2695 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2696 list_size = list_size >> 1; 2697 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2698 adev->gfx.rlc.reg_restore_list_size); 2699 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2700 2701 /* write the starting offsets to RLC scratch ram */ 2702 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2703 adev->gfx.rlc.starting_offsets_start); 2704 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2705 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2706 indirect_start_offsets[i]); 2707 2708 /* load unique indirect regs*/ 2709 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2710 if (unique_indirect_regs[i] != 0) { 2711 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2712 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2713 unique_indirect_regs[i] & 0x3FFFF); 2714 2715 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2716 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2717 unique_indirect_regs[i] >> 20); 2718 } 2719 } 2720 2721 kfree(register_list_format); 2722 return 0; 2723 } 2724 2725 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2726 { 2727 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2728 } 2729 2730 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2731 bool enable) 2732 { 2733 uint32_t data = 0; 2734 uint32_t default_data = 0; 2735 2736 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2737 if (enable == true) { 2738 /* enable GFXIP control over CGPG */ 2739 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2740 if(default_data != data) 2741 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2742 2743 /* update status */ 2744 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2745 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2746 if(default_data != data) 2747 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 
2748 } else { 2749 /* restore GFXIP control over GCPG */ 2750 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2751 if(default_data != data) 2752 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2753 } 2754 } 2755 2756 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2757 { 2758 uint32_t data = 0; 2759 2760 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2761 AMD_PG_SUPPORT_GFX_SMG | 2762 AMD_PG_SUPPORT_GFX_DMG)) { 2763 /* init IDLE_POLL_COUNT = 60 */ 2764 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2765 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2766 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2767 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2768 2769 /* init RLC PG Delay */ 2770 data = 0; 2771 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2772 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2773 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2774 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2775 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2776 2777 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2778 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2779 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2780 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2781 2782 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2783 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2784 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2785 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2786 2787 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2788 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2789 2790 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2791 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2792 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2793 2794 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2795 } 2796 } 2797 2798 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2799 bool enable) 2800 { 2801 uint32_t data = 0; 2802 uint32_t default_data = 0; 2803 2804 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2805 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2806 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2807 enable ? 1 : 0); 2808 if (default_data != data) 2809 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2810 } 2811 2812 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2813 bool enable) 2814 { 2815 uint32_t data = 0; 2816 uint32_t default_data = 0; 2817 2818 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2819 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2820 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2821 enable ? 1 : 0); 2822 if(default_data != data) 2823 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2824 } 2825 2826 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2827 bool enable) 2828 { 2829 uint32_t data = 0; 2830 uint32_t default_data = 0; 2831 2832 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2833 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2834 CP_PG_DISABLE, 2835 enable ? 
0 : 1); 2836 if (default_data != data) 2837 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2838 } 2839 2840 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2841 bool enable) 2842 { 2843 uint32_t data, default_data; 2844 2845 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2846 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2847 GFX_POWER_GATING_ENABLE, 2848 enable ? 1 : 0); 2849 if (default_data != data) 2850 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2851 } 2852 2853 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2854 bool enable) 2855 { 2856 uint32_t data, default_data; 2857 2858 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2859 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2860 GFX_PIPELINE_PG_ENABLE, 2861 enable ? 1 : 0); 2862 if (default_data != data) 2863 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2864 2865 if (!enable) 2866 /* read any GFX register to wake up GFX */ 2867 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2868 } 2869 2870 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2871 bool enable) 2872 { 2873 uint32_t data, default_data; 2874 2875 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2876 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2877 STATIC_PER_CU_PG_ENABLE, 2878 enable ? 1 : 0); 2879 if (default_data != data) 2880 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2881 } 2882 2883 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2884 bool enable) 2885 { 2886 uint32_t data, default_data; 2887 2888 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2889 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2890 DYN_PER_CU_PG_ENABLE, 2891 enable ? 1 : 0); 2892 if (default_data != data) 2893 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2894 } 2895 2896 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2897 { 2898 gfx_v9_0_init_csb(adev); 2899 2900 /* 2901 * The RLC save/restore list is supported since RLC firmware v2.1 2902 * and is required by the gfxoff feature.
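 *
 * Note (added for clarity, not part of the original file): the list is
 * only (re)programmed by the driver below for VEGA12 and for Raven2
 * (rev_id >= 8); on the other RLC v2.1 parts only the save/restore
 * machine is enabled here.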
2903 */ 2904 if (adev->gfx.rlc.is_rlc_v2_1) { 2905 if (adev->asic_type == CHIP_VEGA12 || 2906 (adev->asic_type == CHIP_RAVEN && 2907 adev->rev_id >= 8)) 2908 gfx_v9_1_init_rlc_save_restore_list(adev); 2909 gfx_v9_0_enable_save_restore_machine(adev); 2910 } 2911 2912 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2913 AMD_PG_SUPPORT_GFX_SMG | 2914 AMD_PG_SUPPORT_GFX_DMG | 2915 AMD_PG_SUPPORT_CP | 2916 AMD_PG_SUPPORT_GDS | 2917 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2918 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2919 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2920 gfx_v9_0_init_gfx_power_gating(adev); 2921 } 2922 } 2923 2924 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2925 { 2926 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2927 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2928 gfx_v9_0_wait_for_rlc_serdes(adev); 2929 } 2930 2931 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2932 { 2933 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2934 udelay(50); 2935 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2936 udelay(50); 2937 } 2938 2939 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2940 { 2941 #ifdef AMDGPU_RLC_DEBUG_RETRY 2942 u32 rlc_ucode_ver; 2943 #endif 2944 2945 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2946 udelay(50); 2947 2948 /* carrizo do enable cp interrupt after cp inited */ 2949 if (!(adev->flags & AMD_IS_APU)) { 2950 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2951 udelay(50); 2952 } 2953 2954 #ifdef AMDGPU_RLC_DEBUG_RETRY 2955 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2956 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2957 if(rlc_ucode_ver == 0x108) { 2958 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2959 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2960 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2961 * default is 0x9C4 to create a 100us interval */ 2962 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2963 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2964 * to disable the page fault retry interrupts, default is 2965 * 0x100 (256) */ 2966 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2967 } 2968 #endif 2969 } 2970 2971 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2972 { 2973 const struct rlc_firmware_header_v2_0 *hdr; 2974 const __le32 *fw_data; 2975 unsigned i, fw_size; 2976 2977 if (!adev->gfx.rlc_fw) 2978 return -EINVAL; 2979 2980 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2981 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2982 2983 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2984 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2985 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2986 2987 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2988 RLCG_UCODE_LOADING_START_ADDRESS); 2989 for (i = 0; i < fw_size; i++) 2990 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2991 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2992 2993 return 0; 2994 } 2995 2996 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2997 { 2998 int r; 2999 3000 if (amdgpu_sriov_vf(adev)) { 3001 gfx_v9_0_init_csb(adev); 3002 return 0; 3003 } 3004 3005 adev->gfx.rlc.funcs->stop(adev); 3006 3007 /* disable CG */ 3008 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3009 3010 gfx_v9_0_init_pg(adev); 3011 3012 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3013 /* legacy rlc firmware loading */ 3014 r = gfx_v9_0_rlc_load_microcode(adev); 3015 if (r) 3016 return 
r; 3017 } 3018 3019 switch (adev->asic_type) { 3020 case CHIP_RAVEN: 3021 if (amdgpu_lbpw == 0) 3022 gfx_v9_0_enable_lbpw(adev, false); 3023 else 3024 gfx_v9_0_enable_lbpw(adev, true); 3025 break; 3026 case CHIP_VEGA20: 3027 if (amdgpu_lbpw > 0) 3028 gfx_v9_0_enable_lbpw(adev, true); 3029 else 3030 gfx_v9_0_enable_lbpw(adev, false); 3031 break; 3032 default: 3033 break; 3034 } 3035 3036 adev->gfx.rlc.funcs->start(adev); 3037 3038 return 0; 3039 } 3040 3041 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3042 { 3043 int i; 3044 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3045 3046 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3047 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3048 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3049 if (!enable) { 3050 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3051 adev->gfx.gfx_ring[i].sched.ready = false; 3052 } 3053 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3054 udelay(50); 3055 } 3056 3057 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3058 { 3059 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3060 const struct gfx_firmware_header_v1_0 *ce_hdr; 3061 const struct gfx_firmware_header_v1_0 *me_hdr; 3062 const __le32 *fw_data; 3063 unsigned i, fw_size; 3064 3065 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3066 return -EINVAL; 3067 3068 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3069 adev->gfx.pfp_fw->data; 3070 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3071 adev->gfx.ce_fw->data; 3072 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3073 adev->gfx.me_fw->data; 3074 3075 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3076 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3077 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3078 3079 gfx_v9_0_cp_gfx_enable(adev, false); 3080 3081 /* PFP */ 3082 fw_data = (const __le32 *) 3083 (adev->gfx.pfp_fw->data + 3084 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3085 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3086 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3087 for (i = 0; i < fw_size; i++) 3088 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3089 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3090 3091 /* CE */ 3092 fw_data = (const __le32 *) 3093 (adev->gfx.ce_fw->data + 3094 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3095 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3096 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3097 for (i = 0; i < fw_size; i++) 3098 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3099 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3100 3101 /* ME */ 3102 fw_data = (const __le32 *) 3103 (adev->gfx.me_fw->data + 3104 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3105 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3106 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3107 for (i = 0; i < fw_size; i++) 3108 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3109 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3110 3111 return 0; 3112 } 3113 3114 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3115 { 3116 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3117 const struct cs_section_def *sect = NULL; 3118 const struct cs_extent_def *ext = NULL; 3119 int r, i, tmp; 3120 3121 /* init the CP */ 3122 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 
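	/* Note: the rest of this function re-enables the gfx CP and then emits
	 * the clear-state preamble plus the SECT_CONTEXT register defaults from
	 * gfx9_cs_data on ring 0, so later submissions start from a known
	 * context state.
	 */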
	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);

	gfx_v9_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
					PACKET3(PACKET3_SET_CONTEXT_REG,
						ext->reg_count));
				amdgpu_ring_write(ring,
					ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	tmp =
RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3221 if (ring->use_doorbell) { 3222 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3223 DOORBELL_OFFSET, ring->doorbell_index); 3224 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3225 DOORBELL_EN, 1); 3226 } else { 3227 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3228 } 3229 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3230 3231 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3232 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3233 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3234 3235 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3236 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3237 3238 3239 /* start the ring */ 3240 gfx_v9_0_cp_gfx_start(adev); 3241 ring->sched.ready = true; 3242 3243 return 0; 3244 } 3245 3246 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3247 { 3248 int i; 3249 3250 if (enable) { 3251 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3252 } else { 3253 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3254 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3255 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3256 adev->gfx.compute_ring[i].sched.ready = false; 3257 adev->gfx.kiq.ring.sched.ready = false; 3258 } 3259 udelay(50); 3260 } 3261 3262 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3263 { 3264 const struct gfx_firmware_header_v1_0 *mec_hdr; 3265 const __le32 *fw_data; 3266 unsigned i; 3267 u32 tmp; 3268 3269 if (!adev->gfx.mec_fw) 3270 return -EINVAL; 3271 3272 gfx_v9_0_cp_compute_enable(adev, false); 3273 3274 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3275 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3276 3277 fw_data = (const __le32 *) 3278 (adev->gfx.mec_fw->data + 3279 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3280 tmp = 0; 3281 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3282 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3283 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3284 3285 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3286 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3287 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3288 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3289 3290 /* MEC1 */ 3291 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3292 mec_hdr->jt_offset); 3293 for (i = 0; i < mec_hdr->jt_size; i++) 3294 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3295 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3296 3297 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3298 adev->gfx.mec_fw_version); 3299 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 3300 3301 return 0; 3302 } 3303 3304 /* KIQ functions */ 3305 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3306 { 3307 uint32_t tmp; 3308 struct amdgpu_device *adev = ring->adev; 3309 3310 /* tell RLC which is KIQ queue */ 3311 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3312 tmp &= 0xffffff00; 3313 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3314 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3315 tmp |= 0x80; 3316 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3317 } 3318 3319 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3320 { 3321 struct amdgpu_device *adev = ring->adev; 3322 struct v9_mqd *mqd = ring->mqd_ptr; 3323 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3324 uint32_t tmp; 3325 3326 mqd->header = 0xC0310800; 3327 mqd->compute_pipelinestat_enable = 0x00000001; 3328 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3329 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3330 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3331 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3332 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3333 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3334 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3335 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3336 mqd->compute_misc_reserved = 0x00000003; 3337 3338 mqd->dynamic_cu_mask_addr_lo = 3339 lower_32_bits(ring->mqd_gpu_addr 3340 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3341 mqd->dynamic_cu_mask_addr_hi = 3342 upper_32_bits(ring->mqd_gpu_addr 3343 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3344 3345 eop_base_addr = ring->eop_gpu_addr >> 8; 3346 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3347 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3348 3349 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3350 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3351 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3352 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3353 3354 mqd->cp_hqd_eop_control = tmp; 3355 3356 /* enable doorbell? 
*/ 3357 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3358 3359 if (ring->use_doorbell) { 3360 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3361 DOORBELL_OFFSET, ring->doorbell_index); 3362 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3363 DOORBELL_EN, 1); 3364 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3365 DOORBELL_SOURCE, 0); 3366 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3367 DOORBELL_HIT, 0); 3368 } else { 3369 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3370 DOORBELL_EN, 0); 3371 } 3372 3373 mqd->cp_hqd_pq_doorbell_control = tmp; 3374 3375 /* disable the queue if it's active */ 3376 ring->wptr = 0; 3377 mqd->cp_hqd_dequeue_request = 0; 3378 mqd->cp_hqd_pq_rptr = 0; 3379 mqd->cp_hqd_pq_wptr_lo = 0; 3380 mqd->cp_hqd_pq_wptr_hi = 0; 3381 3382 /* set the pointer to the MQD */ 3383 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3384 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3385 3386 /* set MQD vmid to 0 */ 3387 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3388 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3389 mqd->cp_mqd_control = tmp; 3390 3391 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3392 hqd_gpu_addr = ring->gpu_addr >> 8; 3393 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3394 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3395 3396 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3397 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3398 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3399 (order_base_2(ring->ring_size / 4) - 1)); 3400 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3401 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3402 #ifdef __BIG_ENDIAN 3403 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3404 #endif 3405 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3406 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3407 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3408 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3409 mqd->cp_hqd_pq_control = tmp; 3410 3411 /* set the wb address whether it's enabled or not */ 3412 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3413 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3414 mqd->cp_hqd_pq_rptr_report_addr_hi = 3415 upper_32_bits(wb_gpu_addr) & 0xffff; 3416 3417 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3418 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3419 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3420 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3421 3422 tmp = 0; 3423 /* enable the doorbell if requested */ 3424 if (ring->use_doorbell) { 3425 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3426 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3427 DOORBELL_OFFSET, ring->doorbell_index); 3428 3429 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3430 DOORBELL_EN, 1); 3431 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3432 DOORBELL_SOURCE, 0); 3433 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3434 DOORBELL_HIT, 0); 3435 } 3436 3437 mqd->cp_hqd_pq_doorbell_control = tmp; 3438 3439 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3440 ring->wptr = 0; 3441 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3442 3443 /* set the vmid for the queue */ 3444 mqd->cp_hqd_vmid = 0; 3445 3446 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3447 
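	/* carry the value just read into the MQD, overriding only PRELOAD_SIZE */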
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* the map_queues packet doesn't need to activate the queue,
	 * so only the KIQ needs to set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}

static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
	       mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
	       mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
	       mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
		       mqd->cp_hqd_dequeue_request);
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
		       mqd->cp_hqd_pq_rptr);
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
		       mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
		       mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
	       mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
	       mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
	       mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
	       mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
	       mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
	       mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
	       mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
	       mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
	       mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
					(adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
					(adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

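	/* with the doorbell aperture programmed, commit the remaining MQD
	 * fields to the HQD registers and then activate the queue below.
	 */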
	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
	       mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
	       mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
	       mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int j;

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);

		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (j == adev->usec_timeout) {
			DRM_DEBUG("KIQ dequeue request failed.\n");

			/* manually disable the queue if the dequeue request times out */
			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
		}

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);

	return 0;
}

static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v9_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
	}

	return 0;
}

static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if
(!adev->in_gpu_reset && !adev->in_suspend) { 3655 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3656 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3657 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3658 mutex_lock(&adev->srbm_mutex); 3659 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3660 gfx_v9_0_mqd_init(ring); 3661 soc15_grbm_select(adev, 0, 0, 0, 0); 3662 mutex_unlock(&adev->srbm_mutex); 3663 3664 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3665 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3666 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3667 /* reset MQD to a clean status */ 3668 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3669 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3670 3671 /* reset ring buffer */ 3672 ring->wptr = 0; 3673 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); 3674 amdgpu_ring_clear_ring(ring); 3675 } else { 3676 amdgpu_ring_clear_ring(ring); 3677 } 3678 3679 return 0; 3680 } 3681 3682 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3683 { 3684 struct amdgpu_ring *ring; 3685 int r; 3686 3687 ring = &adev->gfx.kiq.ring; 3688 3689 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3690 if (unlikely(r != 0)) 3691 return r; 3692 3693 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3694 if (unlikely(r != 0)) 3695 return r; 3696 3697 gfx_v9_0_kiq_init_queue(ring); 3698 amdgpu_bo_kunmap(ring->mqd_obj); 3699 ring->mqd_ptr = NULL; 3700 amdgpu_bo_unreserve(ring->mqd_obj); 3701 ring->sched.ready = true; 3702 return 0; 3703 } 3704 3705 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3706 { 3707 struct amdgpu_ring *ring = NULL; 3708 int r = 0, i; 3709 3710 gfx_v9_0_cp_compute_enable(adev, true); 3711 3712 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3713 ring = &adev->gfx.compute_ring[i]; 3714 3715 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3716 if (unlikely(r != 0)) 3717 goto done; 3718 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3719 if (!r) { 3720 r = gfx_v9_0_kcq_init_queue(ring); 3721 amdgpu_bo_kunmap(ring->mqd_obj); 3722 ring->mqd_ptr = NULL; 3723 } 3724 amdgpu_bo_unreserve(ring->mqd_obj); 3725 if (r) 3726 goto done; 3727 } 3728 3729 r = amdgpu_gfx_enable_kcq(adev); 3730 done: 3731 return r; 3732 } 3733 3734 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3735 { 3736 int r, i; 3737 struct amdgpu_ring *ring; 3738 3739 if (!(adev->flags & AMD_IS_APU)) 3740 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3741 3742 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3743 if (adev->asic_type != CHIP_ARCTURUS) { 3744 /* legacy firmware loading */ 3745 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3746 if (r) 3747 return r; 3748 } 3749 3750 r = gfx_v9_0_cp_compute_load_microcode(adev); 3751 if (r) 3752 return r; 3753 } 3754 3755 r = gfx_v9_0_kiq_resume(adev); 3756 if (r) 3757 return r; 3758 3759 if (adev->asic_type != CHIP_ARCTURUS) { 3760 r = gfx_v9_0_cp_gfx_resume(adev); 3761 if (r) 3762 return r; 3763 } 3764 3765 r = gfx_v9_0_kcq_resume(adev); 3766 if (r) 3767 return r; 3768 3769 if (adev->asic_type != CHIP_ARCTURUS) { 3770 ring = &adev->gfx.gfx_ring[0]; 3771 r = amdgpu_ring_test_helper(ring); 3772 if (r) 3773 return r; 3774 } 3775 3776 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3777 ring = &adev->gfx.compute_ring[i]; 3778 amdgpu_ring_test_helper(ring); 3779 } 3780 3781 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3782 3783 return 0; 3784 } 3785 
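/*
 * Note on bring-up order (see gfx_v9_0_cp_resume() above): microcode is
 * loaded directly only on the legacy (non-PSP) path, the KIQ is brought up
 * first so it can service map_queues requests, then the gfx ring (skipped on
 * Arcturus, which has no gfx pipeline) and finally the compute queues are
 * resumed and ring-tested.
 */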
static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
	u32 tmp;

	if (adev->asic_type != CHIP_ARCTURUS)
		return;

	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
				adev->df.hash_status.hash_64k);
	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
				adev->df.hash_status.hash_2m);
	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
				adev->df.hash_status.hash_1g);
	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
}

static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	if (adev->asic_type != CHIP_ARCTURUS)
		gfx_v9_0_cp_gfx_enable(adev, enable);
	gfx_v9_0_cp_compute_enable(adev, enable);
}

static int gfx_v9_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!amdgpu_sriov_vf(adev))
		gfx_v9_0_init_golden_registers(adev);

	gfx_v9_0_constants_init(adev);

	gfx_v9_0_init_tcp_config(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_cp_resume(adev);
	if (r)
		return r;

	return r;
}

static int gfx_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	/* DF freeze and KCQ disable would fail after a RAS fatal error interrupt */
	if (!amdgpu_ras_intr_triggered())
		/* disable KCQ so the CPC stops touching memory that is no longer valid */
		amdgpu_gfx_disable_kcq(adev);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_cp_gfx_enable(adev, false);
		/* for SRIOV, wptr polling must be disabled once the hw is done;
		 * otherwise the CPC engine may keep fetching a WB address that is
		 * already invalid after the sw side has finished, which triggers a
		 * DMAR read error on the hypervisor side.
3852 */ 3853 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3854 return 0; 3855 } 3856 3857 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3858 * otherwise KIQ is hanging when binding back 3859 */ 3860 if (!adev->in_gpu_reset && !adev->in_suspend) { 3861 mutex_lock(&adev->srbm_mutex); 3862 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3863 adev->gfx.kiq.ring.pipe, 3864 adev->gfx.kiq.ring.queue, 0); 3865 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3866 soc15_grbm_select(adev, 0, 0, 0, 0); 3867 mutex_unlock(&adev->srbm_mutex); 3868 } 3869 3870 gfx_v9_0_cp_enable(adev, false); 3871 adev->gfx.rlc.funcs->stop(adev); 3872 3873 return 0; 3874 } 3875 3876 static int gfx_v9_0_suspend(void *handle) 3877 { 3878 return gfx_v9_0_hw_fini(handle); 3879 } 3880 3881 static int gfx_v9_0_resume(void *handle) 3882 { 3883 return gfx_v9_0_hw_init(handle); 3884 } 3885 3886 static bool gfx_v9_0_is_idle(void *handle) 3887 { 3888 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3889 3890 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3891 GRBM_STATUS, GUI_ACTIVE)) 3892 return false; 3893 else 3894 return true; 3895 } 3896 3897 static int gfx_v9_0_wait_for_idle(void *handle) 3898 { 3899 unsigned i; 3900 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3901 3902 for (i = 0; i < adev->usec_timeout; i++) { 3903 if (gfx_v9_0_is_idle(handle)) 3904 return 0; 3905 udelay(1); 3906 } 3907 return -ETIMEDOUT; 3908 } 3909 3910 static int gfx_v9_0_soft_reset(void *handle) 3911 { 3912 u32 grbm_soft_reset = 0; 3913 u32 tmp; 3914 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3915 3916 /* GRBM_STATUS */ 3917 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3918 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3919 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3920 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3921 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3922 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3923 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3924 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3925 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3926 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3927 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3928 } 3929 3930 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3931 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3932 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3933 } 3934 3935 /* GRBM_STATUS2 */ 3936 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3937 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3938 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3939 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3940 3941 3942 if (grbm_soft_reset) { 3943 /* stop the rlc */ 3944 adev->gfx.rlc.funcs->stop(adev); 3945 3946 if (adev->asic_type != CHIP_ARCTURUS) 3947 /* Disable GFX parsing/prefetching */ 3948 gfx_v9_0_cp_gfx_enable(adev, false); 3949 3950 /* Disable MEC parsing/prefetching */ 3951 gfx_v9_0_cp_compute_enable(adev, false); 3952 3953 if (grbm_soft_reset) { 3954 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3955 tmp |= grbm_soft_reset; 3956 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3957 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3958 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3959 3960 udelay(50); 3961 3962 tmp &= ~grbm_soft_reset; 3963 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3964 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3965 } 3966 3967 /* Wait a little for things to settle down */ 3968 
udelay(50); 3969 } 3970 return 0; 3971 } 3972 3973 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 3974 { 3975 signed long r, cnt = 0; 3976 unsigned long flags; 3977 uint32_t seq; 3978 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 3979 struct amdgpu_ring *ring = &kiq->ring; 3980 3981 BUG_ON(!ring->funcs->emit_rreg); 3982 3983 spin_lock_irqsave(&kiq->ring_lock, flags); 3984 amdgpu_ring_alloc(ring, 32); 3985 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 3986 amdgpu_ring_write(ring, 9 | /* src: register*/ 3987 (5 << 8) | /* dst: memory */ 3988 (1 << 16) | /* count sel */ 3989 (1 << 20)); /* write confirm */ 3990 amdgpu_ring_write(ring, 0); 3991 amdgpu_ring_write(ring, 0); 3992 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 3993 kiq->reg_val_offs * 4)); 3994 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 3995 kiq->reg_val_offs * 4)); 3996 amdgpu_fence_emit_polling(ring, &seq); 3997 amdgpu_ring_commit(ring); 3998 spin_unlock_irqrestore(&kiq->ring_lock, flags); 3999 4000 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4001 4002 /* don't wait anymore for gpu reset case because this way may 4003 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4004 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4005 * never return if we keep waiting in virt_kiq_rreg, which cause 4006 * gpu_recover() hang there. 4007 * 4008 * also don't wait anymore for IRQ context 4009 * */ 4010 if (r < 1 && (adev->in_gpu_reset || in_interrupt())) 4011 goto failed_kiq_read; 4012 4013 might_sleep(); 4014 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4015 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4016 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4017 } 4018 4019 if (cnt > MAX_KIQ_REG_TRY) 4020 goto failed_kiq_read; 4021 4022 return (uint64_t)adev->wb.wb[kiq->reg_val_offs] | 4023 (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1 ] << 32ULL; 4024 4025 failed_kiq_read: 4026 pr_err("failed to read gpu clock\n"); 4027 return ~0; 4028 } 4029 4030 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4031 { 4032 uint64_t clock; 4033 4034 amdgpu_gfx_off_ctrl(adev, false); 4035 mutex_lock(&adev->gfx.gpu_clock_mutex); 4036 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 4037 clock = gfx_v9_0_kiq_read_clock(adev); 4038 } else { 4039 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4040 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4041 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4042 } 4043 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4044 amdgpu_gfx_off_ctrl(adev, true); 4045 return clock; 4046 } 4047 4048 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4049 uint32_t vmid, 4050 uint32_t gds_base, uint32_t gds_size, 4051 uint32_t gws_base, uint32_t gws_size, 4052 uint32_t oa_base, uint32_t oa_size) 4053 { 4054 struct amdgpu_device *adev = ring->adev; 4055 4056 /* GDS Base */ 4057 gfx_v9_0_write_data_to_reg(ring, 0, false, 4058 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4059 gds_base); 4060 4061 /* GDS Size */ 4062 gfx_v9_0_write_data_to_reg(ring, 0, false, 4063 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4064 gds_size); 4065 4066 /* GWS */ 4067 gfx_v9_0_write_data_to_reg(ring, 0, false, 4068 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4069 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4070 4071 /* OA */ 4072 gfx_v9_0_write_data_to_reg(ring, 0, false, 4073 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4074 (1 << 
(oa_size + oa_base)) - (1 << oa_base)); 4075 } 4076 4077 static const u32 vgpr_init_compute_shader[] = 4078 { 4079 0xb07c0000, 0xbe8000ff, 4080 0x000000f8, 0xbf110800, 4081 0x7e000280, 0x7e020280, 4082 0x7e040280, 0x7e060280, 4083 0x7e080280, 0x7e0a0280, 4084 0x7e0c0280, 0x7e0e0280, 4085 0x80808800, 0xbe803200, 4086 0xbf84fff5, 0xbf9c0000, 4087 0xd28c0001, 0x0001007f, 4088 0xd28d0001, 0x0002027e, 4089 0x10020288, 0xb8810904, 4090 0xb7814000, 0xd1196a01, 4091 0x00000301, 0xbe800087, 4092 0xbefc00c1, 0xd89c4000, 4093 0x00020201, 0xd89cc080, 4094 0x00040401, 0x320202ff, 4095 0x00000800, 0x80808100, 4096 0xbf84fff8, 0x7e020280, 4097 0xbf810000, 0x00000000, 4098 }; 4099 4100 static const u32 sgpr_init_compute_shader[] = 4101 { 4102 0xb07c0000, 0xbe8000ff, 4103 0x0000005f, 0xbee50080, 4104 0xbe812c65, 0xbe822c65, 4105 0xbe832c65, 0xbe842c65, 4106 0xbe852c65, 0xb77c0005, 4107 0x80808500, 0xbf84fff8, 4108 0xbe800080, 0xbf810000, 4109 }; 4110 4111 /* When below register arrays changed, please update gpr_reg_size, 4112 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4113 to cover all gfx9 ASICs */ 4114 static const struct soc15_reg_entry vgpr_init_regs[] = { 4115 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4116 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4117 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4118 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4119 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4120 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4121 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4122 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4123 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4124 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4125 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4126 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4127 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4128 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4129 }; 4130 4131 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4132 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4133 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4134 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4135 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4136 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4137 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4138 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4139 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4140 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4141 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4142 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4143 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4144 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4145 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4146 }; 4147 4148 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4149 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4150 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4151 { SOC15_REG_ENTRY(GC, 
0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4152 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4153 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4154 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4155 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4156 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4157 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4158 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4159 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4160 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4161 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4162 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4163 }; 4164 4165 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4166 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4167 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4168 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4169 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4170 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4171 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4172 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4173 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4174 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4175 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4176 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4177 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4178 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4179 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4180 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4181 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4182 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4183 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4184 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4185 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4186 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4187 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4188 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4189 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4190 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4191 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4192 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4193 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4194 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4195 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4196 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4197 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4198 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4199 }; 4200 4201 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4202 { 4203 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4204 int i, r; 4205 4206 /* only support when RAS is enabled */ 4207 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4208 return 0; 4209 4210 r = amdgpu_ring_alloc(ring, 7); 4211 if (r) { 4212 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4213 ring->name, r); 4214 return r; 4215 } 4216 4217 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4218 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4219 4220 amdgpu_ring_write(ring, 
PACKET3(PACKET3_DMA_DATA, 5)); 4221 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4222 PACKET3_DMA_DATA_DST_SEL(1) | 4223 PACKET3_DMA_DATA_SRC_SEL(2) | 4224 PACKET3_DMA_DATA_ENGINE(0))); 4225 amdgpu_ring_write(ring, 0); 4226 amdgpu_ring_write(ring, 0); 4227 amdgpu_ring_write(ring, 0); 4228 amdgpu_ring_write(ring, 0); 4229 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4230 adev->gds.gds_size); 4231 4232 amdgpu_ring_commit(ring); 4233 4234 for (i = 0; i < adev->usec_timeout; i++) { 4235 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4236 break; 4237 udelay(1); 4238 } 4239 4240 if (i >= adev->usec_timeout) 4241 r = -ETIMEDOUT; 4242 4243 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4244 4245 return r; 4246 } 4247 4248 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4249 { 4250 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4251 struct amdgpu_ib ib; 4252 struct dma_fence *f = NULL; 4253 int r, i; 4254 unsigned total_size, vgpr_offset, sgpr_offset; 4255 u64 gpu_addr; 4256 4257 int compute_dim_x = adev->gfx.config.max_shader_engines * 4258 adev->gfx.config.max_cu_per_sh * 4259 adev->gfx.config.max_sh_per_se; 4260 int sgpr_work_group_size = 5; 4261 int gpr_reg_size = compute_dim_x / 16 + 6; 4262 4263 /* only support when RAS is enabled */ 4264 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4265 return 0; 4266 4267 /* bail if the compute ring is not ready */ 4268 if (!ring->sched.ready) 4269 return 0; 4270 4271 total_size = 4272 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4273 total_size += 4274 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4275 total_size += 4276 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4277 total_size = ALIGN(total_size, 256); 4278 vgpr_offset = total_size; 4279 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4280 sgpr_offset = total_size; 4281 total_size += sizeof(sgpr_init_compute_shader); 4282 4283 /* allocate an indirect buffer to put the commands in */ 4284 memset(&ib, 0, sizeof(ib)); 4285 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4286 if (r) { 4287 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4288 return r; 4289 } 4290 4291 /* load the compute shaders */ 4292 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4293 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4294 4295 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4296 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4297 4298 /* init the ib length to 0 */ 4299 ib.length_dw = 0; 4300 4301 /* VGPR */ 4302 /* write the register state for the compute dispatch */ 4303 for (i = 0; i < gpr_reg_size; i++) { 4304 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4305 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4306 - PACKET3_SET_SH_REG_START; 4307 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4308 } 4309 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4310 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4311 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4312 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4313 - PACKET3_SET_SH_REG_START; 4314 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4315 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4316 4317 /* write dispatch packet */ 4318 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4319 ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ 4320 ib.ptr[ib.length_dw++] = 1; /* y */ 4321 ib.ptr[ib.length_dw++] = 1; /* z */ 
4322 ib.ptr[ib.length_dw++] = 4323 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4324 4325 /* write CS partial flush packet */ 4326 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4327 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4328 4329 /* SGPR1 */ 4330 /* write the register state for the compute dispatch */ 4331 for (i = 0; i < gpr_reg_size; i++) { 4332 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4333 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4334 - PACKET3_SET_SH_REG_START; 4335 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4336 } 4337 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4338 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4339 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4340 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4341 - PACKET3_SET_SH_REG_START; 4342 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4343 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4344 4345 /* write dispatch packet */ 4346 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4347 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4348 ib.ptr[ib.length_dw++] = 1; /* y */ 4349 ib.ptr[ib.length_dw++] = 1; /* z */ 4350 ib.ptr[ib.length_dw++] = 4351 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4352 4353 /* write CS partial flush packet */ 4354 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4355 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4356 4357 /* SGPR2 */ 4358 /* write the register state for the compute dispatch */ 4359 for (i = 0; i < gpr_reg_size; i++) { 4360 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4361 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4362 - PACKET3_SET_SH_REG_START; 4363 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4364 } 4365 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4366 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4367 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4368 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4369 - PACKET3_SET_SH_REG_START; 4370 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4371 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4372 4373 /* write dispatch packet */ 4374 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4375 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4376 ib.ptr[ib.length_dw++] = 1; /* y */ 4377 ib.ptr[ib.length_dw++] = 1; /* z */ 4378 ib.ptr[ib.length_dw++] = 4379 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4380 4381 /* write CS partial flush packet */ 4382 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4383 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4384 4385 /* shedule the ib on the ring */ 4386 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4387 if (r) { 4388 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4389 goto fail; 4390 } 4391 4392 /* wait for the GPU to finish processing the IB */ 4393 r = dma_fence_wait(f, false); 4394 if (r) { 4395 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4396 goto fail; 4397 } 4398 4399 fail: 4400 amdgpu_ib_free(adev, &ib, NULL); 4401 dma_fence_put(f); 4402 4403 return r; 4404 } 4405 4406 static int gfx_v9_0_early_init(void *handle) 4407 { 4408 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4409 4410 if (adev->asic_type == CHIP_ARCTURUS) 
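		/* Arcturus is a compute-only ASIC, so it exposes no gfx rings */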
4411 adev->gfx.num_gfx_rings = 0; 4412 else 4413 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4414 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 4415 gfx_v9_0_set_kiq_pm4_funcs(adev); 4416 gfx_v9_0_set_ring_funcs(adev); 4417 gfx_v9_0_set_irq_funcs(adev); 4418 gfx_v9_0_set_gds_init(adev); 4419 gfx_v9_0_set_rlc_funcs(adev); 4420 4421 return 0; 4422 } 4423 4424 static int gfx_v9_0_ecc_late_init(void *handle) 4425 { 4426 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4427 int r; 4428 4429 /* 4430 * Temp workaround to fix the issue that CP firmware fails to 4431 * update read pointer when CPDMA is writing clearing operation 4432 * to GDS in suspend/resume sequence on several cards. So just 4433 * limit this operation in cold boot sequence. 4434 */ 4435 if (!adev->in_suspend) { 4436 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4437 if (r) 4438 return r; 4439 } 4440 4441 /* requires IBs so do in late init after IB pool is initialized */ 4442 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4443 if (r) 4444 return r; 4445 4446 if (adev->gfx.funcs && 4447 adev->gfx.funcs->reset_ras_error_count) 4448 adev->gfx.funcs->reset_ras_error_count(adev); 4449 4450 r = amdgpu_gfx_ras_late_init(adev); 4451 if (r) 4452 return r; 4453 4454 return 0; 4455 } 4456 4457 static int gfx_v9_0_late_init(void *handle) 4458 { 4459 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4460 int r; 4461 4462 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4463 if (r) 4464 return r; 4465 4466 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4467 if (r) 4468 return r; 4469 4470 r = gfx_v9_0_ecc_late_init(handle); 4471 if (r) 4472 return r; 4473 4474 return 0; 4475 } 4476 4477 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4478 { 4479 uint32_t rlc_setting; 4480 4481 /* if RLC is not enabled, do nothing */ 4482 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4483 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4484 return false; 4485 4486 return true; 4487 } 4488 4489 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4490 { 4491 uint32_t data; 4492 unsigned i; 4493 4494 data = RLC_SAFE_MODE__CMD_MASK; 4495 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4496 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4497 4498 /* wait for RLC_SAFE_MODE */ 4499 for (i = 0; i < adev->usec_timeout; i++) { 4500 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4501 break; 4502 udelay(1); 4503 } 4504 } 4505 4506 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4507 { 4508 uint32_t data; 4509 4510 data = RLC_SAFE_MODE__CMD_MASK; 4511 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4512 } 4513 4514 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4515 bool enable) 4516 { 4517 amdgpu_gfx_rlc_enter_safe_mode(adev); 4518 4519 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4520 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4521 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4522 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4523 } else { 4524 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4525 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4526 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4527 } 4528 4529 amdgpu_gfx_rlc_exit_safe_mode(adev); 4530 } 4531 4532 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4533 bool enable) 4534 { 4535 /* TODO: double check if we need to perform under safe mode */ 4536 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4537 4538 if 
((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4539 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4540 else 4541 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4542 4543 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4544 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4545 else 4546 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4547 4548 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4549 } 4550 4551 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4552 bool enable) 4553 { 4554 uint32_t data, def; 4555 4556 amdgpu_gfx_rlc_enter_safe_mode(adev); 4557 4558 /* It is disabled by HW by default */ 4559 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4560 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4561 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4562 4563 if (adev->asic_type != CHIP_VEGA12) 4564 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4565 4566 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4567 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4568 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4569 4570 /* only for Vega10 & Raven1 */ 4571 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4572 4573 if (def != data) 4574 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4575 4576 /* MGLS is a global flag to control all MGLS in GFX */ 4577 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4578 /* 2 - RLC memory Light sleep */ 4579 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4580 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4581 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4582 if (def != data) 4583 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4584 } 4585 /* 3 - CP memory Light sleep */ 4586 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4587 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4588 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4589 if (def != data) 4590 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4591 } 4592 } 4593 } else { 4594 /* 1 - MGCG_OVERRIDE */ 4595 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4596 4597 if (adev->asic_type != CHIP_VEGA12) 4598 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4599 4600 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4601 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4602 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4603 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4604 4605 if (def != data) 4606 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4607 4608 /* 2 - disable MGLS in RLC */ 4609 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4610 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4611 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4612 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4613 } 4614 4615 /* 3 - disable MGLS in CP */ 4616 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4617 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4618 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4619 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4620 } 4621 } 4622 4623 amdgpu_gfx_rlc_exit_safe_mode(adev); 4624 } 4625 4626 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4627 bool enable) 4628 { 4629 uint32_t data, def; 4630 4631 if (adev->asic_type == CHIP_ARCTURUS) 4632 return; 4633 4634 amdgpu_gfx_rlc_enter_safe_mode(adev); 4635 4636 /* Enable 3D CGCG/CGLS */ 4637 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4638 /* write cmd to clear cgcg/cgls ov */ 4639 def = 
data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4640 /* unset CGCG override */ 4641 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4642 /* update CGCG and CGLS override bits */ 4643 if (def != data) 4644 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4645 4646 /* enable 3Dcgcg FSM(0x0000363f) */ 4647 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4648 4649 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4650 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4651 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4652 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4653 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4654 if (def != data) 4655 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4656 4657 /* set IDLE_POLL_COUNT(0x00900100) */ 4658 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4659 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4660 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4661 if (def != data) 4662 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4663 } else { 4664 /* Disable CGCG/CGLS */ 4665 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4666 /* disable cgcg, cgls should be disabled */ 4667 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4668 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4669 /* disable cgcg and cgls in FSM */ 4670 if (def != data) 4671 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4672 } 4673 4674 amdgpu_gfx_rlc_exit_safe_mode(adev); 4675 } 4676 4677 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4678 bool enable) 4679 { 4680 uint32_t def, data; 4681 4682 amdgpu_gfx_rlc_enter_safe_mode(adev); 4683 4684 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4685 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4686 /* unset CGCG override */ 4687 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4688 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4689 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4690 else 4691 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4692 /* update CGCG and CGLS override bits */ 4693 if (def != data) 4694 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4695 4696 /* enable cgcg FSM(0x0000363F) */ 4697 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4698 4699 if (adev->asic_type == CHIP_ARCTURUS) 4700 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4701 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4702 else 4703 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4704 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4705 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4706 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4707 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4708 if (def != data) 4709 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4710 4711 /* set IDLE_POLL_COUNT(0x00900100) */ 4712 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4713 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4714 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4715 if (def != data) 4716 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4717 } else { 4718 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4719 /* reset CGCG/CGLS bits */ 4720 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4721 /* disable cgcg and cgls in FSM */ 4722 if (def != data) 4723 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4724 } 4725 4726 amdgpu_gfx_rlc_exit_safe_mode(adev); 
4727 } 4728 4729 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4730 bool enable) 4731 { 4732 if (enable) { 4733 /* CGCG/CGLS should be enabled after MGCG/MGLS 4734 * === MGCG + MGLS === 4735 */ 4736 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4737 /* === CGCG /CGLS for GFX 3D Only === */ 4738 gfx_v9_0_update_3d_clock_gating(adev, enable); 4739 /* === CGCG + CGLS === */ 4740 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4741 } else { 4742 /* CGCG/CGLS should be disabled before MGCG/MGLS 4743 * === CGCG + CGLS === 4744 */ 4745 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4746 /* === CGCG /CGLS for GFX 3D Only === */ 4747 gfx_v9_0_update_3d_clock_gating(adev, enable); 4748 /* === MGCG + MGLS === */ 4749 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4750 } 4751 return 0; 4752 } 4753 4754 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 4755 { 4756 u32 data; 4757 4758 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 4759 4760 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 4761 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 4762 4763 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 4764 } 4765 4766 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4767 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4768 .set_safe_mode = gfx_v9_0_set_safe_mode, 4769 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4770 .init = gfx_v9_0_rlc_init, 4771 .get_csb_size = gfx_v9_0_get_csb_size, 4772 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4773 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4774 .resume = gfx_v9_0_rlc_resume, 4775 .stop = gfx_v9_0_rlc_stop, 4776 .reset = gfx_v9_0_rlc_reset, 4777 .start = gfx_v9_0_rlc_start, 4778 .update_spm_vmid = gfx_v9_0_update_spm_vmid 4779 }; 4780 4781 static int gfx_v9_0_set_powergating_state(void *handle, 4782 enum amd_powergating_state state) 4783 { 4784 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4785 bool enable = (state == AMD_PG_STATE_GATE); 4786 4787 switch (adev->asic_type) { 4788 case CHIP_RAVEN: 4789 case CHIP_RENOIR: 4790 if (!enable) { 4791 amdgpu_gfx_off_ctrl(adev, false); 4792 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4793 } 4794 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4795 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4796 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4797 } else { 4798 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4799 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4800 } 4801 4802 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4803 gfx_v9_0_enable_cp_power_gating(adev, true); 4804 else 4805 gfx_v9_0_enable_cp_power_gating(adev, false); 4806 4807 /* update gfx cgpg state */ 4808 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4809 4810 /* update mgcg state */ 4811 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4812 4813 if (enable) 4814 amdgpu_gfx_off_ctrl(adev, true); 4815 break; 4816 case CHIP_VEGA12: 4817 if (!enable) { 4818 amdgpu_gfx_off_ctrl(adev, false); 4819 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4820 } else { 4821 amdgpu_gfx_off_ctrl(adev, true); 4822 } 4823 break; 4824 default: 4825 break; 4826 } 4827 4828 return 0; 4829 } 4830 4831 static int gfx_v9_0_set_clockgating_state(void *handle, 4832 enum amd_clockgating_state state) 4833 { 4834 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4835 4836 if (amdgpu_sriov_vf(adev)) 4837 return 0; 4838 4839 switch (adev->asic_type) { 4840 case 
CHIP_VEGA10: 4841 case CHIP_VEGA12: 4842 case CHIP_VEGA20: 4843 case CHIP_RAVEN: 4844 case CHIP_ARCTURUS: 4845 case CHIP_RENOIR: 4846 gfx_v9_0_update_gfx_clock_gating(adev, 4847 state == AMD_CG_STATE_GATE); 4848 break; 4849 default: 4850 break; 4851 } 4852 return 0; 4853 } 4854 4855 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4856 { 4857 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4858 int data; 4859 4860 if (amdgpu_sriov_vf(adev)) 4861 *flags = 0; 4862 4863 /* AMD_CG_SUPPORT_GFX_MGCG */ 4864 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 4865 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4866 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4867 4868 /* AMD_CG_SUPPORT_GFX_CGCG */ 4869 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 4870 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4871 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4872 4873 /* AMD_CG_SUPPORT_GFX_CGLS */ 4874 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4875 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4876 4877 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4878 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 4879 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4880 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4881 4882 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4883 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 4884 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4885 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4886 4887 if (adev->asic_type != CHIP_ARCTURUS) { 4888 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4889 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 4890 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4891 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4892 4893 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4894 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4895 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4896 } 4897 } 4898 4899 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4900 { 4901 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4902 } 4903 4904 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4905 { 4906 struct amdgpu_device *adev = ring->adev; 4907 u64 wptr; 4908 4909 /* XXX check if swapping is necessary on BE */ 4910 if (ring->use_doorbell) { 4911 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4912 } else { 4913 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4914 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4915 } 4916 4917 return wptr; 4918 } 4919 4920 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4921 { 4922 struct amdgpu_device *adev = ring->adev; 4923 4924 if (ring->use_doorbell) { 4925 /* XXX check if swapping is necessary on BE */ 4926 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4927 WDOORBELL64(ring->doorbell_index, ring->wptr); 4928 } else { 4929 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4930 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4931 } 4932 } 4933 4934 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4935 { 4936 struct amdgpu_device *adev = ring->adev; 4937 u32 ref_and_mask, reg_mem_engine; 4938 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 4939 4940 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4941 switch (ring->me) { 4942 case 1: 4943 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4944 break; 4945 case 2: 4946 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << 
ring->pipe; 4947 break; 4948 default: 4949 return; 4950 } 4951 reg_mem_engine = 0; 4952 } else { 4953 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4954 reg_mem_engine = 1; /* pfp */ 4955 } 4956 4957 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4958 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 4959 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 4960 ref_and_mask, ref_and_mask, 0x20); 4961 } 4962 4963 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4964 struct amdgpu_job *job, 4965 struct amdgpu_ib *ib, 4966 uint32_t flags) 4967 { 4968 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4969 u32 header, control = 0; 4970 4971 if (ib->flags & AMDGPU_IB_FLAG_CE) 4972 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4973 else 4974 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4975 4976 control |= ib->length_dw | (vmid << 24); 4977 4978 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4979 control |= INDIRECT_BUFFER_PRE_ENB(1); 4980 4981 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 4982 gfx_v9_0_ring_emit_de_meta(ring); 4983 } 4984 4985 amdgpu_ring_write(ring, header); 4986 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4987 amdgpu_ring_write(ring, 4988 #ifdef __BIG_ENDIAN 4989 (2 << 0) | 4990 #endif 4991 lower_32_bits(ib->gpu_addr)); 4992 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4993 amdgpu_ring_write(ring, control); 4994 } 4995 4996 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4997 struct amdgpu_job *job, 4998 struct amdgpu_ib *ib, 4999 uint32_t flags) 5000 { 5001 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5002 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5003 5004 /* Currently, there is a high possibility to get wave ID mismatch 5005 * between ME and GDS, leading to a hw deadlock, because ME generates 5006 * different wave IDs than the GDS expects. This situation happens 5007 * randomly when at least 5 compute pipes use GDS ordered append. 5008 * The wave IDs generated by ME are also wrong after suspend/resume. 5009 * Those are probably bugs somewhere else in the kernel driver. 5010 * 5011 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5012 * GDS to 0 for this ring (me/pipe). 5013 */ 5014 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5015 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5016 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5017 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5018 } 5019 5020 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5021 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5022 amdgpu_ring_write(ring, 5023 #ifdef __BIG_ENDIAN 5024 (2 << 0) | 5025 #endif 5026 lower_32_bits(ib->gpu_addr)); 5027 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5028 amdgpu_ring_write(ring, control); 5029 } 5030 5031 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5032 u64 seq, unsigned flags) 5033 { 5034 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5035 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5036 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5037 5038 /* RELEASE_MEM - flush caches, send int */ 5039 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5040 amdgpu_ring_write(ring, ((writeback ? 
(EOP_TC_WB_ACTION_EN | 5041 EOP_TC_NC_ACTION_EN) : 5042 (EOP_TCL1_ACTION_EN | 5043 EOP_TC_ACTION_EN | 5044 EOP_TC_WB_ACTION_EN | 5045 EOP_TC_MD_ACTION_EN)) | 5046 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5047 EVENT_INDEX(5))); 5048 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5049 5050 /* 5051 * the address should be Qword aligned if 64bit write, Dword 5052 * aligned if only send 32bit data low (discard data high) 5053 */ 5054 if (write64bit) 5055 BUG_ON(addr & 0x7); 5056 else 5057 BUG_ON(addr & 0x3); 5058 amdgpu_ring_write(ring, lower_32_bits(addr)); 5059 amdgpu_ring_write(ring, upper_32_bits(addr)); 5060 amdgpu_ring_write(ring, lower_32_bits(seq)); 5061 amdgpu_ring_write(ring, upper_32_bits(seq)); 5062 amdgpu_ring_write(ring, 0); 5063 } 5064 5065 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5066 { 5067 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5068 uint32_t seq = ring->fence_drv.sync_seq; 5069 uint64_t addr = ring->fence_drv.gpu_addr; 5070 5071 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5072 lower_32_bits(addr), upper_32_bits(addr), 5073 seq, 0xffffffff, 4); 5074 } 5075 5076 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5077 unsigned vmid, uint64_t pd_addr) 5078 { 5079 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5080 5081 /* compute doesn't have PFP */ 5082 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5083 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5084 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5085 amdgpu_ring_write(ring, 0x0); 5086 } 5087 } 5088 5089 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5090 { 5091 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5092 } 5093 5094 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5095 { 5096 u64 wptr; 5097 5098 /* XXX check if swapping is necessary on BE */ 5099 if (ring->use_doorbell) 5100 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5101 else 5102 BUG(); 5103 return wptr; 5104 } 5105 5106 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 5107 bool acquire) 5108 { 5109 struct amdgpu_device *adev = ring->adev; 5110 int pipe_num, tmp, reg; 5111 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 5112 5113 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 5114 5115 /* first me only has 2 entries, GFX and HP3D */ 5116 if (ring->me > 0) 5117 pipe_num -= 2; 5118 5119 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 5120 tmp = RREG32(reg); 5121 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 5122 WREG32(reg, tmp); 5123 } 5124 5125 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 5126 struct amdgpu_ring *ring, 5127 bool acquire) 5128 { 5129 int i, pipe; 5130 bool reserve; 5131 struct amdgpu_ring *iring; 5132 5133 mutex_lock(&adev->gfx.pipe_reserve_mutex); 5134 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 5135 if (acquire) 5136 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5137 else 5138 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5139 5140 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 5141 /* Clear all reservations - everyone reacquires all resources */ 5142 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 5143 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 5144 true); 5145 5146 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 5147 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 5148 true); 5149 } else { 5150 /* Lower all pipes without a current reservation */ 5151 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 5152 iring = &adev->gfx.gfx_ring[i]; 5153 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5154 iring->me, 5155 iring->pipe, 5156 0); 5157 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5158 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5159 } 5160 5161 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 5162 iring = &adev->gfx.compute_ring[i]; 5163 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5164 iring->me, 5165 iring->pipe, 5166 0); 5167 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5168 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5169 } 5170 } 5171 5172 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 5173 } 5174 5175 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 5176 struct amdgpu_ring *ring, 5177 bool acquire) 5178 { 5179 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 5180 uint32_t queue_priority = acquire ? 
0xf : 0x0; 5181 5182 mutex_lock(&adev->srbm_mutex); 5183 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5184 5185 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 5186 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 5187 5188 soc15_grbm_select(adev, 0, 0, 0, 0); 5189 mutex_unlock(&adev->srbm_mutex); 5190 } 5191 5192 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 5193 enum drm_sched_priority priority) 5194 { 5195 struct amdgpu_device *adev = ring->adev; 5196 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 5197 5198 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 5199 return; 5200 5201 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 5202 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 5203 } 5204 5205 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5206 { 5207 struct amdgpu_device *adev = ring->adev; 5208 5209 /* XXX check if swapping is necessary on BE */ 5210 if (ring->use_doorbell) { 5211 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5212 WDOORBELL64(ring->doorbell_index, ring->wptr); 5213 } else{ 5214 BUG(); /* only DOORBELL method supported on gfx9 now */ 5215 } 5216 } 5217 5218 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5219 u64 seq, unsigned int flags) 5220 { 5221 struct amdgpu_device *adev = ring->adev; 5222 5223 /* we only allocate 32bit for each seq wb address */ 5224 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5225 5226 /* write fence seq to the "addr" */ 5227 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5228 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5229 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5230 amdgpu_ring_write(ring, lower_32_bits(addr)); 5231 amdgpu_ring_write(ring, upper_32_bits(addr)); 5232 amdgpu_ring_write(ring, lower_32_bits(seq)); 5233 5234 if (flags & AMDGPU_FENCE_FLAG_INT) { 5235 /* set register to trigger INT */ 5236 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5237 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5238 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5239 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5240 amdgpu_ring_write(ring, 0); 5241 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5242 } 5243 } 5244 5245 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5246 { 5247 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5248 amdgpu_ring_write(ring, 0); 5249 } 5250 5251 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5252 { 5253 struct v9_ce_ib_state ce_payload = {0}; 5254 uint64_t csa_addr; 5255 int cnt; 5256 5257 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5258 csa_addr = amdgpu_csa_vaddr(ring->adev); 5259 5260 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5261 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5262 WRITE_DATA_DST_SEL(8) | 5263 WR_CONFIRM) | 5264 WRITE_DATA_CACHE_POLICY(0)); 5265 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5266 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5267 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5268 } 5269 5270 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5271 { 5272 struct v9_de_ib_state de_payload = {0}; 5273 uint64_t csa_addr, gds_addr; 5274 int cnt; 5275 5276 csa_addr = amdgpu_csa_vaddr(ring->adev); 5277 gds_addr = csa_addr + 4096; 5278 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 5279 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5280 5281 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5282 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5283 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5284 WRITE_DATA_DST_SEL(8) | 5285 WR_CONFIRM) | 5286 WRITE_DATA_CACHE_POLICY(0)); 5287 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5288 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5289 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5290 } 5291 5292 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5293 { 5294 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5295 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5296 } 5297 5298 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5299 { 5300 uint32_t dw2 = 0; 5301 5302 if (amdgpu_sriov_vf(ring->adev)) 5303 gfx_v9_0_ring_emit_ce_meta(ring); 5304 5305 gfx_v9_0_ring_emit_tmz(ring, true); 5306 5307 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5308 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5309 /* set load_global_config & load_global_uconfig */ 5310 dw2 |= 0x8001; 5311 /* set load_cs_sh_regs */ 5312 dw2 |= 0x01000000; 5313 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5314 dw2 |= 0x10002; 5315 5316 /* set load_ce_ram if preamble presented */ 5317 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5318 dw2 |= 0x10000000; 5319 } else { 5320 /* still load_ce_ram if this is the first time preamble presented 5321 * although there is no context switch happens. 5322 */ 5323 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5324 dw2 |= 0x10000000; 5325 } 5326 5327 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5328 amdgpu_ring_write(ring, dw2); 5329 amdgpu_ring_write(ring, 0); 5330 } 5331 5332 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5333 { 5334 unsigned ret; 5335 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5336 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5337 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5338 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5339 ret = ring->wptr & ring->buf_mask; 5340 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5341 return ret; 5342 } 5343 5344 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5345 { 5346 unsigned cur; 5347 BUG_ON(offset > ring->buf_mask); 5348 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5349 5350 cur = (ring->wptr & ring->buf_mask) - 1; 5351 if (likely(cur > offset)) 5352 ring->ring[offset] = cur - offset; 5353 else 5354 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5355 } 5356 5357 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5358 { 5359 struct amdgpu_device *adev = ring->adev; 5360 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 5361 5362 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5363 amdgpu_ring_write(ring, 0 | /* src: register*/ 5364 (5 << 8) | /* dst: memory */ 5365 (1 << 20)); /* write confirm */ 5366 amdgpu_ring_write(ring, reg); 5367 amdgpu_ring_write(ring, 0); 5368 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5369 kiq->reg_val_offs * 4)); 5370 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5371 kiq->reg_val_offs * 4)); 5372 } 5373 
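/* Descriptive note (not in the original source): gfx_v9_0_ring_emit_wreg below writes @val to register @reg with a WRITE_DATA packet; GFX rings select ENGINE_SEL(1) (PFP) with write confirmation, the KIQ sets the no-increment-address bit (1 << 16), and other ring types only request write confirmation. It is hooked up as .emit_wreg in the ring funcs tables. */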
5374 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5375 uint32_t val) 5376 { 5377 uint32_t cmd = 0; 5378 5379 switch (ring->funcs->type) { 5380 case AMDGPU_RING_TYPE_GFX: 5381 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5382 break; 5383 case AMDGPU_RING_TYPE_KIQ: 5384 cmd = (1 << 16); /* no inc addr */ 5385 break; 5386 default: 5387 cmd = WR_CONFIRM; 5388 break; 5389 } 5390 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5391 amdgpu_ring_write(ring, cmd); 5392 amdgpu_ring_write(ring, reg); 5393 amdgpu_ring_write(ring, 0); 5394 amdgpu_ring_write(ring, val); 5395 } 5396 5397 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5398 uint32_t val, uint32_t mask) 5399 { 5400 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5401 } 5402 5403 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5404 uint32_t reg0, uint32_t reg1, 5405 uint32_t ref, uint32_t mask) 5406 { 5407 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5408 struct amdgpu_device *adev = ring->adev; 5409 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5410 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5411 5412 if (fw_version_ok) 5413 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5414 ref, mask, 0x20); 5415 else 5416 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5417 ref, mask); 5418 } 5419 5420 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5421 { 5422 struct amdgpu_device *adev = ring->adev; 5423 uint32_t value = 0; 5424 5425 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5426 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5427 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5428 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5429 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5430 } 5431 5432 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5433 enum amdgpu_interrupt_state state) 5434 { 5435 switch (state) { 5436 case AMDGPU_IRQ_STATE_DISABLE: 5437 case AMDGPU_IRQ_STATE_ENABLE: 5438 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5439 TIME_STAMP_INT_ENABLE, 5440 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5441 break; 5442 default: 5443 break; 5444 } 5445 } 5446 5447 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5448 int me, int pipe, 5449 enum amdgpu_interrupt_state state) 5450 { 5451 u32 mec_int_cntl, mec_int_cntl_reg; 5452 5453 /* 5454 * amdgpu controls only the first MEC. That's why this function only 5455 * handles the setting of interrupts for this specific MEC. All other 5456 * pipes' interrupts are set by amdkfd. 
5457 */ 5458 5459 if (me == 1) { 5460 switch (pipe) { 5461 case 0: 5462 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5463 break; 5464 case 1: 5465 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5466 break; 5467 case 2: 5468 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5469 break; 5470 case 3: 5471 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5472 break; 5473 default: 5474 DRM_DEBUG("invalid pipe %d\n", pipe); 5475 return; 5476 } 5477 } else { 5478 DRM_DEBUG("invalid me %d\n", me); 5479 return; 5480 } 5481 5482 switch (state) { 5483 case AMDGPU_IRQ_STATE_DISABLE: 5484 mec_int_cntl = RREG32(mec_int_cntl_reg); 5485 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5486 TIME_STAMP_INT_ENABLE, 0); 5487 WREG32(mec_int_cntl_reg, mec_int_cntl); 5488 break; 5489 case AMDGPU_IRQ_STATE_ENABLE: 5490 mec_int_cntl = RREG32(mec_int_cntl_reg); 5491 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5492 TIME_STAMP_INT_ENABLE, 1); 5493 WREG32(mec_int_cntl_reg, mec_int_cntl); 5494 break; 5495 default: 5496 break; 5497 } 5498 } 5499 5500 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5501 struct amdgpu_irq_src *source, 5502 unsigned type, 5503 enum amdgpu_interrupt_state state) 5504 { 5505 switch (state) { 5506 case AMDGPU_IRQ_STATE_DISABLE: 5507 case AMDGPU_IRQ_STATE_ENABLE: 5508 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5509 PRIV_REG_INT_ENABLE, 5510 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5511 break; 5512 default: 5513 break; 5514 } 5515 5516 return 0; 5517 } 5518 5519 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5520 struct amdgpu_irq_src *source, 5521 unsigned type, 5522 enum amdgpu_interrupt_state state) 5523 { 5524 switch (state) { 5525 case AMDGPU_IRQ_STATE_DISABLE: 5526 case AMDGPU_IRQ_STATE_ENABLE: 5527 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5528 PRIV_INSTR_INT_ENABLE, 5529 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); break; 5530 default: 5531 break; 5532 } 5533 5534 return 0; 5535 } 5536 5537 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5538 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5539 CP_ECC_ERROR_INT_ENABLE, 1) 5540 5541 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5542 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5543 CP_ECC_ERROR_INT_ENABLE, 0) 5544 5545 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5546 struct amdgpu_irq_src *source, 5547 unsigned type, 5548 enum amdgpu_interrupt_state state) 5549 { 5550 switch (state) { 5551 case AMDGPU_IRQ_STATE_DISABLE: 5552 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5553 CP_ECC_ERROR_INT_ENABLE, 0); 5554 DISABLE_ECC_ON_ME_PIPE(1, 0); 5555 DISABLE_ECC_ON_ME_PIPE(1, 1); 5556 DISABLE_ECC_ON_ME_PIPE(1, 2); 5557 DISABLE_ECC_ON_ME_PIPE(1, 3); 5558 break; 5559 5560 case AMDGPU_IRQ_STATE_ENABLE: 5561 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5562 CP_ECC_ERROR_INT_ENABLE, 1); 5563 ENABLE_ECC_ON_ME_PIPE(1, 0); 5564 ENABLE_ECC_ON_ME_PIPE(1, 1); 5565 ENABLE_ECC_ON_ME_PIPE(1, 2); 5566 ENABLE_ECC_ON_ME_PIPE(1, 3); 5567 break; 5568 default: 5569 break; 5570 } 5571 5572 return 0; 5573 } 5574 5575 5576 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5577 struct amdgpu_irq_src *src, 5578 unsigned type, 5579 enum amdgpu_interrupt_state state) 5580 { 5581 switch (type) { 5582 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5583 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5584 break; 5585 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5586 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5587 break; 5588 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5589 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5590 break; 5591 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5592 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5593 break; 5594 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5595 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5596 break; 5597 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5598 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5599 break; 5600 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5601 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5602 break; 5603 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5604 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5605 break; 5606 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5607 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5608 break; 5609 default: 5610 break; 5611 } 5612 return 0; 5613 } 5614 5615 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5616 struct amdgpu_irq_src *source, 5617 struct amdgpu_iv_entry *entry) 5618 { 5619 int i; 5620 u8 me_id, pipe_id, queue_id; 5621 struct amdgpu_ring *ring; 5622 5623 DRM_DEBUG("IH: CP EOP\n"); 5624 me_id = (entry->ring_id & 0x0c) >> 2; 5625 pipe_id = (entry->ring_id & 0x03) >> 0; 5626 queue_id = (entry->ring_id & 0x70) >> 4; 5627 5628 switch (me_id) { 5629 case 0: 5630 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5631 break; 5632 case 1: 5633 case 2: 5634 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5635 ring = &adev->gfx.compute_ring[i]; 5636 /* Per-queue interrupt is supported for MEC starting from VI. 5637 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5638 */ 5639 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5640 amdgpu_fence_process(ring); 5641 } 5642 break; 5643 } 5644 return 0; 5645 } 5646 5647 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5648 struct amdgpu_iv_entry *entry) 5649 { 5650 u8 me_id, pipe_id, queue_id; 5651 struct amdgpu_ring *ring; 5652 int i; 5653 5654 me_id = (entry->ring_id & 0x0c) >> 2; 5655 pipe_id = (entry->ring_id & 0x03) >> 0; 5656 queue_id = (entry->ring_id & 0x70) >> 4; 5657 5658 switch (me_id) { 5659 case 0: 5660 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5661 break; 5662 case 1: 5663 case 2: 5664 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5665 ring = &adev->gfx.compute_ring[i]; 5666 if (ring->me == me_id && ring->pipe == pipe_id && 5667 ring->queue == queue_id) 5668 drm_sched_fault(&ring->sched); 5669 } 5670 break; 5671 } 5672 } 5673 5674 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5675 struct amdgpu_irq_src *source, 5676 struct amdgpu_iv_entry *entry) 5677 { 5678 DRM_ERROR("Illegal register access in command stream\n"); 5679 gfx_v9_0_fault(adev, entry); 5680 return 0; 5681 } 5682 5683 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5684 struct amdgpu_irq_src *source, 5685 struct amdgpu_iv_entry *entry) 5686 { 5687 DRM_ERROR("Illegal instruction in command stream\n"); 5688 gfx_v9_0_fault(adev, entry); 5689 return 0; 5690 } 5691 5692 5693 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 5694 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 5695 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5696 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 5697 }, 5698 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 5699 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 5700 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 5701 }, 5702 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5703 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 5704 0, 0 5705 }, 5706 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5707 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 5708 0, 0 5709 }, 5710 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 5711 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 5712 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 5713 }, 5714 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5715 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 5716 0, 0 5717 }, 5718 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5719 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5720 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 5721 }, 5722 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 5723 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 5724 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 5725 }, 5726 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 5727 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 5728 0, 0 5729 }, 5730 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 5731 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 5732 0, 0 5733 }, 5734 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 5735 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 5736 0, 0 5737 }, 5738 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5739 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 5740 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 5741 }, 5742 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5743 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 5744 0, 0 5745 }, 5746 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, 
mmGDS_EDC_OA_PHY_CNT), 5747 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5748 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 5749 }, 5750 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5751 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5752 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5753 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 5754 }, 5755 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5756 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5757 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 5758 0, 0 5759 }, 5760 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5761 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5762 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5763 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 5764 }, 5765 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5766 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5767 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5768 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 5769 }, 5770 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5771 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5772 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5773 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 5774 }, 5775 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5776 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5777 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5778 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 5779 }, 5780 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 5781 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 5782 0, 0 5783 }, 5784 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5785 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5786 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 5787 }, 5788 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5789 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 5790 0, 0 5791 }, 5792 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5793 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 5794 0, 0 5795 }, 5796 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5797 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 5798 0, 0 5799 }, 5800 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5801 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 5802 0, 0 5803 }, 5804 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5805 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 5806 0, 0 5807 }, 5808 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5809 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 5810 0, 0 5811 }, 5812 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5813 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5814 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 5815 }, 5816 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5817 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5818 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 5819 }, 5820 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5821 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5822 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 5823 }, 5824 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5825 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5826 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 5827 }, 5828 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5829 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5830 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 5831 }, 5832 
{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5833 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 5834 0, 0 5835 }, 5836 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5837 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 5838 0, 0 5839 }, 5840 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5841 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 5842 0, 0 5843 }, 5844 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5845 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 5846 0, 0 5847 }, 5848 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5849 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 5850 0, 0 5851 }, 5852 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5853 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 5854 0, 0 5855 }, 5856 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5857 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 5858 0, 0 5859 }, 5860 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5861 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 5862 0, 0 5863 }, 5864 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5865 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 5866 0, 0 5867 }, 5868 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5869 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5870 0, 0 5871 }, 5872 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5873 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 5874 0, 0 5875 }, 5876 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5877 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5878 0, 0 5879 }, 5880 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5881 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 5882 0, 0 5883 }, 5884 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 5885 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 5886 0, 0 5887 }, 5888 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5889 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5890 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 5891 }, 5892 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5893 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5894 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 5895 }, 5896 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5897 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 5898 0, 0 5899 }, 5900 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5901 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 5902 0, 0 5903 }, 5904 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5905 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 5906 0, 0 5907 }, 5908 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5909 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5910 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 5911 }, 5912 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5913 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5914 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 5915 }, 5916 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5917 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5918 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 5919 }, 5920 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, 
mmTD_EDC_CNT), 5921 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5922 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 5923 }, 5924 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5925 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 5926 0, 0 5927 }, 5928 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5929 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5930 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 5931 }, 5932 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5933 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5934 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 5935 }, 5936 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5937 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 5938 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 5939 }, 5940 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5941 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5942 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 5943 }, 5944 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5945 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5946 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 5947 }, 5948 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5949 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5950 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 5951 }, 5952 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5953 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5954 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 5955 }, 5956 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5957 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5958 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 5959 }, 5960 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5961 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5962 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 5963 }, 5964 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5965 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5966 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 5967 }, 5968 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5969 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5970 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 5971 }, 5972 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5973 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5974 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 5975 }, 5976 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5977 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5978 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 5979 }, 5980 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5981 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5982 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 5983 }, 5984 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5985 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5986 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 5987 }, 5988 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5989 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5990 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 5991 }, 5992 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5993 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5994 SOC15_REG_FIELD(SQC_EDC_CNT2, 
DATA_BANKA_BANK_RAM_DED_COUNT) 5995 }, 5996 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5997 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5998 0, 0 5999 }, 6000 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6001 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6002 0, 0 6003 }, 6004 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6005 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6006 0, 0 6007 }, 6008 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6009 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6010 0, 0 6011 }, 6012 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6013 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6014 0, 0 6015 }, 6016 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6017 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6018 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6019 }, 6020 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6021 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6022 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6023 }, 6024 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6025 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6026 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6027 }, 6028 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6029 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6030 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6031 }, 6032 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6033 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6034 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6035 }, 6036 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6037 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6038 0, 0 6039 }, 6040 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6041 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6042 0, 0 6043 }, 6044 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6045 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6046 0, 0 6047 }, 6048 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6049 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6050 0, 0 6051 }, 6052 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6053 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6054 0, 0 6055 }, 6056 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6057 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6058 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6059 }, 6060 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6061 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6062 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6063 }, 6064 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6065 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6066 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6067 }, 6068 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6069 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6070 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6071 }, 6072 { "EA_WRET_TAGMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6073 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6074 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6075 }, 6076 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6077 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6078 0, 0 6079 }, 6080 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6081 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6082 0, 0 6083 }, 6084 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6085 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6086 0, 0 6087 }, 6088 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6089 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6090 0, 0 6091 }, 6092 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6093 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6094 0, 0 6095 }, 6096 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6097 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6098 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6099 }, 6100 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6101 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6102 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6103 }, 6104 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6105 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6106 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6107 }, 6108 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6109 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6110 0, 0 6111 }, 6112 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6113 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6114 0, 0 6115 }, 6116 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6117 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6118 0, 0 6119 }, 6120 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6121 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6122 0, 0 6123 }, 6124 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6125 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6126 0, 0 6127 }, 6128 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6129 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6130 0, 0 6131 } 6132 }; 6133 6134 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6135 void *inject_if) 6136 { 6137 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6138 int ret; 6139 struct ta_ras_trigger_error_input block_info = { 0 }; 6140 6141 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6142 return -EINVAL; 6143 6144 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6145 return -EINVAL; 6146 6147 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6148 return -EPERM; 6149 6150 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6151 info->head.type)) { 6152 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6153 ras_gfx_subblocks[info->head.sub_block_index].name, 6154 info->head.type); 6155 return -EPERM; 6156 } 6157 6158 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6159 info->head.type)) { 6160 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6161 ras_gfx_subblocks[info->head.sub_block_index].name, 6162 info->head.type); 6163 return -EPERM; 6164 } 6165 6166 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6167 block_info.sub_block_index = 6168 
ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6169 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6170 block_info.address = info->address; 6171 block_info.value = info->value; 6172 6173 mutex_lock(&adev->grbm_idx_mutex); 6174 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6175 mutex_unlock(&adev->grbm_idx_mutex); 6176 6177 return ret; 6178 } 6179 6180 static const char *vml2_mems[] = { 6181 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6182 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6183 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6184 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6185 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6186 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6187 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6188 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6189 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6190 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6191 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6192 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6193 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6194 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6195 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6196 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6197 }; 6198 6199 static const char *vml2_walker_mems[] = { 6200 "UTC_VML2_CACHE_PDE0_MEM0", 6201 "UTC_VML2_CACHE_PDE0_MEM1", 6202 "UTC_VML2_CACHE_PDE1_MEM0", 6203 "UTC_VML2_CACHE_PDE1_MEM1", 6204 "UTC_VML2_CACHE_PDE2_MEM0", 6205 "UTC_VML2_CACHE_PDE2_MEM1", 6206 "UTC_VML2_RDIF_LOG_FIFO", 6207 }; 6208 6209 static const char *atc_l2_cache_2m_mems[] = { 6210 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6211 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6212 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6213 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6214 }; 6215 6216 static const char *atc_l2_cache_4k_mems[] = { 6217 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6218 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6219 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6220 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6221 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6222 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6223 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6224 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6225 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6226 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6227 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6228 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6229 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6230 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6231 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6232 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6233 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6234 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6235 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6236 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6237 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6238 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6239 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6240 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6241 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6242 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6243 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6244 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6245 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6246 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6247 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6248 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6249 }; 6250 6251 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6252 struct ras_err_data *err_data) 6253 { 6254 uint32_t i, data; 6255 uint32_t sec_count, ded_count; 6256 6257 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6258 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6259 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6260 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6261 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6262 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 
6263 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6264 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6265 6266 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6267 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6268 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6269 6270 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6271 if (sec_count) { 6272 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6273 vml2_mems[i], sec_count); 6274 err_data->ce_count += sec_count; 6275 } 6276 6277 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6278 if (ded_count) { 6279 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6280 vml2_mems[i], ded_count); 6281 err_data->ue_count += ded_count; 6282 } 6283 } 6284 6285 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6286 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6287 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6288 6289 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6290 SEC_COUNT); 6291 if (sec_count) { 6292 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6293 vml2_walker_mems[i], sec_count); 6294 err_data->ce_count += sec_count; 6295 } 6296 6297 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6298 DED_COUNT); 6299 if (ded_count) { 6300 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6301 vml2_walker_mems[i], ded_count); 6302 err_data->ue_count += ded_count; 6303 } 6304 } 6305 6306 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6307 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6308 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6309 6310 sec_count = (data & 0x00006000L) >> 0xd; 6311 if (sec_count) { 6312 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6313 atc_l2_cache_2m_mems[i], sec_count); 6314 err_data->ce_count += sec_count; 6315 } 6316 } 6317 6318 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6319 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6320 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6321 6322 sec_count = (data & 0x00006000L) >> 0xd; 6323 if (sec_count) { 6324 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6325 atc_l2_cache_4k_mems[i], sec_count); 6326 err_data->ce_count += sec_count; 6327 } 6328 6329 ded_count = (data & 0x00018000L) >> 0xf; 6330 if (ded_count) { 6331 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6332 atc_l2_cache_4k_mems[i], ded_count); 6333 err_data->ue_count += ded_count; 6334 } 6335 } 6336 6337 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6338 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6339 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6340 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6341 6342 return 0; 6343 } 6344 6345 static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg, 6346 uint32_t se_id, uint32_t inst_id, uint32_t value, 6347 uint32_t *sec_count, uint32_t *ded_count) 6348 { 6349 uint32_t i; 6350 uint32_t sec_cnt, ded_cnt; 6351 6352 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6353 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6354 gfx_v9_0_ras_fields[i].seg != reg->seg || 6355 gfx_v9_0_ras_fields[i].inst != reg->inst) 6356 continue; 6357 6358 sec_cnt = (value & 6359 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6360 gfx_v9_0_ras_fields[i].sec_count_shift; 6361 if (sec_cnt) { 6362 DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", 6363 gfx_v9_0_ras_fields[i].name, 6364 se_id, inst_id, 6365 sec_cnt); 6366 *sec_count += sec_cnt; 6367 } 6368 6369 ded_cnt = (value & 6370 
		ded_cnt = (value &
				gfx_v9_0_ras_fields[i].ded_count_mask) >>
				gfx_v9_0_ras_fields[i].ded_count_shift;
		if (ded_cnt) {
			DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
				 gfx_v9_0_ras_fields[i].name,
				 se_id, inst_id,
				 ded_cnt);
			*ded_count += ded_cnt;
		}
	}

	return 0;
}

static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
{
	int i, j, k;

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
			}
		}
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
	}

	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}

static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count = 0, ded_count = 0;
	uint32_t i, j, k;
	uint32_t reg_value;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return -EINVAL;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0, k);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
							j, k, reg_value,
							&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);

	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* 242 maximum in total, if 16 IBs */
		5 + /* COND_EXEC */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which is usually suitable
			 * for Vega ASICs with their 4*2 SE/SH layout.
			 * But for Arcturus, the SE/SH layout changes to 8*1.
			 * To minimize the impact, we keep it compatible with
			 * the current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
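
/*
 * Usage sketch (not part of the original file): the exported gfx_v9_0_ip_block
 * above is intended to be registered by the SoC setup code during device
 * init. amdgpu_device_ip_block_add() is the existing amdgpu helper for that;
 * the wrapper function below is a hypothetical illustration only, assuming it
 * is called from the SoC-level ip-block setup path (e.g. soc15.c):
 *
 *	static int example_register_gfx_ip(struct amdgpu_device *adev)
 *	{
 *		return amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *	}
 */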