/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "sdma0/sdma0_4_0_offset.h"
#include "sdma1/sdma1_4_0_offset.h"
#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

#define mmGCEA_PROBE_MAP			0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX		0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 95 96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 103 104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 111 112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 113 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 115 116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 120 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); 121 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 122 123 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 124 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 125 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 126 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 127 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 128 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 129 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 130 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 131 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 132 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 133 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 134 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 135 136 enum ta_ras_gfx_subblock { 137 /*CPC*/ 138 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 139 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 140 TA_RAS_BLOCK__GFX_CPC_UCODE, 141 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 142 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 143 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 144 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 145 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 146 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 147 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 148 /* CPF*/ 149 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 150 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 151 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 152 TA_RAS_BLOCK__GFX_CPF_TAG, 153 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 154 /* CPG*/ 155 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 156 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 157 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 158 TA_RAS_BLOCK__GFX_CPG_TAG, 159 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 160 /* GDS*/ 161 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 162 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 163 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 164 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 165 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 166 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 167 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 168 /* SPI*/ 169 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 170 /* SQ*/ 171 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 172 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 173 TA_RAS_BLOCK__GFX_SQ_LDS_D, 174 TA_RAS_BLOCK__GFX_SQ_LDS_I, 175 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 176 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 177 /* SQC (3 ranges)*/ 178 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 179 /* SQC range 0*/ 180 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 181 
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};
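
/*
 * AMDGPU_RAS_SUB_BLOCK(name, a, b, c, d, e, f, g, h) below packs the two
 * capability words of struct ras_gfx_subblock from single-bit flags:
 *   hw_supported_error_type = a | (b << 1) | (c << 2) | (d << 3)
 *   sw_supported_error_type = g | (e << 1) | (h << 2) | (f << 3)
 * For illustration only: AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1,
 * 1, 0, 0, 1) therefore yields hw_supported_error_type = 0xe and
 * sw_supported_error_type = 0x6.  The meaning of each individual bit is
 * defined by the RAS TA interface and is not spelled out here.
 */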
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)	\
	[AMDGPU_RAS_BLOCK__##subblock] = {			\
		#subblock,					\
		TA_RAS_BLOCK__##subblock,			\
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),	\
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),	\
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};
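
/*
 * Golden register tables.  Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg, mask,
 * value) entry is applied by soc15_program_register_sequence(), which (as of
 * this code base) does a read-modify-write: only the bits selected by the
 * mask are replaced with the corresponding bits of the value, so a mask of
 * 0xffffffff effectively writes the value verbatim.  The tables are chosen
 * per ASIC in gfx_v9_0_init_golden_registers() further below.
 */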
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* Renoir does not need the common golden settings */
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
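
/*
 * gfx_v9_0_write_data_to_reg() emits a single PM4 WRITE_DATA packet: the
 * opcode header, a control dword (engine select, DST_SEL(0) for a register
 * destination, optional write confirm), the register offset as the low
 * destination address, a zero high address, and finally the value to write.
 */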
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
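
/*
 * Unlike the ring test above, the IB test goes through system memory: a
 * writeback (WB) slot is seeded with 0xCAFEDEAD and a small indirect buffer
 * containing a WRITE_DATA packet (DST_SEL(5), i.e. a memory destination)
 * overwrites it with 0xDEADBEEF.  Finding the new value after the fence
 * signals confirms that the CP fetched and executed the IB.
 */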
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
	    (adev->gfx.mec_feature_version < 46) ||
	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
	    (adev->gfx.pfp_feature_version < 46))
		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
			      "GRBM requires 1-cycle delay in cp firmware\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}
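
/*
 * GFXOFF is only left enabled where the platform is known to cope with it:
 * on original Raven (rev_id < 0x8 and not the 0x15d8 Picasso device) it is
 * masked out of pp_feature below, and whenever GFXOFF survives the checks
 * the matching GFX powergating flags are enabled as well.
 */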
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		/* Disable GFXOFF on original raven.  There are combinations
		 * of sbios and platforms that are not stable.
		 */
		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			 &&((adev->gfx.rlc_fw_version != 106 &&
			     adev->gfx.rlc_fw_version < 531) ||
			    (adev->gfx.rlc_fw_version == 53815) ||
			    (adev->gfx.rlc_feature_version < 1) ||
			    !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	case CHIP_RENOIR:
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 * or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}
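
/*
 * Compute microcode: MEC1 firmware is required, MEC2 is optional.  A missing
 * mec2 image is tolerated below and simply leaves mec2_fw NULL.  When
 * firmware is loaded through the PSP (AMDGPU_FW_LOAD_PSP), the jump table
 * portion described by jt_size is registered as its own ucode entry,
 * separate from the main MEC text, and the MEC2 jump table is skipped on
 * Arcturus and Renoir.
 */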
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
					      const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
1316 adev->gfx.mec2_feature_version = 1317 le32_to_cpu(cp_hdr->ucode_feature_version); 1318 } else { 1319 err = 0; 1320 adev->gfx.mec2_fw = NULL; 1321 } 1322 1323 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1324 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1325 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1326 info->fw = adev->gfx.mec_fw; 1327 header = (const struct common_firmware_header *)info->fw->data; 1328 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1329 adev->firmware.fw_size += 1330 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1331 1332 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1333 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1334 info->fw = adev->gfx.mec_fw; 1335 adev->firmware.fw_size += 1336 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1337 1338 if (adev->gfx.mec2_fw) { 1339 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1340 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1341 info->fw = adev->gfx.mec2_fw; 1342 header = (const struct common_firmware_header *)info->fw->data; 1343 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1344 adev->firmware.fw_size += 1345 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1346 1347 /* TODO: Determine if MEC2 JT FW loading can be removed 1348 for all GFX V9 asic and above */ 1349 if (adev->asic_type != CHIP_ARCTURUS && 1350 adev->asic_type != CHIP_RENOIR) { 1351 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1352 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1353 info->fw = adev->gfx.mec2_fw; 1354 adev->firmware.fw_size += 1355 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1356 PAGE_SIZE); 1357 } 1358 } 1359 } 1360 1361 out: 1362 gfx_v9_0_check_if_need_gfxoff(adev); 1363 gfx_v9_0_check_fw_write_wait(adev); 1364 if (err) { 1365 dev_err(adev->dev, 1366 "gfx9: Failed to load firmware \"%s\"\n", 1367 fw_name); 1368 release_firmware(adev->gfx.mec_fw); 1369 adev->gfx.mec_fw = NULL; 1370 release_firmware(adev->gfx.mec2_fw); 1371 adev->gfx.mec2_fw = NULL; 1372 } 1373 return err; 1374 } 1375 1376 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1377 { 1378 const char *chip_name; 1379 int r; 1380 1381 DRM_DEBUG("\n"); 1382 1383 switch (adev->asic_type) { 1384 case CHIP_VEGA10: 1385 chip_name = "vega10"; 1386 break; 1387 case CHIP_VEGA12: 1388 chip_name = "vega12"; 1389 break; 1390 case CHIP_VEGA20: 1391 chip_name = "vega20"; 1392 break; 1393 case CHIP_RAVEN: 1394 if (adev->rev_id >= 8) 1395 chip_name = "raven2"; 1396 else if (adev->pdev->device == 0x15d8) 1397 chip_name = "picasso"; 1398 else 1399 chip_name = "raven"; 1400 break; 1401 case CHIP_ARCTURUS: 1402 chip_name = "arcturus"; 1403 break; 1404 case CHIP_RENOIR: 1405 chip_name = "renoir"; 1406 break; 1407 default: 1408 BUG(); 1409 } 1410 1411 /* No CPG in Arcturus */ 1412 if (adev->asic_type != CHIP_ARCTURUS) { 1413 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1414 if (r) 1415 return r; 1416 } 1417 1418 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1419 if (r) 1420 return r; 1421 1422 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1423 if (r) 1424 return r; 1425 1426 return r; 1427 } 1428 1429 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1430 { 1431 u32 count = 0; 1432 const struct cs_section_def *sect = NULL; 1433 const struct cs_extent_def *ext = NULL; 1434 1435 /* begin clear state */ 1436 count += 2; 1437 /* context control state */ 1438 count += 3; 1439 1440 for (sect 
= gfx9_cs_data; sect->section != NULL; ++sect) { 1441 for (ext = sect->section; ext->extent != NULL; ++ext) { 1442 if (sect->id == SECT_CONTEXT) 1443 count += 2 + ext->reg_count; 1444 else 1445 return 0; 1446 } 1447 } 1448 1449 /* end clear state */ 1450 count += 2; 1451 /* clear state */ 1452 count += 2; 1453 1454 return count; 1455 } 1456 1457 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1458 volatile u32 *buffer) 1459 { 1460 u32 count = 0, i; 1461 const struct cs_section_def *sect = NULL; 1462 const struct cs_extent_def *ext = NULL; 1463 1464 if (adev->gfx.rlc.cs_data == NULL) 1465 return; 1466 if (buffer == NULL) 1467 return; 1468 1469 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1470 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1471 1472 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1473 buffer[count++] = cpu_to_le32(0x80000000); 1474 buffer[count++] = cpu_to_le32(0x80000000); 1475 1476 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1477 for (ext = sect->section; ext->extent != NULL; ++ext) { 1478 if (sect->id == SECT_CONTEXT) { 1479 buffer[count++] = 1480 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1481 buffer[count++] = cpu_to_le32(ext->reg_index - 1482 PACKET3_SET_CONTEXT_REG_START); 1483 for (i = 0; i < ext->reg_count; i++) 1484 buffer[count++] = cpu_to_le32(ext->extent[i]); 1485 } else { 1486 return; 1487 } 1488 } 1489 } 1490 1491 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1492 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1493 1494 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1495 buffer[count++] = cpu_to_le32(0); 1496 } 1497 1498 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1499 { 1500 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1501 uint32_t pg_always_on_cu_num = 2; 1502 uint32_t always_on_cu_num; 1503 uint32_t i, j, k; 1504 uint32_t mask, cu_bitmap, counter; 1505 1506 if (adev->flags & AMD_IS_APU) 1507 always_on_cu_num = 4; 1508 else if (adev->asic_type == CHIP_VEGA12) 1509 always_on_cu_num = 8; 1510 else 1511 always_on_cu_num = 12; 1512 1513 mutex_lock(&adev->grbm_idx_mutex); 1514 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1515 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1516 mask = 1; 1517 cu_bitmap = 0; 1518 counter = 0; 1519 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1520 1521 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1522 if (cu_info->bitmap[i][j] & mask) { 1523 if (counter == pg_always_on_cu_num) 1524 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1525 if (counter < always_on_cu_num) 1526 cu_bitmap |= mask; 1527 else 1528 break; 1529 counter++; 1530 } 1531 mask <<= 1; 1532 } 1533 1534 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1535 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1536 } 1537 } 1538 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1539 mutex_unlock(&adev->grbm_idx_mutex); 1540 } 1541 1542 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1543 { 1544 uint32_t data; 1545 1546 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1547 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1548 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1549 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1550 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1551 1552 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1553 
WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1554 1555 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1556 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1557 1558 mutex_lock(&adev->grbm_idx_mutex); 1559 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1560 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1561 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1562 1563 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1564 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1565 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1566 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1567 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1568 1569 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1570 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1571 data &= 0x0000FFFF; 1572 data |= 0x00C00000; 1573 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1574 1575 /* 1576 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1577 * programmed in gfx_v9_0_init_always_on_cu_mask() 1578 */ 1579 1580 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved, 1581 * but used for RLC_LB_CNTL configuration */ 1582 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1583 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1584 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1585 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1586 mutex_unlock(&adev->grbm_idx_mutex); 1587 1588 gfx_v9_0_init_always_on_cu_mask(adev); 1589 } 1590 1591 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1592 { 1593 uint32_t data; 1594 1595 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1596 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1597 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1598 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1599 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1600 1601 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1602 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1603 1604 /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */ 1605 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1606 1607 mutex_lock(&adev->grbm_idx_mutex); 1608 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1609 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1610 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1611 1612 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1613 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1614 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1615 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1616 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1617 1618 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1619 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1620 data &= 0x0000FFFF; 1621 data |= 0x00C00000; 1622 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1623 1624 /* 1625 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1626 * programmed in gfx_v9_0_init_always_on_cu_mask() 1627 */ 1628 1629 /* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved, 1630 * but used for RLC_LB_CNTL configuration */ 1631 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1632 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1633 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1634 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1635 mutex_unlock(&adev->grbm_idx_mutex); 1636 1637 gfx_v9_0_init_always_on_cu_mask(adev); 1638 } 1639 1640
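/*
 * gfx_v9_0_enable_lbpw - toggle load balancing per watt (LBPW) in the RLC.
 *
 * The thresholds and always-on CU masks that LBPW relies on are programmed
 * by gfx_v9_0_init_lbpw()/gfx_v9_4_init_lbpw() above; this helper only flips
 * RLC_LB_CNTL.LOAD_BALANCE_ENABLE and is driven by the amdgpu_lbpw module
 * parameter from gfx_v9_0_rlc_resume().
 */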
static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1641 { 1642 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1643 } 1644 1645 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1646 { 1647 return 5; 1648 } 1649 1650 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1651 { 1652 const struct cs_section_def *cs_data; 1653 int r; 1654 1655 adev->gfx.rlc.cs_data = gfx9_cs_data; 1656 1657 cs_data = adev->gfx.rlc.cs_data; 1658 1659 if (cs_data) { 1660 /* init clear state block */ 1661 r = amdgpu_gfx_rlc_init_csb(adev); 1662 if (r) 1663 return r; 1664 } 1665 1666 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 1667 /* TODO: double check the cp_table_size for RV */ 1668 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1669 r = amdgpu_gfx_rlc_init_cpt(adev); 1670 if (r) 1671 return r; 1672 } 1673 1674 switch (adev->asic_type) { 1675 case CHIP_RAVEN: 1676 gfx_v9_0_init_lbpw(adev); 1677 break; 1678 case CHIP_VEGA20: 1679 gfx_v9_4_init_lbpw(adev); 1680 break; 1681 default: 1682 break; 1683 } 1684 1685 return 0; 1686 } 1687 1688 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1689 { 1690 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1691 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1692 } 1693 1694 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1695 { 1696 int r; 1697 u32 *hpd; 1698 const __le32 *fw_data; 1699 unsigned fw_size; 1700 u32 *fw; 1701 size_t mec_hpd_size; 1702 1703 const struct gfx_firmware_header_v1_0 *mec_hdr; 1704 1705 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1706 1707 /* take ownership of the relevant compute queues */ 1708 amdgpu_gfx_compute_queue_acquire(adev); 1709 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1710 1711 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1712 AMDGPU_GEM_DOMAIN_VRAM, 1713 &adev->gfx.mec.hpd_eop_obj, 1714 &adev->gfx.mec.hpd_eop_gpu_addr, 1715 (void **)&hpd); 1716 if (r) { 1717 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1718 gfx_v9_0_mec_fini(adev); 1719 return r; 1720 } 1721 1722 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1723 1724 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1725 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1726 1727 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1728 1729 fw_data = (const __le32 *) 1730 (adev->gfx.mec_fw->data + 1731 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1732 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1733 1734 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1735 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1736 &adev->gfx.mec.mec_fw_obj, 1737 &adev->gfx.mec.mec_fw_gpu_addr, 1738 (void **)&fw); 1739 if (r) { 1740 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1741 gfx_v9_0_mec_fini(adev); 1742 return r; 1743 } 1744 1745 memcpy(fw, fw_data, fw_size); 1746 1747 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1748 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1749 1750 return 0; 1751 } 1752 1753 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1754 { 1755 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1756 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1757 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1758 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1759 (SQ_IND_INDEX__FORCE_READ_MASK)); 1760 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 
1761 } 1762 1763 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1764 uint32_t wave, uint32_t thread, 1765 uint32_t regno, uint32_t num, uint32_t *out) 1766 { 1767 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1768 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1769 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1770 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1771 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1772 (SQ_IND_INDEX__FORCE_READ_MASK) | 1773 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1774 while (num--) 1775 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1776 } 1777 1778 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1779 { 1780 /* type 1 wave data */ 1781 dst[(*no_fields)++] = 1; 1782 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1783 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1784 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1785 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1786 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1787 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1788 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1789 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1790 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1791 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1792 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1793 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1794 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1795 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1796 } 1797 1798 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1799 uint32_t wave, uint32_t start, 1800 uint32_t size, uint32_t *dst) 1801 { 1802 wave_read_regs( 1803 adev, simd, wave, 0, 1804 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1805 } 1806 1807 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1808 uint32_t wave, uint32_t thread, 1809 uint32_t start, uint32_t size, 1810 uint32_t *dst) 1811 { 1812 wave_read_regs( 1813 adev, simd, wave, thread, 1814 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1815 } 1816 1817 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1818 u32 me, u32 pipe, u32 q, u32 vm) 1819 { 1820 soc15_grbm_select(adev, me, pipe, q, vm); 1821 } 1822 1823 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1824 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1825 .select_se_sh = &gfx_v9_0_select_se_sh, 1826 .read_wave_data = &gfx_v9_0_read_wave_data, 1827 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1828 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1829 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1830 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1831 .query_ras_error_count = &gfx_v9_0_query_ras_error_count 1832 }; 1833 1834 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1835 { 1836 u32 gb_addr_config; 1837 int err; 1838 1839 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1840 1841 switch (adev->asic_type) { 1842 case CHIP_VEGA10: 1843 adev->gfx.config.max_hw_contexts = 8; 1844 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1845 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1846 adev->gfx.config.sc_hiz_tile_fifo_size = 
0x30; 1847 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1848 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1849 break; 1850 case CHIP_VEGA12: 1851 adev->gfx.config.max_hw_contexts = 8; 1852 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1853 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1854 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1855 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1856 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1857 DRM_INFO("fix gfx.config for vega12\n"); 1858 break; 1859 case CHIP_VEGA20: 1860 adev->gfx.config.max_hw_contexts = 8; 1861 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1862 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1863 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1864 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1865 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1866 gb_addr_config &= ~0xf3e777ff; 1867 gb_addr_config |= 0x22014042; 1868 /* check vbios table if gpu info is not available */ 1869 err = amdgpu_atomfirmware_get_gfx_info(adev); 1870 if (err) 1871 return err; 1872 break; 1873 case CHIP_RAVEN: 1874 adev->gfx.config.max_hw_contexts = 8; 1875 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1876 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1877 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1878 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1879 if (adev->rev_id >= 8) 1880 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1881 else 1882 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1883 break; 1884 case CHIP_ARCTURUS: 1885 adev->gfx.config.max_hw_contexts = 8; 1886 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1887 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1888 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1889 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1890 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1891 gb_addr_config &= ~0xf3e777ff; 1892 gb_addr_config |= 0x22014042; 1893 break; 1894 case CHIP_RENOIR: 1895 adev->gfx.config.max_hw_contexts = 8; 1896 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1897 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1898 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1899 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1900 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1901 gb_addr_config &= ~0xf3e777ff; 1902 gb_addr_config |= 0x22010042; 1903 break; 1904 default: 1905 BUG(); 1906 break; 1907 } 1908 1909 adev->gfx.config.gb_addr_config = gb_addr_config; 1910 1911 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1912 REG_GET_FIELD( 1913 adev->gfx.config.gb_addr_config, 1914 GB_ADDR_CONFIG, 1915 NUM_PIPES); 1916 1917 adev->gfx.config.max_tile_pipes = 1918 adev->gfx.config.gb_addr_config_fields.num_pipes; 1919 1920 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1921 REG_GET_FIELD( 1922 adev->gfx.config.gb_addr_config, 1923 GB_ADDR_CONFIG, 1924 NUM_BANKS); 1925 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1926 REG_GET_FIELD( 1927 adev->gfx.config.gb_addr_config, 1928 GB_ADDR_CONFIG, 1929 MAX_COMPRESSED_FRAGS); 1930 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1931 REG_GET_FIELD( 1932 adev->gfx.config.gb_addr_config, 1933 GB_ADDR_CONFIG, 1934 NUM_RB_PER_SE); 1935 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1936 REG_GET_FIELD( 1937 adev->gfx.config.gb_addr_config, 1938 GB_ADDR_CONFIG, 1939 NUM_SHADER_ENGINES); 1940 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1941 REG_GET_FIELD( 1942 
adev->gfx.config.gb_addr_config, 1943 GB_ADDR_CONFIG, 1944 PIPE_INTERLEAVE_SIZE)); 1945 1946 return 0; 1947 } 1948 1949 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1950 int mec, int pipe, int queue) 1951 { 1952 int r; 1953 unsigned irq_type; 1954 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1955 1956 1957 1958 /* mec0 is me1 */ 1959 ring->me = mec + 1; 1960 ring->pipe = pipe; 1961 ring->queue = queue; 1962 1963 ring->ring_obj = NULL; 1964 ring->use_doorbell = true; 1965 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1966 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1967 + (ring_id * GFX9_MEC_HPD_SIZE); 1968 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1969 1970 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1971 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1972 + ring->pipe; 1973 1974 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1975 r = amdgpu_ring_init(adev, ring, 1024, 1976 &adev->gfx.eop_irq, irq_type); 1977 if (r) 1978 return r; 1979 1980 1981 return 0; 1982 } 1983 1984 static int gfx_v9_0_sw_init(void *handle) 1985 { 1986 int i, j, k, r, ring_id; 1987 struct amdgpu_ring *ring; 1988 struct amdgpu_kiq *kiq; 1989 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1990 1991 switch (adev->asic_type) { 1992 case CHIP_VEGA10: 1993 case CHIP_VEGA12: 1994 case CHIP_VEGA20: 1995 case CHIP_RAVEN: 1996 case CHIP_ARCTURUS: 1997 case CHIP_RENOIR: 1998 adev->gfx.mec.num_mec = 2; 1999 break; 2000 default: 2001 adev->gfx.mec.num_mec = 1; 2002 break; 2003 } 2004 2005 adev->gfx.mec.num_pipe_per_mec = 4; 2006 adev->gfx.mec.num_queue_per_pipe = 8; 2007 2008 /* EOP Event */ 2009 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2010 if (r) 2011 return r; 2012 2013 /* Privileged reg */ 2014 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2015 &adev->gfx.priv_reg_irq); 2016 if (r) 2017 return r; 2018 2019 /* Privileged inst */ 2020 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2021 &adev->gfx.priv_inst_irq); 2022 if (r) 2023 return r; 2024 2025 /* ECC error */ 2026 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2027 &adev->gfx.cp_ecc_error_irq); 2028 if (r) 2029 return r; 2030 2031 /* FUE error */ 2032 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2033 &adev->gfx.cp_ecc_error_irq); 2034 if (r) 2035 return r; 2036 2037 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2038 2039 gfx_v9_0_scratch_init(adev); 2040 2041 r = gfx_v9_0_init_microcode(adev); 2042 if (r) { 2043 DRM_ERROR("Failed to load gfx firmware!\n"); 2044 return r; 2045 } 2046 2047 r = adev->gfx.rlc.funcs->init(adev); 2048 if (r) { 2049 DRM_ERROR("Failed to init rlc BOs!\n"); 2050 return r; 2051 } 2052 2053 r = gfx_v9_0_mec_init(adev); 2054 if (r) { 2055 DRM_ERROR("Failed to init MEC BOs!\n"); 2056 return r; 2057 } 2058 2059 /* set up the gfx ring */ 2060 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2061 ring = &adev->gfx.gfx_ring[i]; 2062 ring->ring_obj = NULL; 2063 if (!i) 2064 sprintf(ring->name, "gfx"); 2065 else 2066 sprintf(ring->name, "gfx_%d", i); 2067 ring->use_doorbell = true; 2068 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2069 r = amdgpu_ring_init(adev, ring, 1024, 2070 &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 2071 if (r) 2072 return r; 2073 } 2074 2075 /* set up the compute queues - allocate horizontally across pipes */ 2076 ring_id = 0; 2077 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2078 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2079 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2080 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2081 continue; 2082 2083 r = gfx_v9_0_compute_ring_init(adev, 2084 ring_id, 2085 i, k, j); 2086 if (r) 2087 return r; 2088 2089 ring_id++; 2090 } 2091 } 2092 } 2093 2094 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2095 if (r) { 2096 DRM_ERROR("Failed to init KIQ BOs!\n"); 2097 return r; 2098 } 2099 2100 kiq = &adev->gfx.kiq; 2101 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2102 if (r) 2103 return r; 2104 2105 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2106 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2107 if (r) 2108 return r; 2109 2110 adev->gfx.ce_ram_size = 0x8000; 2111 2112 r = gfx_v9_0_gpu_early_init(adev); 2113 if (r) 2114 return r; 2115 2116 return 0; 2117 } 2118 2119 2120 static int gfx_v9_0_sw_fini(void *handle) 2121 { 2122 int i; 2123 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2124 2125 amdgpu_gfx_ras_fini(adev); 2126 2127 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2128 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2129 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2130 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2131 2132 amdgpu_gfx_mqd_sw_fini(adev); 2133 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2134 amdgpu_gfx_kiq_fini(adev); 2135 2136 gfx_v9_0_mec_fini(adev); 2137 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2138 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 2139 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2140 &adev->gfx.rlc.cp_table_gpu_addr, 2141 (void **)&adev->gfx.rlc.cp_table_ptr); 2142 } 2143 gfx_v9_0_free_microcode(adev); 2144 2145 return 0; 2146 } 2147 2148 2149 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2150 { 2151 /* TODO */ 2152 } 2153 2154 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2155 { 2156 u32 data; 2157 2158 if (instance == 0xffffffff) 2159 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2160 else 2161 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2162 2163 if (se_num == 0xffffffff) 2164 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2165 else 2166 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2167 2168 if (sh_num == 0xffffffff) 2169 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2170 else 2171 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2172 2173 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2174 } 2175 2176 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2177 { 2178 u32 data, mask; 2179 2180 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2181 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2182 2183 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2184 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2185 2186 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2187 adev->gfx.config.max_sh_per_se); 2188 2189 return (~data) & mask; 2190 } 2191 2192 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2193 { 2194 int i, j; 2195 u32 data; 2196 u32
active_rbs = 0; 2197 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2198 adev->gfx.config.max_sh_per_se; 2199 2200 mutex_lock(&adev->grbm_idx_mutex); 2201 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2202 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2203 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2204 data = gfx_v9_0_get_rb_active_bitmap(adev); 2205 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2206 rb_bitmap_width_per_sh); 2207 } 2208 } 2209 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2210 mutex_unlock(&adev->grbm_idx_mutex); 2211 2212 adev->gfx.config.backend_enable_mask = active_rbs; 2213 adev->gfx.config.num_rbs = hweight32(active_rbs); 2214 } 2215 2216 #define DEFAULT_SH_MEM_BASES (0x6000) 2217 #define FIRST_COMPUTE_VMID (8) 2218 #define LAST_COMPUTE_VMID (16) 2219 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2220 { 2221 int i; 2222 uint32_t sh_mem_config; 2223 uint32_t sh_mem_bases; 2224 2225 /* 2226 * Configure apertures: 2227 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2228 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2229 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2230 */ 2231 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2232 2233 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2234 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2235 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2236 2237 mutex_lock(&adev->srbm_mutex); 2238 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2239 soc15_grbm_select(adev, 0, 0, 0, i); 2240 /* CP and shaders */ 2241 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2242 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2243 } 2244 soc15_grbm_select(adev, 0, 0, 0, 0); 2245 mutex_unlock(&adev->srbm_mutex); 2246 2247 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2248 access. These should be enabled by FW for target VMIDs. */ 2249 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2250 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2251 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2252 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2253 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2254 } 2255 } 2256 2257 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2258 { 2259 int vmid; 2260 2261 /* 2262 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2263 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2264 * the driver can enable them for graphics. VMID0 should maintain 2265 * access so that HWS firmware can save/restore entries.
2266 */ 2267 for (vmid = 1; vmid < 16; vmid++) { 2268 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2269 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2270 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2271 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2272 } 2273 } 2274 2275 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2276 { 2277 u32 tmp; 2278 int i; 2279 2280 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2281 2282 gfx_v9_0_tiling_mode_table_init(adev); 2283 2284 gfx_v9_0_setup_rb(adev); 2285 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2286 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2287 2288 /* XXX SH_MEM regs */ 2289 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2290 mutex_lock(&adev->srbm_mutex); 2291 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2292 soc15_grbm_select(adev, 0, 0, 0, i); 2293 /* CP and shaders */ 2294 if (i == 0) { 2295 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2296 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2297 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2298 !!amdgpu_noretry); 2299 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2300 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2301 } else { 2302 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2303 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2304 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2305 !!amdgpu_noretry); 2306 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2307 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2308 (adev->gmc.private_aperture_start >> 48)); 2309 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2310 (adev->gmc.shared_aperture_start >> 48)); 2311 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2312 } 2313 } 2314 soc15_grbm_select(adev, 0, 0, 0, 0); 2315 2316 mutex_unlock(&adev->srbm_mutex); 2317 2318 gfx_v9_0_init_compute_vmid(adev); 2319 gfx_v9_0_init_gds_vmid(adev); 2320 } 2321 2322 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2323 { 2324 u32 i, j, k; 2325 u32 mask; 2326 2327 mutex_lock(&adev->grbm_idx_mutex); 2328 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2329 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2330 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2331 for (k = 0; k < adev->usec_timeout; k++) { 2332 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2333 break; 2334 udelay(1); 2335 } 2336 if (k == adev->usec_timeout) { 2337 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2338 0xffffffff, 0xffffffff); 2339 mutex_unlock(&adev->grbm_idx_mutex); 2340 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2341 i, j); 2342 return; 2343 } 2344 } 2345 } 2346 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2347 mutex_unlock(&adev->grbm_idx_mutex); 2348 2349 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2350 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2351 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2352 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2353 for (k = 0; k < adev->usec_timeout; k++) { 2354 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2355 break; 2356 udelay(1); 2357 } 2358 } 2359 2360 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2361 bool enable) 2362 { 2363 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2364 2365 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2366 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
1 : 0); 2367 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2368 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2369 2370 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2371 } 2372 2373 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2374 { 2375 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2376 /* csib */ 2377 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2378 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2379 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2380 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2381 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2382 adev->gfx.rlc.clear_state_size); 2383 } 2384 2385 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2386 int indirect_offset, 2387 int list_size, 2388 int *unique_indirect_regs, 2389 int unique_indirect_reg_count, 2390 int *indirect_start_offsets, 2391 int *indirect_start_offsets_count, 2392 int max_start_offsets_count) 2393 { 2394 int idx; 2395 2396 for (; indirect_offset < list_size; indirect_offset++) { 2397 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2398 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2399 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2400 2401 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2402 indirect_offset += 2; 2403 2404 /* look for the matching index */ 2405 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2406 if (unique_indirect_regs[idx] == 2407 register_list_format[indirect_offset] || 2408 !unique_indirect_regs[idx]) 2409 break; 2410 } 2411 2412 BUG_ON(idx >= unique_indirect_reg_count); 2413 2414 if (!unique_indirect_regs[idx]) 2415 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2416 2417 indirect_offset++; 2418 } 2419 } 2420 } 2421 2422 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2423 { 2424 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2425 int unique_indirect_reg_count = 0; 2426 2427 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2428 int indirect_start_offsets_count = 0; 2429 2430 int list_size = 0; 2431 int i = 0, j = 0; 2432 u32 tmp = 0; 2433 2434 u32 *register_list_format = 2435 kmemdup(adev->gfx.rlc.register_list_format, 2436 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2437 if (!register_list_format) 2438 return -ENOMEM; 2439 2440 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2441 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2442 gfx_v9_1_parse_ind_reg_list(register_list_format, 2443 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2444 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2445 unique_indirect_regs, 2446 unique_indirect_reg_count, 2447 indirect_start_offsets, 2448 &indirect_start_offsets_count, 2449 ARRAY_SIZE(indirect_start_offsets)); 2450 2451 /* enable auto inc in case it is disabled */ 2452 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2453 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2454 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2455 2456 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2457 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2458 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2459 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2460 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2461
adev->gfx.rlc.register_restore[i]); 2462 2463 /* load indirect register */ 2464 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2465 adev->gfx.rlc.reg_list_format_start); 2466 2467 /* direct register portion */ 2468 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2469 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2470 register_list_format[i]); 2471 2472 /* indirect register portion */ 2473 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2474 if (register_list_format[i] == 0xFFFFFFFF) { 2475 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2476 continue; 2477 } 2478 2479 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2480 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2481 2482 for (j = 0; j < unique_indirect_reg_count; j++) { 2483 if (register_list_format[i] == unique_indirect_regs[j]) { 2484 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2485 break; 2486 } 2487 } 2488 2489 BUG_ON(j >= unique_indirect_reg_count); 2490 2491 i++; 2492 } 2493 2494 /* set save/restore list size */ 2495 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2496 list_size = list_size >> 1; 2497 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2498 adev->gfx.rlc.reg_restore_list_size); 2499 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2500 2501 /* write the starting offsets to RLC scratch ram */ 2502 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2503 adev->gfx.rlc.starting_offsets_start); 2504 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2505 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2506 indirect_start_offsets[i]); 2507 2508 /* load unique indirect regs*/ 2509 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2510 if (unique_indirect_regs[i] != 0) { 2511 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2512 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2513 unique_indirect_regs[i] & 0x3FFFF); 2514 2515 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2516 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2517 unique_indirect_regs[i] >> 20); 2518 } 2519 } 2520 2521 kfree(register_list_format); 2522 return 0; 2523 } 2524 2525 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2526 { 2527 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2528 } 2529 2530 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2531 bool enable) 2532 { 2533 uint32_t data = 0; 2534 uint32_t default_data = 0; 2535 2536 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2537 if (enable) { 2538 /* enable GFXIP control over CGPG */ 2539 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2540 if(default_data != data) 2541 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2542 2543 /* update status */ 2544 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2545 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2546 if(default_data != data) 2547 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2548 } else { 2549 /* restore GFXIP control over CGPG */ 2550 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2551 if(default_data != data) 2552 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2553 } 2554 } 2555 2556 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2557 { 2558 uint32_t data = 0; 2559 2560 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2561
AMD_PG_SUPPORT_GFX_SMG | 2562 AMD_PG_SUPPORT_GFX_DMG)) { 2563 /* init IDLE_POLL_COUNT = 60 */ 2564 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2565 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2566 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2567 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2568 2569 /* init RLC PG Delay */ 2570 data = 0; 2571 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2572 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2573 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2574 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2575 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2576 2577 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2578 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2579 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2580 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2581 2582 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2583 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2584 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2585 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2586 2587 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2588 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2589 2590 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2591 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2592 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2593 2594 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2595 } 2596 } 2597 2598 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2599 bool enable) 2600 { 2601 uint32_t data = 0; 2602 uint32_t default_data = 0; 2603 2604 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2605 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2606 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2607 enable ? 1 : 0); 2608 if (default_data != data) 2609 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2610 } 2611 2612 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2613 bool enable) 2614 { 2615 uint32_t data = 0; 2616 uint32_t default_data = 0; 2617 2618 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2619 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2620 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2621 enable ? 1 : 0); 2622 if(default_data != data) 2623 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2624 } 2625 2626 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2627 bool enable) 2628 { 2629 uint32_t data = 0; 2630 uint32_t default_data = 0; 2631 2632 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2633 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2634 CP_PG_DISABLE, 2635 enable ? 0 : 1); 2636 if(default_data != data) 2637 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2638 } 2639 2640 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2641 bool enable) 2642 { 2643 uint32_t data, default_data; 2644 2645 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2646 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2647 GFX_POWER_GATING_ENABLE, 2648 enable ? 
1 : 0); 2649 if(default_data != data) 2650 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2651 } 2652 2653 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2654 bool enable) 2655 { 2656 uint32_t data, default_data; 2657 2658 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2659 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2660 GFX_PIPELINE_PG_ENABLE, 2661 enable ? 1 : 0); 2662 if(default_data != data) 2663 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2664 2665 if (!enable) 2666 /* read any GFX register to wake up GFX */ 2667 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2668 } 2669 2670 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2671 bool enable) 2672 { 2673 uint32_t data, default_data; 2674 2675 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2676 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2677 STATIC_PER_CU_PG_ENABLE, 2678 enable ? 1 : 0); 2679 if(default_data != data) 2680 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2681 } 2682 2683 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2684 bool enable) 2685 { 2686 uint32_t data, default_data; 2687 2688 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2689 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2690 DYN_PER_CU_PG_ENABLE, 2691 enable ? 1 : 0); 2692 if(default_data != data) 2693 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2694 } 2695 2696 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2697 { 2698 gfx_v9_0_init_csb(adev); 2699 2700 /* 2701 * The RLC save/restore list is supported from RLC firmware v2_1, 2702 * and it is needed by the gfxoff feature. 2703 */ 2704 if (adev->gfx.rlc.is_rlc_v2_1) { 2705 if (adev->asic_type == CHIP_VEGA12 || 2706 (adev->asic_type == CHIP_RAVEN && 2707 adev->rev_id >= 8)) 2708 gfx_v9_1_init_rlc_save_restore_list(adev); 2709 gfx_v9_0_enable_save_restore_machine(adev); 2710 } 2711 2712 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2713 AMD_PG_SUPPORT_GFX_SMG | 2714 AMD_PG_SUPPORT_GFX_DMG | 2715 AMD_PG_SUPPORT_CP | 2716 AMD_PG_SUPPORT_GDS | 2717 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2718 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2719 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2720 gfx_v9_0_init_gfx_power_gating(adev); 2721 } 2722 } 2723 2724 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2725 { 2726 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2727 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2728 gfx_v9_0_wait_for_rlc_serdes(adev); 2729 } 2730 2731 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2732 { 2733 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2734 udelay(50); 2735 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2736 udelay(50); 2737 } 2738 2739 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2740 { 2741 #ifdef AMDGPU_RLC_DEBUG_RETRY 2742 u32 rlc_ucode_ver; 2743 #endif 2744 2745 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2746 udelay(50); 2747 2748 /* carrizo enables the cp interrupt only after the cp is initialized */ 2749 if (!(adev->flags & AMD_IS_APU)) { 2750 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2751 udelay(50); 2752 } 2753 2754 #ifdef AMDGPU_RLC_DEBUG_RETRY 2755 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2756 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2757 if(rlc_ucode_ver == 0x108) { 2758 DRM_INFO("Using rlc debug ucode.
mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n", 2759 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2760 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2761 * default is 0x9C4 to create a 100us interval */ 2762 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2763 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2764 * to disable the page fault retry interrupts, default is 2765 * 0x100 (256) */ 2766 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2767 } 2768 #endif 2769 } 2770 2771 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2772 { 2773 const struct rlc_firmware_header_v2_0 *hdr; 2774 const __le32 *fw_data; 2775 unsigned i, fw_size; 2776 2777 if (!adev->gfx.rlc_fw) 2778 return -EINVAL; 2779 2780 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2781 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2782 2783 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2784 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2785 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2786 2787 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2788 RLCG_UCODE_LOADING_START_ADDRESS); 2789 for (i = 0; i < fw_size; i++) 2790 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2791 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2792 2793 return 0; 2794 } 2795 2796 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2797 { 2798 int r; 2799 2800 if (amdgpu_sriov_vf(adev)) { 2801 gfx_v9_0_init_csb(adev); 2802 return 0; 2803 } 2804 2805 adev->gfx.rlc.funcs->stop(adev); 2806 2807 /* disable CG */ 2808 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2809 2810 gfx_v9_0_init_pg(adev); 2811 2812 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2813 /* legacy rlc firmware loading */ 2814 r = gfx_v9_0_rlc_load_microcode(adev); 2815 if (r) 2816 return r; 2817 } 2818 2819 switch (adev->asic_type) { 2820 case CHIP_RAVEN: 2821 if (amdgpu_lbpw == 0) 2822 gfx_v9_0_enable_lbpw(adev, false); 2823 else 2824 gfx_v9_0_enable_lbpw(adev, true); 2825 break; 2826 case CHIP_VEGA20: 2827 if (amdgpu_lbpw > 0) 2828 gfx_v9_0_enable_lbpw(adev, true); 2829 else 2830 gfx_v9_0_enable_lbpw(adev, false); 2831 break; 2832 default: 2833 break; 2834 } 2835 2836 adev->gfx.rlc.funcs->start(adev); 2837 2838 return 0; 2839 } 2840 2841 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2842 { 2843 int i; 2844 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2845 2846 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2847 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2848 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ?
0 : 1); 2849 if (!enable) { 2850 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2851 adev->gfx.gfx_ring[i].sched.ready = false; 2852 } 2853 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2854 udelay(50); 2855 } 2856 2857 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2858 { 2859 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2860 const struct gfx_firmware_header_v1_0 *ce_hdr; 2861 const struct gfx_firmware_header_v1_0 *me_hdr; 2862 const __le32 *fw_data; 2863 unsigned i, fw_size; 2864 2865 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2866 return -EINVAL; 2867 2868 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2869 adev->gfx.pfp_fw->data; 2870 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2871 adev->gfx.ce_fw->data; 2872 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2873 adev->gfx.me_fw->data; 2874 2875 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2876 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2877 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2878 2879 gfx_v9_0_cp_gfx_enable(adev, false); 2880 2881 /* PFP */ 2882 fw_data = (const __le32 *) 2883 (adev->gfx.pfp_fw->data + 2884 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2885 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2886 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2887 for (i = 0; i < fw_size; i++) 2888 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2889 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2890 2891 /* CE */ 2892 fw_data = (const __le32 *) 2893 (adev->gfx.ce_fw->data + 2894 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2895 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2896 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2897 for (i = 0; i < fw_size; i++) 2898 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2899 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2900 2901 /* ME */ 2902 fw_data = (const __le32 *) 2903 (adev->gfx.me_fw->data + 2904 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2905 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2906 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2907 for (i = 0; i < fw_size; i++) 2908 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2909 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2910 2911 return 0; 2912 } 2913 2914 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2915 { 2916 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2917 const struct cs_section_def *sect = NULL; 2918 const struct cs_extent_def *ext = NULL; 2919 int r, i, tmp; 2920 2921 /* init the CP */ 2922 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2923 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2924 2925 gfx_v9_0_cp_gfx_enable(adev, true); 2926 2927 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2928 if (r) { 2929 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2930 return r; 2931 } 2932 2933 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2934 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2935 2936 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2937 amdgpu_ring_write(ring, 0x80000000); 2938 amdgpu_ring_write(ring, 0x80000000); 2939 2940 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2941 for (ext = sect->section; ext->extent != NULL; ++ext) { 2942 if (sect->id == SECT_CONTEXT) { 2943 amdgpu_ring_write(ring, 2944 PACKET3(PACKET3_SET_CONTEXT_REG, 2945 
ext->reg_count)); 2946 amdgpu_ring_write(ring, 2947 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2948 for (i = 0; i < ext->reg_count; i++) 2949 amdgpu_ring_write(ring, ext->extent[i]); 2950 } 2951 } 2952 } 2953 2954 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2955 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2956 2957 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2958 amdgpu_ring_write(ring, 0); 2959 2960 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2961 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2962 amdgpu_ring_write(ring, 0x8000); 2963 amdgpu_ring_write(ring, 0x8000); 2964 2965 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 2966 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2967 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2968 amdgpu_ring_write(ring, tmp); 2969 amdgpu_ring_write(ring, 0); 2970 2971 amdgpu_ring_commit(ring); 2972 2973 return 0; 2974 } 2975 2976 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2977 { 2978 struct amdgpu_ring *ring; 2979 u32 tmp; 2980 u32 rb_bufsz; 2981 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2982 2983 /* Set the write pointer delay */ 2984 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2985 2986 /* set the RB to use vmid 0 */ 2987 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2988 2989 /* Set ring buffer size */ 2990 ring = &adev->gfx.gfx_ring[0]; 2991 rb_bufsz = order_base_2(ring->ring_size / 8); 2992 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2993 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2994 #ifdef __BIG_ENDIAN 2995 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2996 #endif 2997 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2998 2999 /* Initialize the ring buffer's write pointers */ 3000 ring->wptr = 0; 3001 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3002 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3003 3004 /* set the wb address whether it's enabled or not */ 3005 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3006 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3007 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3008 3009 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3010 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3011 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3012 3013 mdelay(1); 3014 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3015 3016 rb_addr = ring->gpu_addr >> 8; 3017 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3018 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3019 3020 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3021 if (ring->use_doorbell) { 3022 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3023 DOORBELL_OFFSET, ring->doorbell_index); 3024 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3025 DOORBELL_EN, 1); 3026 } else { 3027 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3028 } 3029 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3030 3031 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3032 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3033 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3034 3035 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3036 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3037 3038 3039 /* start the ring */ 3040 gfx_v9_0_cp_gfx_start(adev); 3041 ring->sched.ready = true;
3042 3043 return 0; 3044 } 3045 3046 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3047 { 3048 int i; 3049 3050 if (enable) { 3051 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3052 } else { 3053 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3054 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3055 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3056 adev->gfx.compute_ring[i].sched.ready = false; 3057 adev->gfx.kiq.ring.sched.ready = false; 3058 } 3059 udelay(50); 3060 } 3061 3062 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3063 { 3064 const struct gfx_firmware_header_v1_0 *mec_hdr; 3065 const __le32 *fw_data; 3066 unsigned i; 3067 u32 tmp; 3068 3069 if (!adev->gfx.mec_fw) 3070 return -EINVAL; 3071 3072 gfx_v9_0_cp_compute_enable(adev, false); 3073 3074 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3075 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3076 3077 fw_data = (const __le32 *) 3078 (adev->gfx.mec_fw->data + 3079 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3080 tmp = 0; 3081 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3082 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3083 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3084 3085 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3086 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3087 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3088 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3089 3090 /* MEC1 */ 3091 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3092 mec_hdr->jt_offset); 3093 for (i = 0; i < mec_hdr->jt_size; i++) 3094 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3095 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3096 3097 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3098 adev->gfx.mec_fw_version); 3099 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3100 3101 return 0; 3102 } 3103 3104 /* KIQ functions */ 3105 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3106 { 3107 uint32_t tmp; 3108 struct amdgpu_device *adev = ring->adev; 3109 3110 /* tell RLC which is KIQ queue */ 3111 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3112 tmp &= 0xffffff00; 3113 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3114 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3115 tmp |= 0x80; 3116 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3117 } 3118 3119 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 3120 { 3121 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3122 uint64_t queue_mask = 0; 3123 int r, i; 3124 3125 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 3126 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 3127 continue; 3128 3129 /* This situation may be hit in the future if a new HW 3130 * generation exposes more than 64 queues. 
If so, the 3131 * definition of queue_mask needs updating */ 3132 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 3133 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 3134 break; 3135 } 3136 3137 queue_mask |= (1ull << i); 3138 } 3139 3140 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 3141 if (r) { 3142 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3143 return r; 3144 } 3145 3146 /* set resources */ 3147 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 3148 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 3149 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 3150 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 3151 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 3152 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 3153 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 3154 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 3155 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 3156 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3157 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3158 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 3159 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3160 3161 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 3162 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 3163 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3164 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 3165 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 3166 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 3167 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 3168 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 3169 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 3170 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 3171 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 3172 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 3173 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 3174 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 3175 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 3176 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 3177 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 3178 } 3179 3180 r = amdgpu_ring_test_helper(kiq_ring); 3181 if (r) 3182 DRM_ERROR("KCQ enable failed\n"); 3183 3184 return r; 3185 } 3186 3187 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3188 { 3189 struct amdgpu_device *adev = ring->adev; 3190 struct v9_mqd *mqd = ring->mqd_ptr; 3191 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3192 uint32_t tmp; 3193 3194 mqd->header = 0xC0310800; 3195 mqd->compute_pipelinestat_enable = 0x00000001; 3196 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3197 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3198 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3199 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3200 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3201 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3202 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3203 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3204 mqd->compute_misc_reserved = 0x00000003; 3205 3206 mqd->dynamic_cu_mask_addr_lo = 3207 lower_32_bits(ring->mqd_gpu_addr 3208 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3209 mqd->dynamic_cu_mask_addr_hi = 3210 upper_32_bits(ring->mqd_gpu_addr 3211 + offsetof(struct v9_mqd_allocation, 
dynamic_cu_mask)); 3212 3213 eop_base_addr = ring->eop_gpu_addr >> 8; 3214 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3215 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3216 3217 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3218 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3219 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3220 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3221 3222 mqd->cp_hqd_eop_control = tmp; 3223 3224 /* enable doorbell? */ 3225 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3226 3227 if (ring->use_doorbell) { 3228 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3229 DOORBELL_OFFSET, ring->doorbell_index); 3230 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3231 DOORBELL_EN, 1); 3232 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3233 DOORBELL_SOURCE, 0); 3234 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3235 DOORBELL_HIT, 0); 3236 } else { 3237 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3238 DOORBELL_EN, 0); 3239 } 3240 3241 mqd->cp_hqd_pq_doorbell_control = tmp; 3242 3243 /* disable the queue if it's active */ 3244 ring->wptr = 0; 3245 mqd->cp_hqd_dequeue_request = 0; 3246 mqd->cp_hqd_pq_rptr = 0; 3247 mqd->cp_hqd_pq_wptr_lo = 0; 3248 mqd->cp_hqd_pq_wptr_hi = 0; 3249 3250 /* set the pointer to the MQD */ 3251 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3252 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3253 3254 /* set MQD vmid to 0 */ 3255 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3256 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3257 mqd->cp_mqd_control = tmp; 3258 3259 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3260 hqd_gpu_addr = ring->gpu_addr >> 8; 3261 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3262 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3263 3264 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3265 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3266 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3267 (order_base_2(ring->ring_size / 4) - 1)); 3268 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3269 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3270 #ifdef __BIG_ENDIAN 3271 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3272 #endif 3273 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3274 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3275 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3276 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3277 mqd->cp_hqd_pq_control = tmp; 3278 3279 /* set the wb address whether it's enabled or not */ 3280 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3281 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3282 mqd->cp_hqd_pq_rptr_report_addr_hi = 3283 upper_32_bits(wb_gpu_addr) & 0xffff; 3284 3285 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3286 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3287 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3288 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3289 3290 tmp = 0; 3291 /* enable the doorbell if requested */ 3292 if (ring->use_doorbell) { 3293 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3294 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3295 DOORBELL_OFFSET, ring->doorbell_index); 3296 3297 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3298 DOORBELL_EN, 1); 3299 tmp = 
REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3300 DOORBELL_SOURCE, 0); 3301 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3302 DOORBELL_HIT, 0); 3303 } 3304 3305 mqd->cp_hqd_pq_doorbell_control = tmp; 3306 3307 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3308 ring->wptr = 0; 3309 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3310 3311 /* set the vmid for the queue */ 3312 mqd->cp_hqd_vmid = 0; 3313 3314 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3315 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3316 mqd->cp_hqd_persistent_state = tmp; 3317 3318 /* set MIN_IB_AVAIL_SIZE */ 3319 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3320 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3321 mqd->cp_hqd_ib_control = tmp; 3322 3323 /* activate the queue */ 3324 mqd->cp_hqd_active = 1; 3325 3326 return 0; 3327 } 3328 3329 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3330 { 3331 struct amdgpu_device *adev = ring->adev; 3332 struct v9_mqd *mqd = ring->mqd_ptr; 3333 int j; 3334 3335 /* disable wptr polling */ 3336 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3337 3338 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3339 mqd->cp_hqd_eop_base_addr_lo); 3340 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3341 mqd->cp_hqd_eop_base_addr_hi); 3342 3343 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3344 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3345 mqd->cp_hqd_eop_control); 3346 3347 /* enable doorbell? */ 3348 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3349 mqd->cp_hqd_pq_doorbell_control); 3350 3351 /* disable the queue if it's active */ 3352 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3353 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3354 for (j = 0; j < adev->usec_timeout; j++) { 3355 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3356 break; 3357 udelay(1); 3358 } 3359 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3360 mqd->cp_hqd_dequeue_request); 3361 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3362 mqd->cp_hqd_pq_rptr); 3363 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3364 mqd->cp_hqd_pq_wptr_lo); 3365 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3366 mqd->cp_hqd_pq_wptr_hi); 3367 } 3368 3369 /* set the pointer to the MQD */ 3370 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3371 mqd->cp_mqd_base_addr_lo); 3372 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3373 mqd->cp_mqd_base_addr_hi); 3374 3375 /* set MQD vmid to 0 */ 3376 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3377 mqd->cp_mqd_control); 3378 3379 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3380 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3381 mqd->cp_hqd_pq_base_lo); 3382 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3383 mqd->cp_hqd_pq_base_hi); 3384 3385 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3386 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3387 mqd->cp_hqd_pq_control); 3388 3389 /* set the wb address whether it's enabled or not */ 3390 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3391 mqd->cp_hqd_pq_rptr_report_addr_lo); 3392 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3393 mqd->cp_hqd_pq_rptr_report_addr_hi); 3394 3395 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3396 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3397 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3398 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3399 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3400 3401 /* enable the 
doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
			     (adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
			     (adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
			 mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
			 mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
			 mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
			 mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
			 mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int j;

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);

		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (j == adev->usec_timeout) {
			DRM_DEBUG("KIQ dequeue request failed.\n");

			/* Manual disable if dequeue request times out */
			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
		}

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);

	return 0;
}

static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v9_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if
(adev->gfx.mec.mqd_backup[mqd_idx]) 3507 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3508 } 3509 3510 return 0; 3511 } 3512 3513 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3514 { 3515 struct amdgpu_device *adev = ring->adev; 3516 struct v9_mqd *mqd = ring->mqd_ptr; 3517 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3518 3519 if (!adev->in_gpu_reset && !adev->in_suspend) { 3520 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3521 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3522 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3523 mutex_lock(&adev->srbm_mutex); 3524 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3525 gfx_v9_0_mqd_init(ring); 3526 soc15_grbm_select(adev, 0, 0, 0, 0); 3527 mutex_unlock(&adev->srbm_mutex); 3528 3529 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3530 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3531 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3532 /* reset MQD to a clean status */ 3533 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3534 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3535 3536 /* reset ring buffer */ 3537 ring->wptr = 0; 3538 amdgpu_ring_clear_ring(ring); 3539 } else { 3540 amdgpu_ring_clear_ring(ring); 3541 } 3542 3543 return 0; 3544 } 3545 3546 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3547 { 3548 struct amdgpu_ring *ring; 3549 int r; 3550 3551 ring = &adev->gfx.kiq.ring; 3552 3553 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3554 if (unlikely(r != 0)) 3555 return r; 3556 3557 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3558 if (unlikely(r != 0)) 3559 return r; 3560 3561 gfx_v9_0_kiq_init_queue(ring); 3562 amdgpu_bo_kunmap(ring->mqd_obj); 3563 ring->mqd_ptr = NULL; 3564 amdgpu_bo_unreserve(ring->mqd_obj); 3565 ring->sched.ready = true; 3566 return 0; 3567 } 3568 3569 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3570 { 3571 struct amdgpu_ring *ring = NULL; 3572 int r = 0, i; 3573 3574 gfx_v9_0_cp_compute_enable(adev, true); 3575 3576 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3577 ring = &adev->gfx.compute_ring[i]; 3578 3579 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3580 if (unlikely(r != 0)) 3581 goto done; 3582 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3583 if (!r) { 3584 r = gfx_v9_0_kcq_init_queue(ring); 3585 amdgpu_bo_kunmap(ring->mqd_obj); 3586 ring->mqd_ptr = NULL; 3587 } 3588 amdgpu_bo_unreserve(ring->mqd_obj); 3589 if (r) 3590 goto done; 3591 } 3592 3593 r = gfx_v9_0_kiq_kcq_enable(adev); 3594 done: 3595 return r; 3596 } 3597 3598 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3599 { 3600 int r, i; 3601 struct amdgpu_ring *ring; 3602 3603 if (!(adev->flags & AMD_IS_APU)) 3604 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3605 3606 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3607 if (adev->asic_type != CHIP_ARCTURUS) { 3608 /* legacy firmware loading */ 3609 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3610 if (r) 3611 return r; 3612 } 3613 3614 r = gfx_v9_0_cp_compute_load_microcode(adev); 3615 if (r) 3616 return r; 3617 } 3618 3619 r = gfx_v9_0_kiq_resume(adev); 3620 if (r) 3621 return r; 3622 3623 if (adev->asic_type != CHIP_ARCTURUS) { 3624 r = gfx_v9_0_cp_gfx_resume(adev); 3625 if (r) 3626 return r; 3627 } 3628 3629 r = gfx_v9_0_kcq_resume(adev); 3630 if (r) 3631 return r; 3632 3633 if (adev->asic_type != CHIP_ARCTURUS) { 3634 ring = 
&adev->gfx.gfx_ring[0]; 3635 r = amdgpu_ring_test_helper(ring); 3636 if (r) 3637 return r; 3638 } 3639 3640 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3641 ring = &adev->gfx.compute_ring[i]; 3642 amdgpu_ring_test_helper(ring); 3643 } 3644 3645 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3646 3647 return 0; 3648 } 3649 3650 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3651 { 3652 if (adev->asic_type != CHIP_ARCTURUS) 3653 gfx_v9_0_cp_gfx_enable(adev, enable); 3654 gfx_v9_0_cp_compute_enable(adev, enable); 3655 } 3656 3657 static int gfx_v9_0_hw_init(void *handle) 3658 { 3659 int r; 3660 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3661 3662 if (!amdgpu_sriov_vf(adev)) 3663 gfx_v9_0_init_golden_registers(adev); 3664 3665 gfx_v9_0_constants_init(adev); 3666 3667 r = adev->gfx.rlc.funcs->resume(adev); 3668 if (r) 3669 return r; 3670 3671 r = gfx_v9_0_cp_resume(adev); 3672 if (r) 3673 return r; 3674 3675 return r; 3676 } 3677 3678 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3679 { 3680 int r, i; 3681 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3682 3683 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3684 if (r) 3685 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3686 3687 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3688 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3689 3690 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3691 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3692 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3693 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3694 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3695 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3696 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3697 amdgpu_ring_write(kiq_ring, 0); 3698 amdgpu_ring_write(kiq_ring, 0); 3699 amdgpu_ring_write(kiq_ring, 0); 3700 } 3701 r = amdgpu_ring_test_helper(kiq_ring); 3702 if (r) 3703 DRM_ERROR("KCQ disable failed\n"); 3704 3705 return r; 3706 } 3707 3708 static int gfx_v9_0_hw_fini(void *handle) 3709 { 3710 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3711 3712 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3713 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3714 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3715 3716 /* DF freeze and kcq disable will fail */ 3717 if (!amdgpu_ras_intr_triggered()) 3718 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3719 gfx_v9_0_kcq_disable(adev); 3720 3721 if (amdgpu_sriov_vf(adev)) { 3722 gfx_v9_0_cp_gfx_enable(adev, false); 3723 /* must disable polling for SRIOV when hw finished, otherwise 3724 * CPC engine may still keep fetching WB address which is already 3725 * invalid after sw finished and trigger DMAR reading error in 3726 * hypervisor side. 
3727 */ 3728 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3729 return 0; 3730 } 3731 3732 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3733 * otherwise KIQ is hanging when binding back 3734 */ 3735 if (!adev->in_gpu_reset && !adev->in_suspend) { 3736 mutex_lock(&adev->srbm_mutex); 3737 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3738 adev->gfx.kiq.ring.pipe, 3739 adev->gfx.kiq.ring.queue, 0); 3740 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3741 soc15_grbm_select(adev, 0, 0, 0, 0); 3742 mutex_unlock(&adev->srbm_mutex); 3743 } 3744 3745 gfx_v9_0_cp_enable(adev, false); 3746 adev->gfx.rlc.funcs->stop(adev); 3747 3748 return 0; 3749 } 3750 3751 static int gfx_v9_0_suspend(void *handle) 3752 { 3753 return gfx_v9_0_hw_fini(handle); 3754 } 3755 3756 static int gfx_v9_0_resume(void *handle) 3757 { 3758 return gfx_v9_0_hw_init(handle); 3759 } 3760 3761 static bool gfx_v9_0_is_idle(void *handle) 3762 { 3763 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3764 3765 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3766 GRBM_STATUS, GUI_ACTIVE)) 3767 return false; 3768 else 3769 return true; 3770 } 3771 3772 static int gfx_v9_0_wait_for_idle(void *handle) 3773 { 3774 unsigned i; 3775 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3776 3777 for (i = 0; i < adev->usec_timeout; i++) { 3778 if (gfx_v9_0_is_idle(handle)) 3779 return 0; 3780 udelay(1); 3781 } 3782 return -ETIMEDOUT; 3783 } 3784 3785 static int gfx_v9_0_soft_reset(void *handle) 3786 { 3787 u32 grbm_soft_reset = 0; 3788 u32 tmp; 3789 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3790 3791 /* GRBM_STATUS */ 3792 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3793 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3794 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3795 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3796 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3797 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3798 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3799 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3800 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3801 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3802 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3803 } 3804 3805 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3806 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3807 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3808 } 3809 3810 /* GRBM_STATUS2 */ 3811 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3812 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3813 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3814 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3815 3816 3817 if (grbm_soft_reset) { 3818 /* stop the rlc */ 3819 adev->gfx.rlc.funcs->stop(adev); 3820 3821 if (adev->asic_type != CHIP_ARCTURUS) 3822 /* Disable GFX parsing/prefetching */ 3823 gfx_v9_0_cp_gfx_enable(adev, false); 3824 3825 /* Disable MEC parsing/prefetching */ 3826 gfx_v9_0_cp_compute_enable(adev, false); 3827 3828 if (grbm_soft_reset) { 3829 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3830 tmp |= grbm_soft_reset; 3831 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3832 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3833 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3834 3835 udelay(50); 3836 3837 tmp &= ~grbm_soft_reset; 3838 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3839 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3840 } 3841 3842 /* Wait a little for things to settle down */ 3843 
udelay(50); 3844 } 3845 return 0; 3846 } 3847 3848 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3849 { 3850 uint64_t clock; 3851 3852 mutex_lock(&adev->gfx.gpu_clock_mutex); 3853 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 3854 uint32_t tmp, lsb, msb, i = 0; 3855 do { 3856 if (i != 0) 3857 udelay(1); 3858 tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3859 lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); 3860 msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); 3861 i++; 3862 } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); 3863 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); 3864 } else { 3865 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3866 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3867 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3868 } 3869 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3870 return clock; 3871 } 3872 3873 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3874 uint32_t vmid, 3875 uint32_t gds_base, uint32_t gds_size, 3876 uint32_t gws_base, uint32_t gws_size, 3877 uint32_t oa_base, uint32_t oa_size) 3878 { 3879 struct amdgpu_device *adev = ring->adev; 3880 3881 /* GDS Base */ 3882 gfx_v9_0_write_data_to_reg(ring, 0, false, 3883 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3884 gds_base); 3885 3886 /* GDS Size */ 3887 gfx_v9_0_write_data_to_reg(ring, 0, false, 3888 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3889 gds_size); 3890 3891 /* GWS */ 3892 gfx_v9_0_write_data_to_reg(ring, 0, false, 3893 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3894 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3895 3896 /* OA */ 3897 gfx_v9_0_write_data_to_reg(ring, 0, false, 3898 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3899 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3900 } 3901 3902 static const u32 vgpr_init_compute_shader[] = 3903 { 3904 0xb07c0000, 0xbe8000ff, 3905 0x000000f8, 0xbf110800, 3906 0x7e000280, 0x7e020280, 3907 0x7e040280, 0x7e060280, 3908 0x7e080280, 0x7e0a0280, 3909 0x7e0c0280, 0x7e0e0280, 3910 0x80808800, 0xbe803200, 3911 0xbf84fff5, 0xbf9c0000, 3912 0xd28c0001, 0x0001007f, 3913 0xd28d0001, 0x0002027e, 3914 0x10020288, 0xb8810904, 3915 0xb7814000, 0xd1196a01, 3916 0x00000301, 0xbe800087, 3917 0xbefc00c1, 0xd89c4000, 3918 0x00020201, 0xd89cc080, 3919 0x00040401, 0x320202ff, 3920 0x00000800, 0x80808100, 3921 0xbf84fff8, 0x7e020280, 3922 0xbf810000, 0x00000000, 3923 }; 3924 3925 static const u32 sgpr_init_compute_shader[] = 3926 { 3927 0xb07c0000, 0xbe8000ff, 3928 0x0000005f, 0xbee50080, 3929 0xbe812c65, 0xbe822c65, 3930 0xbe832c65, 0xbe842c65, 3931 0xbe852c65, 0xb77c0005, 3932 0x80808500, 0xbf84fff8, 3933 0xbe800080, 0xbf810000, 3934 }; 3935 3936 static const struct soc15_reg_entry vgpr_init_regs[] = { 3937 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3938 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3939 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3940 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3941 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 3942 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 3943 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 3944 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3945 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 3946 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 
0x400000 }, /* 64KB LDS */ 3947 }; 3948 3949 static const struct soc15_reg_entry sgpr1_init_regs[] = { 3950 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 3951 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 3952 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 3953 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 3954 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 3955 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 3956 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 3957 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3958 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 3959 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3960 }; 3961 3962 static const struct soc15_reg_entry sgpr2_init_regs[] = { 3963 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 3964 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 3965 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 3966 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 3967 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 3968 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 3969 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 3970 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3971 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 3972 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3973 }; 3974 3975 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3976 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 3977 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 3978 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 3979 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 3980 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 3981 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 3982 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 3983 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 3984 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 3985 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 3986 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 3987 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 3988 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 3989 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 3990 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 3991 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 3992 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 3993 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 3994 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 3995 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 3996 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 3997 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 3998 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 3999 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4000 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4001 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4002 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4003 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4004 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4005 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4006 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4007 { SOC15_REG_ENTRY(GC, 0, 
mmTCA_EDC_CNT), 0, 1, 2}, 4008 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4009 { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, 4010 { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, 4011 { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, 4012 }; 4013 4014 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4015 { 4016 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4017 int i, r; 4018 4019 /* only support when RAS is enabled */ 4020 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4021 return 0; 4022 4023 r = amdgpu_ring_alloc(ring, 7); 4024 if (r) { 4025 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4026 ring->name, r); 4027 return r; 4028 } 4029 4030 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4031 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4032 4033 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4034 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4035 PACKET3_DMA_DATA_DST_SEL(1) | 4036 PACKET3_DMA_DATA_SRC_SEL(2) | 4037 PACKET3_DMA_DATA_ENGINE(0))); 4038 amdgpu_ring_write(ring, 0); 4039 amdgpu_ring_write(ring, 0); 4040 amdgpu_ring_write(ring, 0); 4041 amdgpu_ring_write(ring, 0); 4042 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4043 adev->gds.gds_size); 4044 4045 amdgpu_ring_commit(ring); 4046 4047 for (i = 0; i < adev->usec_timeout; i++) { 4048 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4049 break; 4050 udelay(1); 4051 } 4052 4053 if (i >= adev->usec_timeout) 4054 r = -ETIMEDOUT; 4055 4056 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4057 4058 return r; 4059 } 4060 4061 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4062 { 4063 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4064 struct amdgpu_ib ib; 4065 struct dma_fence *f = NULL; 4066 int r, i, j, k; 4067 unsigned total_size, vgpr_offset, sgpr_offset; 4068 u64 gpu_addr; 4069 4070 /* only support when RAS is enabled */ 4071 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4072 return 0; 4073 4074 /* bail if the compute ring is not ready */ 4075 if (!ring->sched.ready) 4076 return 0; 4077 4078 total_size = 4079 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4080 total_size += 4081 ((ARRAY_SIZE(sgpr1_init_regs) * 3) + 4 + 5 + 2) * 4; 4082 total_size += 4083 ((ARRAY_SIZE(sgpr2_init_regs) * 3) + 4 + 5 + 2) * 4; 4084 total_size = ALIGN(total_size, 256); 4085 vgpr_offset = total_size; 4086 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4087 sgpr_offset = total_size; 4088 total_size += sizeof(sgpr_init_compute_shader); 4089 4090 /* allocate an indirect buffer to put the commands in */ 4091 memset(&ib, 0, sizeof(ib)); 4092 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4093 if (r) { 4094 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4095 return r; 4096 } 4097 4098 /* load the compute shaders */ 4099 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4100 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4101 4102 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4103 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4104 4105 /* init the ib length to 0 */ 4106 ib.length_dw = 0; 4107 4108 /* VGPR */ 4109 /* write the register state for the compute dispatch */ 4110 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 4111 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4112 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4113 - PACKET3_SET_SH_REG_START; 4114 
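			/* data dword of the SET_SH_REG packet built above: the value
			 * loaded into the compute register selected by the offset dword
			 */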
ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4115 } 4116 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4117 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4118 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4119 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4120 - PACKET3_SET_SH_REG_START; 4121 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4122 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4123 4124 /* write dispatch packet */ 4125 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4126 ib.ptr[ib.length_dw++] = 0x40*2; /* x */ 4127 ib.ptr[ib.length_dw++] = 1; /* y */ 4128 ib.ptr[ib.length_dw++] = 1; /* z */ 4129 ib.ptr[ib.length_dw++] = 4130 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4131 4132 /* write CS partial flush packet */ 4133 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4134 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4135 4136 /* SGPR1 */ 4137 /* write the register state for the compute dispatch */ 4138 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i++) { 4139 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4140 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4141 - PACKET3_SET_SH_REG_START; 4142 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4143 } 4144 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4145 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4146 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4147 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4148 - PACKET3_SET_SH_REG_START; 4149 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4150 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4151 4152 /* write dispatch packet */ 4153 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4154 ib.ptr[ib.length_dw++] = 0xA0*2; /* x */ 4155 ib.ptr[ib.length_dw++] = 1; /* y */ 4156 ib.ptr[ib.length_dw++] = 1; /* z */ 4157 ib.ptr[ib.length_dw++] = 4158 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4159 4160 /* write CS partial flush packet */ 4161 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4162 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4163 4164 /* SGPR2 */ 4165 /* write the register state for the compute dispatch */ 4166 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i++) { 4167 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4168 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4169 - PACKET3_SET_SH_REG_START; 4170 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4171 } 4172 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4173 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4174 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4175 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4176 - PACKET3_SET_SH_REG_START; 4177 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4178 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4179 4180 /* write dispatch packet */ 4181 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4182 ib.ptr[ib.length_dw++] = 0xA0*2; /* x */ 4183 ib.ptr[ib.length_dw++] = 1; /* y */ 4184 ib.ptr[ib.length_dw++] = 1; /* z */ 4185 ib.ptr[ib.length_dw++] = 4186 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4187 4188 /* write CS partial flush packet */ 4189 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4190 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | 
EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
			}
		}
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_ARCTURUS)
		adev->gfx.num_gfx_rings = 0;
	else
		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v9_0_ecc_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = gfx_v9_0_do_edc_gds_workarounds(adev);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_gfx_ras_late_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	r = gfx_v9_0_ecc_late_init(handle);
	if (r)
		return r;

	return 0;
}

static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_setting;

	/* if RLC is not enabled, do nothing */
	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return false;

	return true;
}

static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RLC_SAFE_MODE__CMD_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;

	data = RLC_SAFE_MODE__CMD_MASK;
	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
}

static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
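	/*
	 * Reprogram coarse-grain power gating (and, where supported, pipeline
	 * powergating) only while the RLC is parked in safe mode; see the
	 * enter/exit calls bracketing the updates below.
	 */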
amdgpu_gfx_rlc_enter_safe_mode(adev); 4325 4326 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4327 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4328 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4329 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4330 } else { 4331 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4332 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4333 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4334 } 4335 4336 amdgpu_gfx_rlc_exit_safe_mode(adev); 4337 } 4338 4339 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4340 bool enable) 4341 { 4342 /* TODO: double check if we need to perform under safe mode */ 4343 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4344 4345 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4346 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4347 else 4348 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4349 4350 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4351 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4352 else 4353 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4354 4355 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4356 } 4357 4358 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4359 bool enable) 4360 { 4361 uint32_t data, def; 4362 4363 amdgpu_gfx_rlc_enter_safe_mode(adev); 4364 4365 /* It is disabled by HW by default */ 4366 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4367 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4368 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4369 4370 if (adev->asic_type != CHIP_VEGA12) 4371 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4372 4373 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4374 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4375 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4376 4377 /* only for Vega10 & Raven1 */ 4378 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4379 4380 if (def != data) 4381 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4382 4383 /* MGLS is a global flag to control all MGLS in GFX */ 4384 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4385 /* 2 - RLC memory Light sleep */ 4386 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4387 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4388 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4389 if (def != data) 4390 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4391 } 4392 /* 3 - CP memory Light sleep */ 4393 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4394 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4395 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4396 if (def != data) 4397 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4398 } 4399 } 4400 } else { 4401 /* 1 - MGCG_OVERRIDE */ 4402 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4403 4404 if (adev->asic_type != CHIP_VEGA12) 4405 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4406 4407 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4408 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4409 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4410 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4411 4412 if (def != data) 4413 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4414 4415 /* 2 - disable MGLS in RLC */ 4416 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4417 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4418 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4419 
WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4420 } 4421 4422 /* 3 - disable MGLS in CP */ 4423 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4424 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4425 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4426 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4427 } 4428 } 4429 4430 amdgpu_gfx_rlc_exit_safe_mode(adev); 4431 } 4432 4433 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4434 bool enable) 4435 { 4436 uint32_t data, def; 4437 4438 if (adev->asic_type == CHIP_ARCTURUS) 4439 return; 4440 4441 amdgpu_gfx_rlc_enter_safe_mode(adev); 4442 4443 /* Enable 3D CGCG/CGLS */ 4444 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4445 /* write cmd to clear cgcg/cgls ov */ 4446 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4447 /* unset CGCG override */ 4448 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4449 /* update CGCG and CGLS override bits */ 4450 if (def != data) 4451 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4452 4453 /* enable 3Dcgcg FSM(0x0000363f) */ 4454 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4455 4456 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4457 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4458 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4459 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4460 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4461 if (def != data) 4462 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4463 4464 /* set IDLE_POLL_COUNT(0x00900100) */ 4465 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4466 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4467 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4468 if (def != data) 4469 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4470 } else { 4471 /* Disable CGCG/CGLS */ 4472 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4473 /* disable cgcg, cgls should be disabled */ 4474 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4475 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4476 /* disable cgcg and cgls in FSM */ 4477 if (def != data) 4478 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4479 } 4480 4481 amdgpu_gfx_rlc_exit_safe_mode(adev); 4482 } 4483 4484 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4485 bool enable) 4486 { 4487 uint32_t def, data; 4488 4489 amdgpu_gfx_rlc_enter_safe_mode(adev); 4490 4491 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4492 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4493 /* unset CGCG override */ 4494 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4495 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4496 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4497 else 4498 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4499 /* update CGCG and CGLS override bits */ 4500 if (def != data) 4501 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4502 4503 /* enable cgcg FSM(0x0000363F) */ 4504 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4505 4506 if (adev->asic_type == CHIP_ARCTURUS) 4507 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4508 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4509 else 4510 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4511 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4512 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4513 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4514 
RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4515 if (def != data) 4516 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4517 4518 /* set IDLE_POLL_COUNT(0x00900100) */ 4519 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4520 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4521 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4522 if (def != data) 4523 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4524 } else { 4525 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4526 /* reset CGCG/CGLS bits */ 4527 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4528 /* disable cgcg and cgls in FSM */ 4529 if (def != data) 4530 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4531 } 4532 4533 amdgpu_gfx_rlc_exit_safe_mode(adev); 4534 } 4535 4536 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4537 bool enable) 4538 { 4539 if (enable) { 4540 /* CGCG/CGLS should be enabled after MGCG/MGLS 4541 * === MGCG + MGLS === 4542 */ 4543 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4544 /* === CGCG /CGLS for GFX 3D Only === */ 4545 gfx_v9_0_update_3d_clock_gating(adev, enable); 4546 /* === CGCG + CGLS === */ 4547 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4548 } else { 4549 /* CGCG/CGLS should be disabled before MGCG/MGLS 4550 * === CGCG + CGLS === 4551 */ 4552 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4553 /* === CGCG /CGLS for GFX 3D Only === */ 4554 gfx_v9_0_update_3d_clock_gating(adev, enable); 4555 /* === MGCG + MGLS === */ 4556 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4557 } 4558 return 0; 4559 } 4560 4561 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4562 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4563 .set_safe_mode = gfx_v9_0_set_safe_mode, 4564 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4565 .init = gfx_v9_0_rlc_init, 4566 .get_csb_size = gfx_v9_0_get_csb_size, 4567 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4568 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4569 .resume = gfx_v9_0_rlc_resume, 4570 .stop = gfx_v9_0_rlc_stop, 4571 .reset = gfx_v9_0_rlc_reset, 4572 .start = gfx_v9_0_rlc_start 4573 }; 4574 4575 static int gfx_v9_0_set_powergating_state(void *handle, 4576 enum amd_powergating_state state) 4577 { 4578 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4579 bool enable = (state == AMD_PG_STATE_GATE) ? 
true : false; 4580 4581 switch (adev->asic_type) { 4582 case CHIP_RAVEN: 4583 case CHIP_RENOIR: 4584 if (!enable) { 4585 amdgpu_gfx_off_ctrl(adev, false); 4586 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4587 } 4588 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4589 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4590 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4591 } else { 4592 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4593 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4594 } 4595 4596 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4597 gfx_v9_0_enable_cp_power_gating(adev, true); 4598 else 4599 gfx_v9_0_enable_cp_power_gating(adev, false); 4600 4601 /* update gfx cgpg state */ 4602 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4603 4604 /* update mgcg state */ 4605 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4606 4607 if (enable) 4608 amdgpu_gfx_off_ctrl(adev, true); 4609 break; 4610 case CHIP_VEGA12: 4611 if (!enable) { 4612 amdgpu_gfx_off_ctrl(adev, false); 4613 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4614 } else { 4615 amdgpu_gfx_off_ctrl(adev, true); 4616 } 4617 break; 4618 default: 4619 break; 4620 } 4621 4622 return 0; 4623 } 4624 4625 static int gfx_v9_0_set_clockgating_state(void *handle, 4626 enum amd_clockgating_state state) 4627 { 4628 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4629 4630 if (amdgpu_sriov_vf(adev)) 4631 return 0; 4632 4633 switch (adev->asic_type) { 4634 case CHIP_VEGA10: 4635 case CHIP_VEGA12: 4636 case CHIP_VEGA20: 4637 case CHIP_RAVEN: 4638 case CHIP_ARCTURUS: 4639 case CHIP_RENOIR: 4640 gfx_v9_0_update_gfx_clock_gating(adev, 4641 state == AMD_CG_STATE_GATE ? true : false); 4642 break; 4643 default: 4644 break; 4645 } 4646 return 0; 4647 } 4648 4649 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4650 { 4651 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4652 int data; 4653 4654 if (amdgpu_sriov_vf(adev)) 4655 *flags = 0; 4656 4657 /* AMD_CG_SUPPORT_GFX_MGCG */ 4658 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4659 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4660 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4661 4662 /* AMD_CG_SUPPORT_GFX_CGCG */ 4663 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4664 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4665 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4666 4667 /* AMD_CG_SUPPORT_GFX_CGLS */ 4668 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4669 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4670 4671 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4672 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4673 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4674 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4675 4676 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4677 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4678 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4679 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4680 4681 if (adev->asic_type != CHIP_ARCTURUS) { 4682 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4683 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4684 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4685 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4686 4687 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4688 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4689 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4690 } 4691 } 4692 4693 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4694 { 4695 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4696 } 4697 4698 static u64 
gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4699 { 4700 struct amdgpu_device *adev = ring->adev; 4701 u64 wptr; 4702 4703 /* XXX check if swapping is necessary on BE */ 4704 if (ring->use_doorbell) { 4705 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4706 } else { 4707 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4708 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4709 } 4710 4711 return wptr; 4712 } 4713 4714 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4715 { 4716 struct amdgpu_device *adev = ring->adev; 4717 4718 if (ring->use_doorbell) { 4719 /* XXX check if swapping is necessary on BE */ 4720 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4721 WDOORBELL64(ring->doorbell_index, ring->wptr); 4722 } else { 4723 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4724 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4725 } 4726 } 4727 4728 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4729 { 4730 struct amdgpu_device *adev = ring->adev; 4731 u32 ref_and_mask, reg_mem_engine; 4732 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 4733 4734 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4735 switch (ring->me) { 4736 case 1: 4737 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4738 break; 4739 case 2: 4740 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4741 break; 4742 default: 4743 return; 4744 } 4745 reg_mem_engine = 0; 4746 } else { 4747 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4748 reg_mem_engine = 1; /* pfp */ 4749 } 4750 4751 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4752 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 4753 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 4754 ref_and_mask, ref_and_mask, 0x20); 4755 } 4756 4757 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4758 struct amdgpu_job *job, 4759 struct amdgpu_ib *ib, 4760 uint32_t flags) 4761 { 4762 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4763 u32 header, control = 0; 4764 4765 if (ib->flags & AMDGPU_IB_FLAG_CE) 4766 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4767 else 4768 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4769 4770 control |= ib->length_dw | (vmid << 24); 4771 4772 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4773 control |= INDIRECT_BUFFER_PRE_ENB(1); 4774 4775 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4776 gfx_v9_0_ring_emit_de_meta(ring); 4777 } 4778 4779 amdgpu_ring_write(ring, header); 4780 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4781 amdgpu_ring_write(ring, 4782 #ifdef __BIG_ENDIAN 4783 (2 << 0) | 4784 #endif 4785 lower_32_bits(ib->gpu_addr)); 4786 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4787 amdgpu_ring_write(ring, control); 4788 } 4789 4790 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4791 struct amdgpu_job *job, 4792 struct amdgpu_ib *ib, 4793 uint32_t flags) 4794 { 4795 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4796 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4797 4798 /* Currently, there is a high possibility to get wave ID mismatch 4799 * between ME and GDS, leading to a hw deadlock, because ME generates 4800 * different wave IDs than the GDS expects. This situation happens 4801 * randomly when at least 5 compute pipes use GDS ordered append. 4802 * The wave IDs generated by ME are also wrong after suspend/resume. 4803 * Those are probably bugs somewhere else in the kernel driver. 
4804 * 4805 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4806 * GDS to 0 for this ring (me/pipe). 4807 */ 4808 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4809 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4810 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4811 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4812 } 4813 4814 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4815 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4816 amdgpu_ring_write(ring, 4817 #ifdef __BIG_ENDIAN 4818 (2 << 0) | 4819 #endif 4820 lower_32_bits(ib->gpu_addr)); 4821 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4822 amdgpu_ring_write(ring, control); 4823 } 4824 4825 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4826 u64 seq, unsigned flags) 4827 { 4828 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4829 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4830 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4831 4832 /* RELEASE_MEM - flush caches, send int */ 4833 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4834 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4835 EOP_TC_NC_ACTION_EN) : 4836 (EOP_TCL1_ACTION_EN | 4837 EOP_TC_ACTION_EN | 4838 EOP_TC_WB_ACTION_EN | 4839 EOP_TC_MD_ACTION_EN)) | 4840 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4841 EVENT_INDEX(5))); 4842 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 4843 4844 /* 4845 * the address should be Qword aligned if 64bit write, Dword 4846 * aligned if only send 32bit data low (discard data high) 4847 */ 4848 if (write64bit) 4849 BUG_ON(addr & 0x7); 4850 else 4851 BUG_ON(addr & 0x3); 4852 amdgpu_ring_write(ring, lower_32_bits(addr)); 4853 amdgpu_ring_write(ring, upper_32_bits(addr)); 4854 amdgpu_ring_write(ring, lower_32_bits(seq)); 4855 amdgpu_ring_write(ring, upper_32_bits(seq)); 4856 amdgpu_ring_write(ring, 0); 4857 } 4858 4859 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4860 { 4861 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4862 uint32_t seq = ring->fence_drv.sync_seq; 4863 uint64_t addr = ring->fence_drv.gpu_addr; 4864 4865 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4866 lower_32_bits(addr), upper_32_bits(addr), 4867 seq, 0xffffffff, 4); 4868 } 4869 4870 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4871 unsigned vmid, uint64_t pd_addr) 4872 { 4873 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4874 4875 /* compute doesn't have PFP */ 4876 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4877 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4878 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4879 amdgpu_ring_write(ring, 0x0); 4880 } 4881 } 4882 4883 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4884 { 4885 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4886 } 4887 4888 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4889 { 4890 u64 wptr; 4891 4892 /* XXX check if swapping is necessary on BE */ 4893 if (ring->use_doorbell) 4894 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4895 else 4896 BUG(); 4897 return wptr; 4898 } 4899 4900 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4901 bool acquire) 4902 { 4903 struct amdgpu_device *adev = ring->adev; 4904 int pipe_num, tmp, reg; 4905 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4906 4907 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4908 4909 /* first me only has 2 entries, GFX and HP3D */ 4910 if (ring->me > 0) 4911 pipe_num -= 2; 4912 4913 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4914 tmp = RREG32(reg); 4915 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4916 WREG32(reg, tmp); 4917 } 4918 4919 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4920 struct amdgpu_ring *ring, 4921 bool acquire) 4922 { 4923 int i, pipe; 4924 bool reserve; 4925 struct amdgpu_ring *iring; 4926 4927 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4928 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 4929 if (acquire) 4930 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4931 else 4932 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4933 4934 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4935 /* Clear all reservations - everyone reacquires all resources */ 4936 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4937 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4938 true); 4939 4940 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4941 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4942 true); 4943 } else { 4944 /* Lower all pipes without a current reservation */ 4945 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4946 iring = &adev->gfx.gfx_ring[i]; 4947 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4948 iring->me, 4949 iring->pipe, 4950 0); 4951 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4952 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4953 } 4954 4955 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4956 iring = &adev->gfx.compute_ring[i]; 4957 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 4958 iring->me, 4959 iring->pipe, 4960 0); 4961 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4962 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4963 } 4964 } 4965 4966 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4967 } 4968 4969 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4970 struct amdgpu_ring *ring, 4971 bool acquire) 4972 { 4973 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4974 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4975 4976 mutex_lock(&adev->srbm_mutex); 4977 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4978 4979 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4980 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4981 4982 soc15_grbm_select(adev, 0, 0, 0, 0); 4983 mutex_unlock(&adev->srbm_mutex); 4984 } 4985 4986 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4987 enum drm_sched_priority priority) 4988 { 4989 struct amdgpu_device *adev = ring->adev; 4990 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4991 4992 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4993 return; 4994 4995 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4996 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4997 } 4998 4999 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5000 { 5001 struct amdgpu_device *adev = ring->adev; 5002 5003 /* XXX check if swapping is necessary on BE */ 5004 if (ring->use_doorbell) { 5005 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5006 WDOORBELL64(ring->doorbell_index, ring->wptr); 5007 } else{ 5008 BUG(); /* only DOORBELL method supported on gfx9 now */ 5009 } 5010 } 5011 5012 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5013 u64 seq, unsigned int flags) 5014 { 5015 struct amdgpu_device *adev = ring->adev; 5016 5017 /* we only allocate 32bit for each seq wb address */ 5018 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5019 5020 /* write fence seq to the "addr" */ 5021 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5022 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5023 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5024 amdgpu_ring_write(ring, lower_32_bits(addr)); 5025 amdgpu_ring_write(ring, upper_32_bits(addr)); 5026 amdgpu_ring_write(ring, lower_32_bits(seq)); 5027 5028 if (flags & AMDGPU_FENCE_FLAG_INT) { 5029 /* set register to trigger INT */ 5030 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5031 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5032 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5033 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5034 amdgpu_ring_write(ring, 0); 5035 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5036 } 5037 } 5038 5039 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5040 { 5041 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5042 amdgpu_ring_write(ring, 0); 5043 } 5044 5045 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5046 { 5047 struct v9_ce_ib_state ce_payload = {0}; 5048 uint64_t csa_addr; 5049 int cnt; 5050 5051 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5052 csa_addr = amdgpu_csa_vaddr(ring->adev); 5053 5054 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5055 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5056 WRITE_DATA_DST_SEL(8) | 5057 WR_CONFIRM) | 5058 WRITE_DATA_CACHE_POLICY(0)); 5059 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5060 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5061 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5062 } 5063 5064 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5065 { 5066 struct v9_de_ib_state de_payload = {0}; 5067 uint64_t csa_addr, gds_addr; 5068 int cnt; 5069 5070 csa_addr = amdgpu_csa_vaddr(ring->adev); 5071 gds_addr = csa_addr + 4096; 5072 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 5073 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5074 5075 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5076 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5077 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5078 WRITE_DATA_DST_SEL(8) | 5079 WR_CONFIRM) | 5080 WRITE_DATA_CACHE_POLICY(0)); 5081 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5082 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5083 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5084 } 5085 5086 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5087 { 5088 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5089 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5090 } 5091 5092 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5093 { 5094 uint32_t dw2 = 0; 5095 5096 if (amdgpu_sriov_vf(ring->adev)) 5097 gfx_v9_0_ring_emit_ce_meta(ring); 5098 5099 gfx_v9_0_ring_emit_tmz(ring, true); 5100 5101 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5102 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5103 /* set load_global_config & load_global_uconfig */ 5104 dw2 |= 0x8001; 5105 /* set load_cs_sh_regs */ 5106 dw2 |= 0x01000000; 5107 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5108 dw2 |= 0x10002; 5109 5110 /* set load_ce_ram if preamble presented */ 5111 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5112 dw2 |= 0x10000000; 5113 } else { 5114 /* still load_ce_ram if this is the first time preamble presented 5115 * although there is no context switch happens. 5116 */ 5117 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5118 dw2 |= 0x10000000; 5119 } 5120 5121 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5122 amdgpu_ring_write(ring, dw2); 5123 amdgpu_ring_write(ring, 0); 5124 } 5125 5126 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5127 { 5128 unsigned ret; 5129 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5130 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5131 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5132 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5133 ret = ring->wptr & ring->buf_mask; 5134 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5135 return ret; 5136 } 5137 5138 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5139 { 5140 unsigned cur; 5141 BUG_ON(offset > ring->buf_mask); 5142 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5143 5144 cur = (ring->wptr & ring->buf_mask) - 1; 5145 if (likely(cur > offset)) 5146 ring->ring[offset] = cur - offset; 5147 else 5148 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5149 } 5150 5151 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5152 { 5153 struct amdgpu_device *adev = ring->adev; 5154 5155 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5156 amdgpu_ring_write(ring, 0 | /* src: register*/ 5157 (5 << 8) | /* dst: memory */ 5158 (1 << 20)); /* write confirm */ 5159 amdgpu_ring_write(ring, reg); 5160 amdgpu_ring_write(ring, 0); 5161 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5162 adev->virt.reg_val_offs * 4)); 5163 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5164 adev->virt.reg_val_offs * 4)); 5165 } 5166 5167 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5168 uint32_t val) 5169 { 5170 uint32_t cmd = 0; 5171 5172 switch (ring->funcs->type) { 5173 case AMDGPU_RING_TYPE_GFX: 5174 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5175 break; 5176 case AMDGPU_RING_TYPE_KIQ: 5177 cmd = (1 << 16); /* no inc addr */ 5178 break; 5179 default: 5180 cmd = WR_CONFIRM; 5181 break; 5182 } 5183 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5184 amdgpu_ring_write(ring, cmd); 5185 amdgpu_ring_write(ring, reg); 5186 amdgpu_ring_write(ring, 0); 5187 amdgpu_ring_write(ring, val); 5188 } 5189 5190 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5191 uint32_t val, uint32_t mask) 5192 { 5193 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5194 } 5195 5196 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5197 uint32_t reg0, uint32_t reg1, 5198 uint32_t ref, uint32_t mask) 5199 { 5200 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5201 struct amdgpu_device *adev = ring->adev; 5202 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5203 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5204 5205 if (fw_version_ok) 5206 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5207 ref, mask, 0x20); 5208 else 5209 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5210 ref, mask); 5211 } 5212 5213 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5214 { 5215 struct amdgpu_device *adev = ring->adev; 5216 uint32_t value = 0; 5217 5218 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5219 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5220 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5221 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5222 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5223 } 5224 5225 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5226 enum amdgpu_interrupt_state state) 5227 { 5228 switch (state) { 5229 case AMDGPU_IRQ_STATE_DISABLE: 5230 case AMDGPU_IRQ_STATE_ENABLE: 5231 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5232 TIME_STAMP_INT_ENABLE, 5233 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5234 break; 5235 default: 5236 break; 5237 } 5238 } 5239 5240 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5241 int me, int pipe, 5242 enum amdgpu_interrupt_state state) 5243 { 5244 u32 mec_int_cntl, mec_int_cntl_reg; 5245 5246 /* 5247 * amdgpu controls only the first MEC. That's why this function only 5248 * handles the setting of interrupts for this specific MEC. All other 5249 * pipes' interrupts are set by amdkfd. 
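* That first MEC is addressed as me == 1, which is why only the me == 1
* case is handled below; any other ME falls through to the debug message.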
5250 */ 5251 5252 if (me == 1) { 5253 switch (pipe) { 5254 case 0: 5255 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5256 break; 5257 case 1: 5258 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5259 break; 5260 case 2: 5261 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5262 break; 5263 case 3: 5264 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5265 break; 5266 default: 5267 DRM_DEBUG("invalid pipe %d\n", pipe); 5268 return; 5269 } 5270 } else { 5271 DRM_DEBUG("invalid me %d\n", me); 5272 return; 5273 } 5274 5275 switch (state) { 5276 case AMDGPU_IRQ_STATE_DISABLE: 5277 mec_int_cntl = RREG32(mec_int_cntl_reg); 5278 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5279 TIME_STAMP_INT_ENABLE, 0); 5280 WREG32(mec_int_cntl_reg, mec_int_cntl); 5281 break; 5282 case AMDGPU_IRQ_STATE_ENABLE: 5283 mec_int_cntl = RREG32(mec_int_cntl_reg); 5284 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5285 TIME_STAMP_INT_ENABLE, 1); 5286 WREG32(mec_int_cntl_reg, mec_int_cntl); 5287 break; 5288 default: 5289 break; 5290 } 5291 } 5292 5293 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5294 struct amdgpu_irq_src *source, 5295 unsigned type, 5296 enum amdgpu_interrupt_state state) 5297 { 5298 switch (state) { 5299 case AMDGPU_IRQ_STATE_DISABLE: 5300 case AMDGPU_IRQ_STATE_ENABLE: 5301 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5302 PRIV_REG_INT_ENABLE, 5303 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5304 break; 5305 default: 5306 break; 5307 } 5308 5309 return 0; 5310 } 5311 5312 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5313 struct amdgpu_irq_src *source, 5314 unsigned type, 5315 enum amdgpu_interrupt_state state) 5316 { 5317 switch (state) { 5318 case AMDGPU_IRQ_STATE_DISABLE: 5319 case AMDGPU_IRQ_STATE_ENABLE: 5320 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5321 PRIV_INSTR_INT_ENABLE, 5322 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5323 default: 5324 break; 5325 } 5326 5327 return 0; 5328 } 5329 5330 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5331 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5332 CP_ECC_ERROR_INT_ENABLE, 1) 5333 5334 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5335 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5336 CP_ECC_ERROR_INT_ENABLE, 0) 5337 5338 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5339 struct amdgpu_irq_src *source, 5340 unsigned type, 5341 enum amdgpu_interrupt_state state) 5342 { 5343 switch (state) { 5344 case AMDGPU_IRQ_STATE_DISABLE: 5345 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5346 CP_ECC_ERROR_INT_ENABLE, 0); 5347 DISABLE_ECC_ON_ME_PIPE(1, 0); 5348 DISABLE_ECC_ON_ME_PIPE(1, 1); 5349 DISABLE_ECC_ON_ME_PIPE(1, 2); 5350 DISABLE_ECC_ON_ME_PIPE(1, 3); 5351 break; 5352 5353 case AMDGPU_IRQ_STATE_ENABLE: 5354 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5355 CP_ECC_ERROR_INT_ENABLE, 1); 5356 ENABLE_ECC_ON_ME_PIPE(1, 0); 5357 ENABLE_ECC_ON_ME_PIPE(1, 1); 5358 ENABLE_ECC_ON_ME_PIPE(1, 2); 5359 ENABLE_ECC_ON_ME_PIPE(1, 3); 5360 break; 5361 default: 5362 break; 5363 } 5364 5365 return 0; 5366 } 5367 5368 5369 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5370 struct amdgpu_irq_src *src, 5371 unsigned type, 5372 enum amdgpu_interrupt_state state) 5373 { 5374 switch (type) { 5375 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5376 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5377 break; 5378 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5379 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5380 break; 5381 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5382 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5383 break; 5384 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5385 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5386 break; 5387 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5388 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5389 break; 5390 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5391 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5392 break; 5393 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5394 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5395 break; 5396 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5397 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5398 break; 5399 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5400 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5401 break; 5402 default: 5403 break; 5404 } 5405 return 0; 5406 } 5407 5408 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5409 struct amdgpu_irq_src *source, 5410 struct amdgpu_iv_entry *entry) 5411 { 5412 int i; 5413 u8 me_id, pipe_id, queue_id; 5414 struct amdgpu_ring *ring; 5415 5416 DRM_DEBUG("IH: CP EOP\n"); 5417 me_id = (entry->ring_id & 0x0c) >> 2; 5418 pipe_id = (entry->ring_id & 0x03) >> 0; 5419 queue_id = (entry->ring_id & 0x70) >> 4; 5420 5421 switch (me_id) { 5422 case 0: 5423 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5424 break; 5425 case 1: 5426 case 2: 5427 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5428 ring = &adev->gfx.compute_ring[i]; 5429 /* Per-queue interrupt is supported for MEC starting from VI. 5430 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
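* The IV entry still carries the queue id, though, so the lookup below can
* dispatch the EOP to the one compute ring that matches me/pipe/queue.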
5431 */ 5432 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5433 amdgpu_fence_process(ring); 5434 } 5435 break; 5436 } 5437 return 0; 5438 } 5439 5440 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5441 struct amdgpu_iv_entry *entry) 5442 { 5443 u8 me_id, pipe_id, queue_id; 5444 struct amdgpu_ring *ring; 5445 int i; 5446 5447 me_id = (entry->ring_id & 0x0c) >> 2; 5448 pipe_id = (entry->ring_id & 0x03) >> 0; 5449 queue_id = (entry->ring_id & 0x70) >> 4; 5450 5451 switch (me_id) { 5452 case 0: 5453 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5454 break; 5455 case 1: 5456 case 2: 5457 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5458 ring = &adev->gfx.compute_ring[i]; 5459 if (ring->me == me_id && ring->pipe == pipe_id && 5460 ring->queue == queue_id) 5461 drm_sched_fault(&ring->sched); 5462 } 5463 break; 5464 } 5465 } 5466 5467 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5468 struct amdgpu_irq_src *source, 5469 struct amdgpu_iv_entry *entry) 5470 { 5471 DRM_ERROR("Illegal register access in command stream\n"); 5472 gfx_v9_0_fault(adev, entry); 5473 return 0; 5474 } 5475 5476 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5477 struct amdgpu_irq_src *source, 5478 struct amdgpu_iv_entry *entry) 5479 { 5480 DRM_ERROR("Illegal instruction in command stream\n"); 5481 gfx_v9_0_fault(adev, entry); 5482 return 0; 5483 } 5484 5485 5486 static const struct soc15_ras_field_entry gc_ras_fields_vg20[] = { 5487 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 5488 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5489 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 5490 }, 5491 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 5492 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 5493 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 5494 }, 5495 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5496 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 5497 0, 0 5498 }, 5499 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5500 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 5501 0, 0 5502 }, 5503 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 5504 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 5505 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 5506 }, 5507 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5508 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 5509 0, 0 5510 }, 5511 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5512 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5513 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 5514 }, 5515 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 5516 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 5517 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 5518 }, 5519 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 5520 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 5521 0, 0 5522 }, 5523 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 5524 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 5525 0, 0 5526 }, 5527 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 5528 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 5529 0, 0 5530 }, 5531 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5532 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 5533 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 5534 }, 5535 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5536 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 5537 0, 0 5538 }, 5539 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 
5540 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5541 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 5542 }, 5543 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5544 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5545 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5546 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 5547 }, 5548 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5549 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5550 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 5551 0, 0 5552 }, 5553 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5554 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5555 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5556 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 5557 }, 5558 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5559 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5560 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5561 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 5562 }, 5563 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5564 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5565 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5566 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 5567 }, 5568 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5569 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 5570 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5571 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 5572 }, 5573 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 5574 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 5575 0, 0 5576 }, 5577 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5578 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5579 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 5580 }, 5581 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5582 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 5583 0, 0 5584 }, 5585 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5586 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 5587 0, 0 5588 }, 5589 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5590 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 5591 0, 0 5592 }, 5593 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 5594 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 5595 0, 0 5596 }, 5597 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5598 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 5599 0, 0 5600 }, 5601 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 5602 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 5603 0, 0 5604 }, 5605 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5606 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5607 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 5608 }, 5609 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5610 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5611 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 5612 }, 5613 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5614 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5615 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 5616 }, 5617 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5618 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5619 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 5620 }, 5621 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5622 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5623 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 5624 }, 5625 { "TCC_IN_USE_DEC", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5626 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 5627 0, 0 5628 }, 5629 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5630 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 5631 0, 0 5632 }, 5633 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5634 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 5635 0, 0 5636 }, 5637 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5638 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 5639 0, 0 5640 }, 5641 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5642 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 5643 0, 0 5644 }, 5645 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 5646 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 5647 0, 0 5648 }, 5649 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5650 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 5651 0, 0 5652 }, 5653 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5654 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 5655 0, 0 5656 }, 5657 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5658 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 5659 0, 0 5660 }, 5661 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5662 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5663 0, 0 5664 }, 5665 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5666 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 5667 0, 0 5668 }, 5669 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5670 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5671 0, 0 5672 }, 5673 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5674 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 5675 0, 0 5676 }, 5677 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 5678 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 5679 0, 0 5680 }, 5681 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5682 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5683 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 5684 }, 5685 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5686 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5687 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 5688 }, 5689 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5690 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 5691 0, 0 5692 }, 5693 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5694 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 5695 0, 0 5696 }, 5697 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5698 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 5699 0, 0 5700 }, 5701 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5702 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5703 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 5704 }, 5705 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 5706 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5707 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 5708 }, 5709 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5710 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5711 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 5712 }, 5713 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5714 
SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5715 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 5716 }, 5717 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 5718 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 5719 0, 0 5720 }, 5721 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5722 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5723 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 5724 }, 5725 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5726 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5727 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 5728 }, 5729 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5730 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 5731 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 5732 }, 5733 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5734 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5735 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 5736 }, 5737 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5738 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5739 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 5740 }, 5741 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5742 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5743 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 5744 }, 5745 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 5746 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5747 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 5748 }, 5749 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5750 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5751 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 5752 }, 5753 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5754 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5755 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 5756 }, 5757 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5758 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5759 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 5760 }, 5761 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5762 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5763 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 5764 }, 5765 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5766 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5767 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 5768 }, 5769 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5770 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5771 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 5772 }, 5773 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5774 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5775 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 5776 }, 5777 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5778 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5779 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 5780 }, 5781 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5782 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5783 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 5784 }, 5785 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5786 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5787 SOC15_REG_FIELD(SQC_EDC_CNT2, 
DATA_BANKA_BANK_RAM_DED_COUNT) 5788 }, 5789 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5790 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5791 0, 0 5792 }, 5793 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5794 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 5795 0, 0 5796 }, 5797 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5798 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 5799 0, 0 5800 }, 5801 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5802 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 5803 0, 0 5804 }, 5805 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5806 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 5807 0, 0 5808 }, 5809 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 5810 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 5811 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 5812 }, 5813 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5814 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 5815 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 5816 }, 5817 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5818 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 5819 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 5820 }, 5821 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5822 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 5823 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 5824 }, 5825 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5826 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 5827 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 5828 }, 5829 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5830 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 5831 0, 0 5832 }, 5833 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5834 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 5835 0, 0 5836 }, 5837 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5838 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 5839 0, 0 5840 }, 5841 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5842 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 5843 0, 0 5844 }, 5845 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 5846 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 5847 0, 0 5848 }, 5849 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5850 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 5851 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 5852 }, 5853 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5854 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 5855 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 5856 }, 5857 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5858 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 5859 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 5860 }, 5861 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5862 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 5863 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 5864 }, 5865 { "EA_WRET_TAGMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5866 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 5867 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 5868 }, 5869 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5870 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 5871 0, 0 5872 }, 5873 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5874 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 5875 0, 0 5876 }, 5877 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5878 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 5879 0, 0 5880 }, 5881 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5882 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 5883 0, 0 5884 }, 5885 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 5886 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 5887 0, 0 5888 }, 5889 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5890 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 5891 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 5892 }, 5893 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5894 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 5895 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 5896 }, 5897 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5898 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 5899 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 5900 }, 5901 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5902 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 5903 0, 0 5904 }, 5905 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5906 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 5907 0, 0 5908 }, 5909 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5910 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 5911 0, 0 5912 }, 5913 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5914 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 5915 0, 0 5916 }, 5917 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5918 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 5919 0, 0 5920 }, 5921 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 5922 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 5923 0, 0 5924 } 5925 }; 5926 5927 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 5928 void *inject_if) 5929 { 5930 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 5931 int ret; 5932 struct ta_ras_trigger_error_input block_info = { 0 }; 5933 5934 if (adev->asic_type != CHIP_VEGA20) 5935 return -EINVAL; 5936 5937 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 5938 return -EINVAL; 5939 5940 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 5941 return -EPERM; 5942 5943 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 5944 info->head.type)) { 5945 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 5946 ras_gfx_subblocks[info->head.sub_block_index].name, 5947 info->head.type); 5948 return -EPERM; 5949 } 5950 5951 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 5952 info->head.type)) { 5953 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 5954 ras_gfx_subblocks[info->head.sub_block_index].name, 5955 info->head.type); 5956 return -EPERM; 5957 } 5958 5959 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 5960 block_info.sub_block_index = 5961 
ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 5962 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 5963 block_info.address = info->address; 5964 block_info.value = info->value; 5965 5966 mutex_lock(&adev->grbm_idx_mutex); 5967 ret = psp_ras_trigger_error(&adev->psp, &block_info); 5968 mutex_unlock(&adev->grbm_idx_mutex); 5969 5970 return ret; 5971 } 5972 5973 static const char *vml2_mems[] = { 5974 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 5975 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 5976 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 5977 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 5978 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 5979 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 5980 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 5981 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 5982 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 5983 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 5984 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 5985 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 5986 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 5987 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 5988 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 5989 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 5990 }; 5991 5992 static const char *vml2_walker_mems[] = { 5993 "UTC_VML2_CACHE_PDE0_MEM0", 5994 "UTC_VML2_CACHE_PDE0_MEM1", 5995 "UTC_VML2_CACHE_PDE1_MEM0", 5996 "UTC_VML2_CACHE_PDE1_MEM1", 5997 "UTC_VML2_CACHE_PDE2_MEM0", 5998 "UTC_VML2_CACHE_PDE2_MEM1", 5999 "UTC_VML2_RDIF_LOG_FIFO", 6000 }; 6001 6002 static const char *atc_l2_cache_2m_mems[] = { 6003 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6004 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6005 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6006 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6007 }; 6008 6009 static const char *atc_l2_cache_4k_mems[] = { 6010 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6011 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6012 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6013 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6014 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6015 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6016 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6017 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6018 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6019 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6020 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6021 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6022 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6023 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6024 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6025 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6026 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6027 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6028 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6029 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6030 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6031 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6032 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6033 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6034 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6035 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6036 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6037 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6038 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6039 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6040 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6041 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6042 }; 6043 6044 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6045 struct ras_err_data *err_data) 6046 { 6047 uint32_t i, data; 6048 uint32_t sec_count, ded_count; 6049 6050 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6051 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6052 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6053 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6054 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6055 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 
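/* the ATC L2 4K-page EDC counters are cleared the same way before the
 * per-index read-back loops below
 */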
6056 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6057 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6058 6059 for (i = 0; i < 16; i++) { 6060 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6061 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6062 6063 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6064 if (sec_count) { 6065 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6066 vml2_mems[i], sec_count); 6067 err_data->ce_count += sec_count; 6068 } 6069 6070 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6071 if (ded_count) { 6072 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6073 vml2_mems[i], ded_count); 6074 err_data->ue_count += ded_count; 6075 } 6076 } 6077 6078 for (i = 0; i < 7; i++) { 6079 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6080 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6081 6082 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6083 SEC_COUNT); 6084 if (sec_count) { 6085 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6086 vml2_walker_mems[i], sec_count); 6087 err_data->ce_count += sec_count; 6088 } 6089 6090 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6091 DED_COUNT); 6092 if (ded_count) { 6093 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6094 vml2_walker_mems[i], ded_count); 6095 err_data->ue_count += ded_count; 6096 } 6097 } 6098 6099 for (i = 0; i < 4; i++) { 6100 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6101 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6102 6103 sec_count = (data & 0x00006000L) >> 0xd; 6104 if (sec_count) { 6105 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6106 atc_l2_cache_2m_mems[i], sec_count); 6107 err_data->ce_count += sec_count; 6108 } 6109 } 6110 6111 for (i = 0; i < 32; i++) { 6112 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6113 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6114 6115 sec_count = (data & 0x00006000L) >> 0xd; 6116 if (sec_count) { 6117 DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i, 6118 atc_l2_cache_4k_mems[i], sec_count); 6119 err_data->ce_count += sec_count; 6120 } 6121 6122 ded_count = (data & 0x00018000L) >> 0xf; 6123 if (ded_count) { 6124 DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i, 6125 atc_l2_cache_4k_mems[i], ded_count); 6126 err_data->ue_count += ded_count; 6127 } 6128 } 6129 6130 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6131 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6132 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6133 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6134 6135 return 0; 6136 } 6137 6138 static int __get_ras_error_count(const struct soc15_reg_entry *reg, 6139 uint32_t se_id, uint32_t inst_id, uint32_t value, 6140 uint32_t *sec_count, uint32_t *ded_count) 6141 { 6142 uint32_t i; 6143 uint32_t sec_cnt, ded_cnt; 6144 6145 for (i = 0; i < ARRAY_SIZE(gc_ras_fields_vg20); i++) { 6146 if(gc_ras_fields_vg20[i].reg_offset != reg->reg_offset || 6147 gc_ras_fields_vg20[i].seg != reg->seg || 6148 gc_ras_fields_vg20[i].inst != reg->inst) 6149 continue; 6150 6151 sec_cnt = (value & 6152 gc_ras_fields_vg20[i].sec_count_mask) >> 6153 gc_ras_fields_vg20[i].sec_count_shift; 6154 if (sec_cnt) { 6155 DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n", 6156 gc_ras_fields_vg20[i].name, 6157 se_id, inst_id, 6158 sec_cnt); 6159 *sec_count += sec_cnt; 6160 } 6161 6162 ded_cnt = (value & 6163 gc_ras_fields_vg20[i].ded_count_mask) >> 6164 gc_ras_fields_vg20[i].ded_count_shift; 6165 if (ded_cnt) { 6166 DRM_INFO("GFX SubBlock 
%s, Instance[%d][%d], DED %d\n", 6167 gc_ras_fields_vg20[i].name, 6168 se_id, inst_id, 6169 ded_cnt); 6170 *ded_count += ded_cnt; 6171 } 6172 } 6173 6174 return 0; 6175 } 6176 6177 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6178 void *ras_error_status) 6179 { 6180 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6181 uint32_t sec_count = 0, ded_count = 0; 6182 uint32_t i, j, k; 6183 uint32_t reg_value; 6184 6185 if (adev->asic_type != CHIP_VEGA20) 6186 return -EINVAL; 6187 6188 err_data->ue_count = 0; 6189 err_data->ce_count = 0; 6190 6191 mutex_lock(&adev->grbm_idx_mutex); 6192 6193 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 6194 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 6195 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 6196 gfx_v9_0_select_se_sh(adev, j, 0, k); 6197 reg_value = 6198 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 6199 if (reg_value) 6200 __get_ras_error_count(&sec_ded_counter_registers[i], 6201 j, k, reg_value, 6202 &sec_count, &ded_count); 6203 } 6204 } 6205 } 6206 6207 err_data->ce_count += sec_count; 6208 err_data->ue_count += ded_count; 6209 6210 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6211 mutex_unlock(&adev->grbm_idx_mutex); 6212 6213 gfx_v9_0_query_utc_edc_status(adev, err_data); 6214 6215 return 0; 6216 } 6217 6218 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6219 .name = "gfx_v9_0", 6220 .early_init = gfx_v9_0_early_init, 6221 .late_init = gfx_v9_0_late_init, 6222 .sw_init = gfx_v9_0_sw_init, 6223 .sw_fini = gfx_v9_0_sw_fini, 6224 .hw_init = gfx_v9_0_hw_init, 6225 .hw_fini = gfx_v9_0_hw_fini, 6226 .suspend = gfx_v9_0_suspend, 6227 .resume = gfx_v9_0_resume, 6228 .is_idle = gfx_v9_0_is_idle, 6229 .wait_for_idle = gfx_v9_0_wait_for_idle, 6230 .soft_reset = gfx_v9_0_soft_reset, 6231 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6232 .set_powergating_state = gfx_v9_0_set_powergating_state, 6233 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6234 }; 6235 6236 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6237 .type = AMDGPU_RING_TYPE_GFX, 6238 .align_mask = 0xff, 6239 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6240 .support_64bit_ptrs = true, 6241 .vmhub = AMDGPU_GFXHUB_0, 6242 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6243 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6244 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6245 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6246 5 + /* COND_EXEC */ 6247 7 + /* PIPELINE_SYNC */ 6248 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6249 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6250 2 + /* VM_FLUSH */ 6251 8 + /* FENCE for VM_FLUSH */ 6252 20 + /* GDS switch */ 6253 4 + /* double SWITCH_BUFFER, 6254 the first COND_EXEC jump to the place just 6255 prior to this double SWITCH_BUFFER */ 6256 5 + /* COND_EXEC */ 6257 7 + /* HDP_flush */ 6258 4 + /* VGT_flush */ 6259 14 + /* CE_META */ 6260 31 + /* DE_META */ 6261 3 + /* CNTX_CTRL */ 6262 5 + /* HDP_INVL */ 6263 8 + 8 + /* FENCE x2 */ 6264 2, /* SWITCH_BUFFER */ 6265 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6266 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6267 .emit_fence = gfx_v9_0_ring_emit_fence, 6268 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6269 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6270 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6271 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6272 .test_ring = gfx_v9_0_ring_test_ring, 6273 .test_ib = gfx_v9_0_ring_test_ib, 6274 
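/* NOP insertion and IB padding reuse the generic amdgpu_ring helpers;
 * the remaining callbacks are gfx9-specific emit functions.
 */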
.insert_nop = amdgpu_ring_insert_nop, 6275 .pad_ib = amdgpu_ring_generic_pad_ib, 6276 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6277 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6278 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6279 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6280 .emit_tmz = gfx_v9_0_ring_emit_tmz, 6281 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6282 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6283 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6284 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6285 }; 6286 6287 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6288 .type = AMDGPU_RING_TYPE_COMPUTE, 6289 .align_mask = 0xff, 6290 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6291 .support_64bit_ptrs = true, 6292 .vmhub = AMDGPU_GFXHUB_0, 6293 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6294 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6295 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6296 .emit_frame_size = 6297 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6298 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6299 5 + /* hdp invalidate */ 6300 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6301 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6302 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6303 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6304 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6305 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6306 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6307 .emit_fence = gfx_v9_0_ring_emit_fence, 6308 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6309 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6310 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6311 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6312 .test_ring = gfx_v9_0_ring_test_ring, 6313 .test_ib = gfx_v9_0_ring_test_ib, 6314 .insert_nop = amdgpu_ring_insert_nop, 6315 .pad_ib = amdgpu_ring_generic_pad_ib, 6316 .set_priority = gfx_v9_0_ring_set_priority_compute, 6317 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6318 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6319 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6320 }; 6321 6322 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6323 .type = AMDGPU_RING_TYPE_KIQ, 6324 .align_mask = 0xff, 6325 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6326 .support_64bit_ptrs = true, 6327 .vmhub = AMDGPU_GFXHUB_0, 6328 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6329 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6330 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6331 .emit_frame_size = 6332 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6333 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6334 5 + /* hdp invalidate */ 6335 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6336 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6337 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6338 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6339 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6340 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6341 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6342 .test_ring = gfx_v9_0_ring_test_ring, 6343 .insert_nop = amdgpu_ring_insert_nop, 6344 .pad_ib = amdgpu_ring_generic_pad_ib, 6345 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6346 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6347 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6348 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6349 }; 6350 6351 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6352 { 6353 int i; 6354 6355 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6356 6357 for (i = 0; i < 
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 is the bitmap array size (4*4), which covers all gfx9 ASICs.
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which suits Vega ASICs with their 4*2
			 * SE/SH layout. Arcturus changes the SE/SH layout to 8*1,
			 * so to limit the impact we fold the extra SEs back into
			 * the existing 4x4 array as below (a standalone sketch of
			 * this index math is included at the end of this file):
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
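
/*
 * Illustrative note (not part of the driver): gfx_v9_0_get_cu_info() above
 * folds an Arcturus-style 8*1 SE/SH layout into the 4x4 cu_info bitmap via
 * bitmap[se % 4][sh + se / 4]. The standalone, compiled-out sketch below is
 * a hypothetical userspace program that only demonstrates this index math;
 * it assumes nothing about the hardware beyond the 8*1 layout described in
 * the comment in gfx_v9_0_get_cu_info().
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned int se, sh = 0;

	/* Arcturus-style layout: 8 shader engines, 1 shader array each. */
	for (se = 0; se < 8; se++)
		printf("SE%u,SH%u --> bitmap[%u][%u]\n",
		       se, sh, se % 4, sh + se / 4);

	/*
	 * Output: SE0..SE3 land in bitmap[0..3][0], while SE4..SE7 land in
	 * bitmap[0..3][1], matching the mapping table in the driver comment.
	 */
	return 0;
}
#endif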
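
/*
 * Usage note (illustrative): gfx_v9_0_ip_block is the handle the SoC setup
 * code registers so this GFX 9.0 implementation participates in the common
 * IP init/fini/suspend/resume flow driven through gfx_v9_0_ip_funcs. The
 * compiled-out sketch below shows how an SoC routine would typically wire it
 * in, following the usual amdgpu pattern; example_set_ip_blocks() is a
 * hypothetical name used only for this illustration.
 */
#if 0
static int example_set_ip_blocks(struct amdgpu_device *adev)
{
	int r;

	/* Register GFX v9.0 so its amd_ip_funcs callbacks get invoked. */
	r = amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
	if (r)
		return r;

	return 0;
}
#endif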